开发者

XSLT: convert carriage returns to paragraphs in mixed node while preserving HTML

开发者 https://www.devze.com 2023-01-17 00:42 出处:网络
I'm trying to wrap new lines in paragraphs without eliminating the HTML in the mixed node. I can get one or the other to work, but not both.

I'm trying to wrap new lines in paragraphs without eliminating the HTML in the mixed node. I can get one or the other to work, but not both.

XML:

<root>
    <mixed html="true">
        line 1

        <a href="http://google.com">line 2</a>

        <em>line 3</em>
    </mixed>
</root>

desired output:

 <div>
     <p>line 1</p>
     <p><a href="http://google.com">line 2</a></p>
     <p><em>line 3</em></p>
 </div>

these templates match the HTML:

<!-- Re-create every element nested below an element that carries an @html
     attribute: emit an element with the same name, then process its
     attributes, child elements and text nodes with the remaining rules. -->
<xsl:template match="//*[@html]//*">
    <xsl:variable name="tag" select="name()"/>
    <xsl:element name="{$tag}">
        <xsl:apply-templates select="@* | * | text()"/>
    </xsl:element>
</xsl:template>

<!-- Re-create an attribute found inside an @html-flagged subtree under its
     original name.
     The value is written with xsl:value-of: the content of xsl:attribute may
     only produce text nodes, so copying the attribute node itself with
     xsl:copy-of is an error in XSLT 1.0 (processors either fail or silently
     drop the node, leaving an empty value). -->
<xsl:template match="//*[@html]//@*">
    <xsl:attribute name="{name(.)}">
        <xsl:value-of select="."/>
    </xsl:attribute>
</xsl:template>

these templates convert new lines to paragraphs:

<!-- nl2p: entry point for the newline-to-paragraph conversion.
     param input: the content to convert; it is handed to the
     "newline-to-paragraph" template as a result tree fragment (a copy of
     $input), and whatever that template produces is emitted unchanged. -->
<xsl:template name="nl2p">
    <xsl:param name="input" />
    <xsl:call-template name="newline-to-paragraph">
        <xsl:with-param name="input">
            <xsl:copy-of select="$input" />
        </xsl:with-param>
    </xsl:call-template>
</xsl:template>

<!-- newline-to-paragraph: split a string on line feeds (&#10;) and wrap each
     piece in <p></p>.
     param input: the text to split; it is used through string functions, so
       any markup inside it is reduced to its string value.
     Pieces that are empty or contain only whitespace are skipped so that
     indentation-only lines do not yield blank paragraphs; the text of a
     kept piece is emitted unchanged.
     Recursion: the part before the first line feed is emitted, then the
     template calls itself on the part after it until no line feed remains. -->
<xsl:template name="newline-to-paragraph">

    <xsl:param name="input" />

    <xsl:choose>
        <xsl:when test="contains($input, '&#10;')">
            <xsl:variable name="head" select="substring-before($input, '&#10;')" />
            <xsl:if test="normalize-space($head) != ''">
                <xsl:element name="p"><xsl:value-of select="$head" /></xsl:element>
            </xsl:if>
            <!-- continue with everything after the first line feed -->
            <xsl:call-template name="newline-to-paragraph">
                <xsl:with-param name="input" select="substring-after($input, '&#10;')" />
            </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
            <!-- last (or only) piece: no line feed left -->
            <xsl:if test="normalize-space($input) != ''">
                <xsl:element name="p"><xsl:value-of select="$input" /></xsl:element>
            </xsl:if>
        </xsl:otherwise>
    </xsl:choose>

</xsl:template>

Is this possible? I realize the nl2p template runs string functions on the nodeset -- does this destroy the HTML? Can I preserve it or use a specific order of operations to achieve this result?

Thanks in advance.

Edit: I'm using XSLT 1.0


EDIT: Sorry, I forgot to split the text nodes!

Most general problem: wrapping non-empty lines of mixed content with a p element

The problem here is how the input tree provider deals with whitespace-only text nodes. Only Saxon seems to preserve whitespace-only text nodes... Of course, adding xml:space="preserve" to the input solves the problem for every other XSLT processor.

This stylesheet:

<!-- Walks the children of an element flagged html='true' or nl2p='true'
     sibling by sibling and groups them into <p> elements, breaking at line
     feeds (&#xA;) found in text nodes. Two modes cooperate:
       default mode: starts a new <p> or splits a text node into lines;
       mode "open":  copies nodes into the <p> that is currently open. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output omit-xml-declaration="yes" indent="yes" />
    <!-- do not strip whitespace-only text nodes from any source element -->
    <xsl:preserve-space elements="*" />
    <!-- Flagged container: emit a <div> and start the walk at its first child
         node only; each rule below moves on to the next node itself. -->
    <xsl:template match="*[@html='true' or @nl2p='true']">
        <div>
            <xsl:apply-templates select="node()[1]"/>
        </div>
    </xsl:template>
    <!-- Mode "open": copy the current node, then continue with the immediately
         following sibling in the same mode. The chain ends when there is no
         further sibling or when the priority-1 "open" rule for text containing
         a line feed takes over. -->
    <xsl:template match="node()" mode="open" name="open">
        <xsl:copy-of select="." />
        <xsl:apply-templates select="following-sibling::node()[1]" 
                             mode="open" />
    </xsl:template>
    <!-- Default mode, any child node of a flagged element (text nodes that
         contain a line feed are handled by the priority-1 rule further down).
         param pTail: text to place at the start of the new <p>; empty by default.
         Opens a <p>, writes pTail, copies this node and its following siblings
         through the "open" chain, then hands over to the next following text
         sibling that contains a line feed, passing what comes after that
         node's first line feed as pString. -->
    <xsl:template match="*[@html='true' or @nl2p='true']/node()">
        <xsl:param name="pTail" select="''" />
        <p>
            <xsl:value-of select="$pTail" />
            <xsl:call-template name="open" />
        </p>
        <xsl:variable name="vNext" 
        select="following-sibling::text()[contains(., '&#xA;')][1]" />
        <xsl:apply-templates select="$vNext">
            <xsl:with-param name="pString" 
            select="substring-after($vNext, '&#xA;')" />
        </xsl:apply-templates>
    </xsl:template>
    <!-- Mode "open", text containing a line feed: write only the part before
         the first line feed and do not continue to further siblings. -->
    <xsl:template match="text()[contains(., '&#xA;')]" 
                  mode="open" priority="1">
        <xsl:value-of select="substring-before(., '&#xA;')" />
    </xsl:template>
    <!-- Default mode, text child (of a flagged element) containing a line feed.
         param pString: the text still to be processed; defaults to the node itself.
         While pString contains a line feed, the part before it is wrapped in
         its own <p> when it is non-empty after normalize-space(), and the
         template calls itself with the remainder. Once no line feed is left,
         the remainder is passed as pTail to the next sibling node. -->
    <xsl:template match="*[@html='true' or @nl2p='true']
                          /text()[contains(., '&#xA;')]"
                  priority="1" name="text">
        <xsl:param name="pString" select="."/>
        <xsl:choose>
            <xsl:when test="contains($pString, '&#xA;')">
                <xsl:variable name="vOutput" 
                select="normalize-space(substring-before($pString, '&#xA;'))" />
                <xsl:if test="$vOutput">
                    <p>
                        <xsl:value-of select="$vOutput"/>
                    </p>
                </xsl:if>
                <xsl:call-template name="text">
                    <xsl:with-param name="pString"
                    select="substring-after($pString, '&#xA;')" />
                </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
                <xsl:apply-templates select="following-sibling::node()[1]">
                    <xsl:with-param name="pTail" select="$pString" />
                </xsl:apply-templates>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
</xsl:stylesheet>

With this input (more complex than question):

<root>
    <mixed html="true" xml:space="preserve">
        line 1
        line 2
        <a href="http://google.com">line 2</a> after

        before <em>line 3</em><img src="http://example.org"/>
    </mixed>
</root>

Output:

<div>
<p>line 1</p>
<p>line 2</p>
<p>            <a href="http://google.com">line 2</a> after</p>
<p>            before <em>line 3</em><img src="http://example.org" /></p>
</div>

Reduced problem: wrapping each non-empty line of a text node, and every other child node, with a p element

This stylesheet:

<!-- Copies the document unchanged except below elements flagged html='true':
     each element child is wrapped in <p>, and each text child is split on
     line feeds with every non-blank line wrapped in <p>. -->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <!-- identity rule: copy the node and process its attributes and children -->
    <xsl:template match="node()|@*" name="identity">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
    </xsl:template>
    <!-- element child of a flagged element: identity copy inside a <p> -->
    <xsl:template match="*[@html='true']/*">
        <p>
            <xsl:copy>
                <xsl:apply-templates select="@*|node()"/>
            </xsl:copy>
        </p>
    </xsl:template>
    <!-- text child of a flagged element.
         param pString: text to process; defaults to the current node.
         Without a line feed the whitespace-normalized text is emitted in a
         <p> when it is not empty; otherwise the text is split at the first
         line feed and both halves are processed by this same template. -->
    <xsl:template match="*[@html='true']/text()" name="text">
        <xsl:param name="pString" select="."/>
        <xsl:variable name="vLine" select="normalize-space($pString)"/>
        <xsl:choose>
            <xsl:when test="not(contains($pString,'&#xA;'))">
                <xsl:if test="$vLine != ''">
                    <p><xsl:value-of select="$vLine"/></p>
                </xsl:if>
            </xsl:when>
            <xsl:otherwise>
                <xsl:call-template name="text">
                    <xsl:with-param name="pString"
                            select="substring-before($pString,'&#xA;')"/>
                </xsl:call-template>
                <xsl:call-template name="text">
                    <xsl:with-param name="pString"
                            select="substring-after($pString,'&#xA;')"/>
                </xsl:call-template>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>
</xsl:stylesheet>

With question input sample, output:

<root>
    <mixed html="true">
        <p>line 1</p>
        <p><a href="http://google.com">line 2</a></p>
        <p><em>line 3</em></p>
    </mixed>
</root>

With my own more complex input (without @xml:space):

<root>
    <mixed html="true">
        line 1
        line 2
        <a href="http://google.com">line 2</a> after

        before <em>line 3</em><img src="http://example.org"/>
    </mixed>
</root>

Output:

<root>
    <mixed html="true">
        <p>line 1</p>
        <p>line 2</p>
        <p><a href="http://google.com">line 2</a></p>
        <p>after</p>
        <p>before</p>
        <p><em>line 3</em></p>
        <p><img src="http://example.org"></img></p>
    </mixed>
</root>


Well I developed this before I saw your comment that you're stuck with 1.0. But you said you're curious about 2.0, so here it is:

<?xml version="1.0" encoding="UTF-8"?>
<!-- XSLT 2.0 variant: drops the root/mixed wrappers, wraps every other
     element in <p>, and splits text on line breaks, wrapping each
     non-blank piece in <p>. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
   <xsl:output method="html" indent="yes"/>

   <!-- Identity transform -->
   <xsl:template match="@* | node()">
      <xsl:copy>
         <xsl:apply-templates select="@* | node()"/>
      </xsl:copy>
   </xsl:template>

   <!-- surround all other elements with <p>
        NOTE(review): match="*" also applies to elements nested inside the
        wrapped ones; confirm that is intended for deeper markup. -->
   <xsl:template match="*" priority="1">
      <p><xsl:copy><xsl:apply-templates select="@* | node()"/></xsl:copy></p>
   </xsl:template>

   <!-- recurse through root and mixed elements, but don't copy them. -->
   <xsl:template match="root | mixed" priority="2">
      <xsl:apply-templates select="node()"/>
   </xsl:template>

   <!-- Surround non-space text content with <p> if there are
        newlines in the text, or element siblings.
        The newline is written as the character reference &#10;: XPath string
        literals have no backslash escapes, so '\n' would look for a backslash
        followed by the letter n. (The regex attribute below is different:
        \n and \s are regular-expression escapes there.)
        Fragments consisting only of whitespace are skipped so that no blank
        <p> is produced. -->
   <xsl:template match="text()[contains(., '&#10;') or ../*]">
      <xsl:analyze-string select="." regex="\s*\n\s*">
         <xsl:non-matching-substring>
            <xsl:if test="normalize-space(.)">
               <p><xsl:value-of select="."/></p>
            </xsl:if>
         </xsl:non-matching-substring>
      </xsl:analyze-string>
   </xsl:template>

</xsl:stylesheet>

Given the input:

<?xml version="1.0" encoding="UTF-8"?>
<root>
   <mixed html="true">
      line 1

      <a href="http://google.com">line 2</a>

      <em>line 3</em>
   </mixed>
</root>

it yields the desired output:

<p>line 1</p>
<p><a href="http://google.com">line 2</a></p>
<p><em>line 3</em></p>

The only thing that requires XSLT 2.0 about this is the <xsl:analyze-string>. You could do a similar thing in XSLT 1.0 by writing a template that recursively processes strings, looking for newline characters (written as '&#10;' in XPath, since '\n' is not an escape there), using normalize-space, and surrounding the remaining pieces of text with <p>.


A slight correction of your transformation:

<!-- Turns an element flagged html='true' into a <div>: each element child is
     wrapped in <p>, and each text child is split on line feeds with every
     non-blank line wrapped in <p>. The document element itself is not copied. -->
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output omit-xml-declaration="yes" indent="yes" />
    <xsl:strip-space elements="*"/>

 <!-- identity rule: copy the node and process its attributes and children -->
 <xsl:template match="node()|@*" name="identity">
     <xsl:copy>
       <xsl:apply-templates select="node()|@*"/>
     </xsl:copy>
 </xsl:template>

 <!-- flagged container becomes a <div> -->
 <xsl:template match="*[@html='true']">
  <div>
    <xsl:apply-templates/>
  </div>
 </xsl:template>

 <!-- element child of a flagged container: identity copy inside a <p> -->
 <xsl:template match="*[@html='true']/*">
  <p><xsl:call-template name="identity"/></p>
 </xsl:template>

 <!-- text child of a flagged container: split into paragraphs -->
 <xsl:template match="*[@html='true']/text()">
  <xsl:call-template name="nl2p"/>
 </xsl:template>

 <!-- nl2p: entry point for the conversion.
      param input: content to convert; defaults to the current node. It is
      passed on as a copy (result tree fragment), and the paragraphs built
      by "newline-to-paragraph" are emitted unchanged. -->
 <xsl:template name="nl2p">
    <xsl:param name="input" select="."/>

    <xsl:call-template name="newline-to-paragraph">
        <xsl:with-param name="input">
            <xsl:copy-of select="$input" />
        </xsl:with-param>
    </xsl:call-template>
 </xsl:template>

 <!-- convert newline characters to <p></p>
      param input: the text still to be split.
      Each line is whitespace-normalized on its own and emitted in a <p>
      when it is not empty. The remainder after a line feed is passed on
      untouched: calling normalize-space() on it would turn every later
      line feed into a space and merge all following lines into one
      paragraph. -->
 <xsl:template name="newline-to-paragraph">
    <xsl:param name="input" />

    <xsl:choose>
        <xsl:when test="contains($input, '&#10;')">
            <xsl:variable name="vlineText"
             select="normalize-space(substring-before($input, '&#10;'))"/>
            <xsl:if test="$vlineText">
              <p><xsl:value-of select="$vlineText"/></p>
            </xsl:if>
            <xsl:call-template name="newline-to-paragraph">
             <xsl:with-param name="input"
              select="substring-after($input, '&#10;')"/>
            </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
          <!-- last (or only) line -->
          <xsl:variable name="vlastLine" select="normalize-space($input)"/>
          <xsl:if test="$vlastLine">
            <p><xsl:value-of select="$vlastLine"/></p>
          </xsl:if>
        </xsl:otherwise>
    </xsl:choose>
 </xsl:template>

 <!-- document element: process its children without copying it -->
 <xsl:template match="/*">
  <xsl:apply-templates/>
 </xsl:template>
</xsl:stylesheet>

when applied on the provided XML document:

<root>
    <mixed html="true">
        line 1

        <a href="http://google.com">line 2</a>

        <em>line 3</em>
    </mixed>
</root>

produces the wanted result:

<div>
   <p>line 1</p>
   <p>
      <a href="http://google.com">line 2</a>
   </p>
   <p>
      <em>line 3</em>
   </p>
</div>
0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号