How does one select the first sibling of an XML node together with all its child nodes and apply some transformations to it? So far I have only succeeded in selecting either the first sibling alone (without its child nodes) or everything following the XML node.
Say we have xhtml like this:
<div class="chapter">Chapter <span class="number">1.1</span> Lorum ipsum</div>
<h2 class="article">Article <span class="number">1.</span> Lorum ipsum</h2>
<p>Lorum ipsum</p>
And the result we are after is xml like this:
<chapter>
<heading>
<label>Chapter</label>
<number>1.1</number>
<title>Lorum ipsum</title>
</heading>
<article>
<heading>
<label>Article</label>
<number>1.</number>
<title>Lorum ipsum</title>
</heading>
<par>Lorum ipsum</par>
</article>
</chapter>
My guess is that I need to do some regex magic to get the label and title tags right, but if this could also be done using plain XSLT, that would be great.
This XSLT 1.0 transformation:
<xsl:stylesheet
    version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xhtml="http://www.w3.org/1999/xhtml"
    exclude-result-prefixes="xhtml">

  <xsl:output encoding="utf-8"/>

  <!-- Identity transform: any node or attribute that no more specific
       template claims is copied through unchanged. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Entry point: wrap the result in a text element and process only the
       chapter divs that are children of the XHTML body. -->
  <xsl:template match="xhtml:html">
    <text>
      <xsl:apply-templates select="xhtml:body/xhtml:div[@class='chapter']"/>
    </text>
  </xsl:template>

  <!-- Chapter div: emit the chapter heading, then process the h2 siblings
       that belong to this chapter. An h2 belongs here when its nearest
       preceding chapter div is the div currently being processed; node
       identity is compared through generate-id(). -->
  <xsl:template match="xhtml:div[@class='chapter']">
    <chapter>
      <xsl:apply-templates select="." mode="heading"/>
      <xsl:apply-templates
          select="following-sibling::xhtml:h2
                    [generate-id(current())
                     = generate-id(preceding-sibling::xhtml:div[@class='chapter'][1])]"/>
    </chapter>
  </xsl:template>

  <!-- Article h2: emit the article heading, then process the p siblings
       whose nearest preceding article h2 is this h2. -->
  <xsl:template match="xhtml:h2[@class='article']">
    <article>
      <xsl:apply-templates select="." mode="heading"/>
      <xsl:apply-templates
          select="following-sibling::xhtml:p
                    [generate-id(current())
                     = generate-id(preceding-sibling::xhtml:h2[@class='article'][1])]"/>
    </article>
  </xsl:template>

  <!-- Shared heading builder for chapter divs and article h2s. Both have
       the shape "label text, number span, title text": the first text
       child becomes the label, the span with class "number" becomes the
       number, and the second text child becomes the title. Surrounding
       whitespace is trimmed with normalize-space(). -->
  <xsl:template match="xhtml:h2 | xhtml:div" mode="heading">
    <heading>
      <label>
        <xsl:value-of select="normalize-space(text()[1])"/>
      </label>
      <number>
        <xsl:value-of select="normalize-space(xhtml:span[@class='number'])"/>
      </number>
      <title>
        <xsl:value-of select="normalize-space(text()[2])"/>
      </title>
    </heading>
  </xsl:template>

  <!-- Paragraph: rename p to par. The children are processed in the default
       mode, so content without a dedicated template is copied by the
       identity template. -->
  <xsl:template match="xhtml:p">
    <par>
      <xsl:apply-templates/>
    </par>
  </xsl:template>

</xsl:stylesheet>
when applied to
<html xmlns="http://www.w3.org/1999/xhtml">
<body>
<div class="chapter">Chapter <span class="number">1.1</span> Lorum ipsum</div>
<h2 class="article">Article <span class="number">1.</span> Lorum ipsum</h2>
<p>Lorum ipsum A</p>
<p>Lorum ipsum B</p>
<h2 class="article">Article <span class="number">2.</span> Lorum ipsum</h2>
<p>Lorum ipsum D</p>
<h2 class="article">Article <span class="number">3.</span> Lorum ipsum</h2>
<p>Lorum ipsum E</p>
<p>Lorum ipsum F</p>
<div class="chapter">Chapter <span class="number">2.1</span> Lorum ipsum</div>
<h2 class="article">Article <span class="number">1.</span> Lorum ipsum</h2>
<p>Lorum ipsum G</p>
</body>
</html>
yields:
<text>
<chapter>
<heading>
<label>Chapter</label>
<number>1.1</number>
<title>Lorum ipsum</title>
</heading>
<article>
<heading>
<label>Article</label>
<number>1.</number>
<title>Lorum ipsum</title>
</heading>
<par>Lorum ipsum A</par>
<par>Lorum ipsum B</par>
</article>
<article>
<heading>
<label>Article</label>
<number>2.</number>
<title>Lorum ipsum</title>
</heading>
<par>Lorum ipsum D</par>
</article>
<article>
<heading>
<label>Article</label>
<number>3.</number>
<title>Lorum ipsum</title>
</heading>
<par>Lorum ipsum E</par>
<par>Lorum ipsum F</par>
</article>
</chapter>
<chapter>
<heading>
<label>Chapter</label>
<number>2.1</number>
<title>Lorum ipsum</title>
</heading>
<article>
<heading>
<label>Article</label>
<number>1.</number>
<title>Lorum ipsum</title>
</heading>
<par>Lorum ipsum G</par>
</article>
</chapter>
</text>
This stylesheet creates the desired output:
<!-- Chapter div in "chapter" mode: create an element named after the div's
     class attribute (the match restricts it to "chapter"), build its heading
     from the div's own children, then process the h2 siblings whose nearest
     preceding chapter div is this div. -->
<xsl:template match="html:div[@class='chapter']" mode="chapter">
  <xsl:element name="{@class}">
    <heading>
      <xsl:apply-templates select="node()" mode="chapter"/>
    </heading>
    <xsl:apply-templates
        mode="chapter"
        select="following-sibling::html:h2
                  [generate-id(current())
                   = generate-id(preceding-sibling::html:div[@class='chapter'][1])]"/>
  </xsl:element>
</xsl:template>
<!-- Article h2 in "chapter" mode: create an element named after the h2's
     class attribute (the match restricts it to "article"), build its heading
     from the h2's own children, then process the p siblings whose nearest
     preceding article h2 is this h2. -->
<xsl:template match="html:h2[@class='article']" mode="chapter">
  <xsl:element name="{@class}">
    <heading>
      <xsl:apply-templates select="node()" mode="chapter"/>
    </heading>
    <xsl:apply-templates
        mode="chapter"
        select="following-sibling::html:p
                  [generate-id(current())
                   = generate-id(preceding-sibling::html:h2[@class='article'][1])]"/>
  </xsl:element>
</xsl:template>
<!-- Text node that is followed by a sibling number span ("chapter" mode):
     its whitespace-normalized value becomes the label. -->
<xsl:template match="text()[following-sibling::html:span[@class='number']]" mode="chapter">
  <label>
    <xsl:value-of select="normalize-space(.)"/>
  </label>
</xsl:template>
<!-- Number span in "chapter" mode: create an element named after the span's
     class attribute (the match restricts it to "number") holding the span's
     string value. -->
<xsl:template match="html:span[@class='number']" mode="chapter">
  <xsl:element name="{@class}">
    <xsl:value-of select="string(.)"/>
  </xsl:element>
</xsl:template>
<!-- Text node that is preceded by a sibling number span ("chapter" mode):
     its whitespace-normalized value becomes the title. -->
<xsl:template match="text()[preceding-sibling::html:span[@class='number']]" mode="chapter">
  <title>
    <xsl:value-of select="normalize-space(.)"/>
  </title>
</xsl:template>
<!-- Paragraph in "chapter" mode: create a para element holding the
     whitespace-normalized string value of the p.
     NOTE(review): the requested target format names this element "par",
     while this template emits "para"; confirm which name is wanted. -->
<xsl:template match="html:p" mode="chapter">
  <para>
    <xsl:value-of select="normalize-space(.)"/>
  </para>
</xsl:template>
<!-- Catch-all for text nodes in "chapter" mode that the label and title
     templates do not claim: produce nothing, so that the built-in text rule
     cannot copy stray text into the output. -->
<xsl:template match="text()" mode="chapter"></xsl:template>
Applied to Tomalak's example input XML, it produces:
<?xml version="1.0" encoding="UTF-8"?>
<book>
<chapter>
<heading>
<label>Chapter</label>
<number>1.1</number>
<title>Lorum ipsum</title>
</heading>
<article>
<heading>
<label>Article</label>
<number>1.</number>
<title>Lorum ipsum</title>
</heading>
<para>Lorum ipsum A</para>
<para>Lorum ipsum B</para>
</article>
<article>
<heading>
<label>Article</label>
<number>2.</number>
<title>Lorum ipsum</title>
</heading>
<para>Lorum ipsum D</para>
</article>
<article>
<heading>
<label>Article</label>
<number>3.</number>
<title>Lorum ipsum</title>
</heading>
<para>Lorum ipsum E</para>
<para>Lorum ipsum F</para>
</article>
</chapter>
<chapter>
<heading>
<label>Chapter</label>
<number>2.1</number>
<title>Lorum ipsum</title>
</heading>
<article>
<heading>
<label>Article</label>
<number>1.</number>
<title>Lorum ipsum</title>
</heading>
<para>Lorum ipsum G</para>
</article>
</chapter>
</book>
精彩评论