开发者

Problem merging similar XML files with XSL

开发者 https://www.devze.com 2022-12-28 22:39 出处:网络
I have two documents that I need to merge, that happen in a way that I don't seem to be able to find covered in other examples. Namely, that it needs to match not only on a node's attribute at one le…

I have two documents that I need to merge in a way that I don't seem to be able to find covered in other examples. Namely, the merge needs to match not only on a node's attribute at one level, but also on the value of an attribute one node level below that, in order to get that node's value.

I'm trying to take this sample:

<?xml version="1.0" encoding="UTF-8" ?>
<!-- First input document (the primary collection). Each record is identified
     by the value of datafield 035 / subfield a; that value is what gets
     matched against the second document. -->
<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <!-- Record 12345: datafields 035, 041, 650 and 949. -->
  <marc:record>
    <marc:datafield tag="035" ind1=" " ind2=" ">
        <marc:subfield code="a">12345</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="041" ind1=" " ind2=" ">
        <marc:subfield code="a">eng</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="4">
        <marc:subfield code="a">Art</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="949" ind1=" " ind2=" ">
        <marc:subfield code="i">Review of conference proceedings</marc:subfield>
    </marc:datafield>
  </marc:record>
  <!-- Record 54321: datafields 035, 041 and 650. -->
  <marc:record>
    <marc:datafield tag="035" ind1=" " ind2=" ">
        <marc:subfield code="a">54321</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="041" ind1=" " ind2=" ">
        <marc:subfield code="a">eng</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="4">
        <marc:subfield code="a">Byzantine</marc:subfield>
    </marc:datafield>
  </marc:record>
</marc:collection>

And, when the value of "datafield" '035', "subfield" 'a' matches (e.g. "12345"), merge in the fields of the corresponding record from this second document:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Second input document (the merge source). Records are matched to the
     first document by the value of datafield 035 / subfield a.
     The fn, xs and fo prefixes are declared on the root but are not used by
     any element or attribute in this document. -->
<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
xmlns:fn="http://www.w3.org/2005/xpath-functions" xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:fo="http://www.w3.org/1999/XSL/Format">
  <!-- Record 12345: two 650 and two 654 datafields to be merged. -->
  <marc:record>
    <marc:datafield ind2=" " ind1=" " tag="035">
        <marc:subfield code="a">12345</marc:subfield>
    </marc:datafield>
    <marc:datafield ind2="4" ind1=" " tag="650">
        <marc:subfield code="a">General works</marc:subfield>
        <marc:subfield code="x">Historians and critics</marc:subfield>
        <marc:subfield code="x">Smith, John, 1834-1917</marc:subfield>
    </marc:datafield>
    <marc:datafield ind2="4" ind1=" " tag="650">
        <marc:subfield code="a">Généralités</marc:subfield>
        <marc:subfield code="x">Historiens et critiques d'art</marc:subfield>
        <marc:subfield code="x">Dietrichson, Lorentz, 1834-1917</marc:subfield>
    </marc:datafield>
    <marc:datafield ind2=" " ind1=" " tag="654">
        <marc:subfield code="a">General works</marc:subfield>
    </marc:datafield>
    <marc:datafield ind2=" " ind1=" " tag="654">
        <marc:subfield code="a">Généralités</marc:subfield>
        <marc:subfield code="b">Historiens et critiques d'art</marc:subfield>
        <marc:subfield code="b">Smith, John, 1834-1917</marc:subfield>
    </marc:datafield>
  </marc:record>
  <!-- Record 54321: one 650 datafield to be merged. -->
  <marc:record>
    <marc:datafield ind2=" " ind1=" " tag="035">
        <marc:subfield code="a">54321</marc:subfield>
    </marc:datafield>
    <marc:datafield ind2="4" ind1=" " tag="650">
        <marc:subfield code="a">General works</marc:subfield>
        <marc:subfield code="x">Historians and critics</marc:subfield>
        <marc:subfield code="x">Lange, Julius Henrik, 1838-1896</marc:subfield>
    </marc:datafield>
  </marc:record>
</marc:collection>

The result should be:

<?xml version="1.0" encoding="UTF-8" ?>
<!-- Expected merge result. Every datafield of each original record is kept,
     and the 650/654 datafields of the second-document record with the same
     035 / subfield a value are added. The 035 datafield itself is not
     duplicated, and in this sample the fields appear in ascending tag order. -->
<marc:collection xmlns:marc="http://www.loc.gov/MARC21/slim"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <marc:record>
    <marc:datafield tag="035" ind1=" " ind2=" ">
        <marc:subfield code="a">12345</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="041" ind1=" " ind2=" ">
        <marc:subfield code="a">eng</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="4">
        <marc:subfield code="a">Art</marc:subfield>
    </marc:datafield>
    <!-- The next four datafields (650, 650, 654, 654) come from record 12345
         of the second document. -->
    <marc:datafield ind2="4" ind1=" " tag="650">
        <marc:subfield code="a">General works</marc:subfield>
        <marc:subfield code="x">Historians and critics</marc:subfield>
        <marc:subfield code="x">Smith, John, 1834-1917</marc:subfield>
    </marc:datafield>
    <marc:datafield ind2="4" ind1=" " tag="650">
        <marc:subfield code="a">Généralités</marc:subfield>
        <marc:subfield code="x">Historiens et critiques d'art</marc:subfield>
        <marc:subfield code="x">Dietrichson, Lorentz, 1834-1917</marc:subfield>
    </marc:datafield>
    <marc:datafield ind2=" " ind1=" " tag="654">
        <marc:subfield code="a">General works</marc:subfield>
    </marc:datafield>
    <marc:datafield ind2=" " ind1=" " tag="654">
        <marc:subfield code="a">Généralités</marc:subfield>
        <marc:subfield code="b">Historiens et critiques d'art</marc:subfield>
        <marc:subfield code="b">Smith, John, 1834-1917</marc:subfield>
    </marc:datafield>
    <!-- Back to the original record's own remaining datafield. -->
    <marc:datafield tag="949" ind1=" " ind2=" ">
        <marc:subfield code="i">Review of conference proceedings</marc:subfield>
    </marc:datafield>
  </marc:record>
  <marc:record>
    <marc:datafield tag="035" ind1=" " ind2=" ">
        <marc:subfield code="a">54321</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="041" ind1=" " ind2=" ">
        <marc:subfield code="a">eng</marc:subfield>
    </marc:datafield>
    <marc:datafield tag="650" ind1=" " ind2="4">
        <marc:subfield code="a">Byzantine</marc:subfield>
    </marc:datafield>
    <!-- The next datafield (650) comes from record 54321 of the second
         document. -->
    <marc:datafield ind2="4" ind1=" " tag="650">
        <marc:subfield code="a">General works</marc:subfield>
        <marc:subfield code="x">Historians and critics</marc:subfield>
        <marc:subfield code="x">Lange, Julius Henrik, 1838-1896</marc:subfield>
    </marc:datafield>
  </marc:record>
</marc:collection>

I've tried using examples that I've found that did lookups, but none of them seemed to work. I didn't include any of my XSL, because all of my results were disastrous. I keep looking at it, like it must be simple, but I'm just not getting any decent results. Any help or pointers would be greatly appreciated.

Thanks!


I think I have an answer for you. It's not the most elegant but it works. Basically, you run the stylesheet against one of the XML files you are trying to merge and then you use the document function to gain access to the other XML file. Iterate through each record in the first XML file and find the matching point. Then iterate through the second document and find the matching record and pull the appropriate nodes in.

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Merges subject fields from a second MARC collection into the collection
  being transformed.

  Run this stylesheet against the primary document. For every record it
  copies the record's own leader, controlfield and datafield elements, then
  appends the 650 and 654 datafields of each record in the second document
  whose 035 / subfield a value equals this record's first 035 / subfield a
  value. Merged fields are appended after the record's own fields; they are
  not re-sorted by tag.

  Parameter:
    mergeFile  URL of the second document (default: 'FourBabyMarcs.xml').
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:marc="http://www.loc.gov/MARC21/slim">

    <xsl:param name="mergeFile" select="'FourBabyMarcs.xml'"/>
    <xsl:variable name="doc2" select="document($mergeFile)"/>

    <xsl:template match="/">
        <marc:collection>
            <xsl:for-each select="marc:collection/marc:record">
                <!-- string() keeps only the first 035 / subfield a value,
                     so the comparison below is string-to-node-set. -->
                <xsl:variable name="ID"
                              select="string(marc:datafield[@tag='035']/marc:subfield[@code='a'])"/>

                <marc:record>
                    <!-- The record's own content, grouped by element name. -->
                    <xsl:copy-of select="marc:leader"/>
                    <xsl:copy-of select="marc:controlfield"/>
                    <xsl:copy-of select="marc:datafield"/>

                    <!-- Subject fields of the matching record(s) in the
                         second document, in that document's order. -->
                    <xsl:copy-of select="$doc2/*/marc:record
                                             [marc:datafield[@tag='035']/marc:subfield[@code='a'] = $ID]
                                         /marc:datafield[@tag='650' or @tag='654']"/>
                </marc:record>
            </xsl:for-each>
        </marc:collection>
    </xsl:template>

</xsl:stylesheet>


The following solution uses keys to make efficient lookups in the merged document. It is assumed that all datafield elements except the matching datafield should be copied, and that there will be at most one matching datafield for each record. The URL to the document that should be merged is passed as a parameter.

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copies the source MARC collection unchanged and appends, to each record,
  the fields of the matching record in a second document.

  Two records match when both contain a datafield with the tag given by
  matchTag and the same subfield a value. All element siblings of the matched
  datafield in the second document are appended after the record's own
  content; the matched datafield itself is not copied again.

  Parameters:
    mergeFile  URL of the document to merge in.
    matchTag   Tag of the identifying datafield (default: '035').
-->
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:marc="http://www.loc.gov/MARC21/slim">
    <xsl:output method="xml" indent="yes"/>
    <xsl:param name="mergeFile"/>
    <xsl:param name="matchTag" select="'035'"/>
    <xsl:variable name="mergeDoc" select="document($mergeFile)"/>

    <!-- Index datafields by "tag|first subfield a value". -->
    <xsl:key name="datafield" match="marc:datafield" 
        use="concat(@tag, '|', marc:subfield[@code='a'])"/>

    <!-- Identity rule: copy everything that has no more specific template.
         The root node is handled by the built-in rule, which simply
         processes its children. -->
    <xsl:template match="node()|@*">
        <xsl:copy>
            <xsl:apply-templates select="node()|@*"/>
        </xsl:copy>
    </xsl:template>

    <xsl:template match="marc:record">
        <xsl:copy>
            <xsl:apply-templates select="node()|@*"/>
            <!-- Only the identifying datafield drives the merge. Looking up
                 every datafield would also match on values shared by
                 unrelated records (for example 041 "eng") and pull in their
                 fields. -->
            <xsl:apply-templates select="marc:datafield[@tag = $matchTag]"
                                 mode="merge"/>
        </xsl:copy>
    </xsl:template>

    <xsl:template match="marc:datafield" mode="merge">
        <xsl:variable name="datafieldKey" 
                      select="concat(@tag, '|', marc:subfield[@code='a'])"/>
        <!-- Make the other document the context node with for-each, so that
             key lookups will consult that document instead of the source 
             document. -->
        <xsl:for-each select="$mergeDoc">
            <xsl:for-each select="key('datafield', $datafieldKey)">
                <!-- Everything in the matched record except the matched
                     datafield itself, in document order. -->
                <xsl:copy-of select="preceding-sibling::*"/>
                <xsl:copy-of select="following-sibling::*"/>
            </xsl:for-each>
        </xsl:for-each>
    </xsl:template>

</xsl:stylesheet>
0

精彩评论

暂无评论...
验证码 换一张
取 消