开发者

Creating XML file from XLS Cells via indentations

开发者 https://www.devze.com 2023-03-06 16:11 出处:网络
What I am trying to do is creating a XML file by parsing a XLS file. An example should be more relevant:

What I am trying to do is creating a XML file by parsing a XLS file. An example should be more relevant:

| tag1      |           |           |           |
|           | tag2      |           |           |
|           |           | tag3      | tag3Value |
|           |           | tag4      | tag4Value |
|           | tag5      |           |           |
|           |           | tag6      | tag6Value |
|           |           |           |           |

If we imagine those are cells, will be equivalent for the following .xml code.

<tag1>
    <tag2>
        <tag3> tag3Value </tag3>
        <tag4> tag4Value </tag4>
    </tag2>
    <tag5>
        <tag6> tag6Value </tag6>
    </tag5>
</tag1>

That wouldn't be so hard by managing one cell at a time and just doing "<" & Cell(x,y) & ">" But I wanted an elegant solution. Here is my implementation so far:

Sub lol()
    Sheet1.Activate

    Dim xmlDoc As MSXML2.DOMDocument
    Dim xmlNode As MSXML2.IXMLDOMNode

    Set xmlDoc = New MSXML2.DOMDocument
    createXML xmlDoc
End Sub

Sub createXML(xmlDoc As MSXML2.DOMDocument)
    Dim newNode As MSXML2.IXMLDOMNode

    If Not (Cells(1, 1) = "") Then

        'newNode.nodeName = Cells(1, 1)
        ReplaceNodeName xmlDoc, newNode, Cells(1, 1)

        createXMLpart2 xmlDoc, newNode, 2, 2
        xmlDoc.appendChild newNode
    End If
    xmlDoc.Save "E:\saved_cdCatalog.xml"
End Sub

Sub createXMLpart2(xmlDoc As MSXML2.DOMDocument, node As MSXML2.IXMLDOMElement, i As Integer, j As Integer)
     Dim newNode As MSXML2.IXMLDOMElement
     If Not (Cells(i, j) = "") Then

        If (Cells(i, j + 1) = "") Then

            'newNode.nodeName = Cells(i, j)
            ReplaceNodeName xmlDoc, newNode, Cells(i, j)

            createXMLpart2 xmlDoc, newNode, i + 1, j + 1
        Else
            'newNode.nodeName = "#text"
            ReplaceNodeName xmlDoc, newNode, "#text"

            'newNode.nodeValue = Cells(i, j + 1)
            createXMLpart2 xmlDoc, newNode, i + 1, j
        End If
        node.appendChild (newNode)
    End If
End Sub

Private Sub ReplaceNodeName(oDoc As DOMDocument, oElement As IXMLDOMElement, newName As String)
        Dim ohElement As IXMLDOMElement
        Dim sElement As IXMLDOMElement
        Dim oChild As IXMLDOMNode

        ' search the children '
        If Not oElement Is Nothing Then
                Set ohElement = oElement.parentNode
                Set sElement = oDoc.createElement(newName)

                For Each oChild In oElement.childNodes
                        Call sElement.appendChild(oChild)
                Next

                Call ohElement.replaceChild(sElement, oElement)
        End If
End Sub

Problems: at first I didn't realize that I can't change the name of a node by doing node.开发者_Go百科nodeName = "newName" I have found a solution on StackOverflow actually: Change NodeName of an XML tag element using MSXML

So i've commented my attempts at renaming the nodes and tried the version with the ReplaceNodeName method.

The actual problem: node.appendChild (newNode) from createXMLpart2 is giving me a problem: it sais that the variable "newNode" is no set. I am puzzled.


Maybe something like this...

Sub Tester()

Dim r As Range
Dim xmlDoc As New MSXML2.DOMDocument
Dim xmlNodeP As MSXML2.IXMLDOMNode
Dim xmlNodeTmp As MSXML2.IXMLDOMNode
Dim bDone As Boolean

    Set r = ActiveSheet.Range("A1")

    Do While Not r Is Nothing

        Set xmlNodeTmp = xmlDoc.createElement(r.Value)
        If Len(r.Offset(0, 1).Value) > 0 Then
            xmlNodeTmp.appendChild xmlDoc.createTextNode(r.Offset(0, 1).Value)
        End If

        If Not xmlNodeP Is Nothing Then
            xmlNodeP.appendChild xmlNodeTmp
        Else
            xmlDoc.appendChild xmlNodeTmp
        End If
        Set xmlNodeP = xmlNodeTmp

        If Len(r.Offset(1, 0).Value) > 0 Then
            Set r = r.Offset(1, 0) 'sibling node
            Set xmlNodeP = xmlNodeP.ParentNode
        ElseIf Len(r.Offset(1, 1).Value) > 0 Then
            Set r = r.Offset(1, 1) 'child node
        Else
            Set r = r.Offset(1, 0)
            Set xmlNodeP = xmlNodeP.ParentNode
            Do While Len(r.Value) = 0
                If r.Column > 1 Then
                    Set r = r.Offset(0, -1)
                    Set xmlNodeP = xmlNodeP.ParentNode
                Else
                    Set r = Nothing
                    Exit Do
                End If
            Loop
        End If

    Loop
    Debug.Print xmlDoc.XML
End Sub


I'm not an expert on VBA, but looking at your code, I don't understand why you think newNode would be initialized.

At the beginning of createXMLpart2(), you declare it as Dim newNode As MSXML2.IXMLDOMElement, but where do you give it a value?


I decided to go pure VBA code (e.g. a bunch of loops). What I started with was fairly small, but then I thought "what if the requirements change?". In other words, in addition to your example, what if the following also became valid:

tag1                            
    |tag2   |   |   |   |   |   |
    |   |tag3   |tag3value  |   |   |   |
    |   |tag4   |tag4value  |   |   |   |
    |tag5   |   |   |   |   |   |
    |   |tag6   |tag6value  |   |   |   |
tag9    |   |   |   |   |   |   |
    |tag10  |tag10value |   |   |   |   |
tag11   |   |   |   |   |   |   |
    |tag12  |   |   |   |   |   |
    |   |tag13  |   |   |   |   |
    |   |   |tag14  |tag14value |   |   |
    |   |   |tag15  |tag15value |   |   |
tag16   |tag16value |   |   |   |   |   |
tag17   |   |   |   |   |   |   |
    |tag18  |   |   |   |   |   |
    |   |tag19  |   |   |   |   |
    |   |   |tag20  |   |   |   |
    |   |   |   |tag21  |   |   |
    |   |   |   |   |tag22  |   |
    |   |   |   |   |   |tag23  |tag23value
    |   |   |   |   |   |tag24  |tag24value
    |   |   |   |tag25  |tag25value |   |

That might look like a bunch of gobbledygook, but it's basically putting tags with values before and beyond column 4.

If we were to dress up this xml, it would look something like this:

<tag1>
    <tag2>
        <tag3>tag3value</tag3>
        <tag4>tag4value</tag4>
    </tag2>
    <tag5>
        <tag6>tag6value</tag6>
    </tag5>
</tag1>
<tag9>
    <tag10>tag10value</tag10>
</tag9>
<tag11>
    <tag12>
        <tag13>
            <tag14>tag14value</tag14>
            <tag15>tag15value</tag15>
        </tag13>
    </tag12>
</tag11>
<tag16>tag16value</tag16>
<tag17>
    <tag18>
        <tag19>
            <tag20>
                <tag21>
                    <tag22>
                        <tag23>tag23value</tag23>
                        <tag24>tag24value</tag24>
                    </tag22>
                </tag21>
                <tag25>tag25value</tag25>
            </tag20>
        </tag19>
    </tag18>
</tag17>

And that's why my module does:

'Assumptions:
'1.  No blank columns
'2.  XML values start at A1
Option Explicit

Dim m_lCurrentRow As Long 'The current row in the range of cells
Dim m_xmlSheetRange As Range 'The current range of cells containing values

'Let the fun begin
Sub DoTheFun()
    Dim lastUsedCell As Range 'The cell in the outer most row in th outer most column that contains a value
    Dim lTotalRows As Long 'Total number of rows
    Dim iCurrentColumn As Integer


    'Find the very last used cell on a Worksheet:
    'http://www.ozgrid.com/VBA/ExcelRanges.htm
    Set lastUsedCell = Cells.Find(What:="*", After:=[A1], SearchDirection:=xlPrevious)

    'Set the range of values to check from A1 to wherever the last cell is located
    Set m_xmlSheetRange = Range("$A$1:" & lastUsedCell.Address)
    'Initialize (Sheets have an Option Base 1)
    iCurrentColumn = 1
    m_lCurrentRow = 1
    lTotalRows = m_xmlSheetRange.Rows.Count

    'Loop through all rows to create the XML string
    Do Until m_lCurrentRow > lTotalRows
        'Make sure adjacent cell does not have a value.
        If m_xmlSheetRange(m_lCurrentRow, iCurrentColumn + 1) = "" Then

            'Start the search to find a tag with a value (write the surrounding tags as needed)
            Debug.Print FindTagWithValue(iCurrentColumn)

            iCurrentColumn = FindTagColumn(iCurrentColumn)
        Else 'Adjacent cell has a value so just write out the tag and value
            Debug.Print BuildTagWithValue(iCurrentColumn)
        End If
    Loop


End Sub
'Recursive function that calls itself till a tag with a value is found.
Function FindTagWithValue(iCurrentColumn As Integer) As String
    Dim sXml As String
    Dim sMyTag As String
    Dim iPassedColumn As Integer
    Dim bTagClosed As Boolean

    iPassedColumn = iCurrentColumn

    'Get the opening and surrounding tag
    sMyTag = m_xmlSheetRange(m_lCurrentRow, iCurrentColumn)
    sXml = String(iCurrentColumn - 1, vbTab) & "<" & sMyTag & ">" & vbCrLf

    'Move to the next cell and next row
    m_lCurrentRow = m_lCurrentRow + 1
    iCurrentColumn = iCurrentColumn + 1

    bTagClosed = False 'Intialize

    Do
        If m_xmlSheetRange(m_lCurrentRow, iCurrentColumn + 1) = "" Then
            'Adjancent cell to current position does not have value.  Start recursion till we find it.
            sXml = sXml & FindTagWithValue(iCurrentColumn)
        Else
            'A value for a tag has been found.  Build the xml for the tag and tag value
            sXml = sXml & BuildTagWithValue(iCurrentColumn)

            'See if next row is on same level
            If m_xmlSheetRange(m_lCurrentRow, iCurrentColumn) <> "" And iPassedColumn < iCurrentColumn Then
                sXml = sXml & String(iPassedColumn - 1, vbTab) & "</" & sMyTag & ">" & vbCrLf
                sMyTag = m_xmlSheetRange(m_lCurrentRow, iCurrentColumn)
                bTagClosed = True
            End If
        End If
    'Keep looping till the current cell is empty or until the current column is less than the passed column
    Loop Until m_xmlSheetRange(m_lCurrentRow, iCurrentColumn) = "" Or iPassedColumn >= iCurrentColumn

    If Not bTagClosed Then
        sXml = sXml & String(iPassedColumn - 1, vbTab) & "</" & sMyTag & ">" & vbCrLf
    End If

    FindTagWithValue = sXml

    Exit Function

End Function
'A cell with a value has been found that also contains an adjacent cell with a value.  Wrap the tag around the value.
Function BuildTagWithValue(iCurrentColumn As Integer)
    Dim sXml As String
    Dim sMyTag As String
    Dim sMyTagValue As String

    Do

        sMyTag = m_xmlSheetRange(m_lCurrentRow, iCurrentColumn)
        sMyTagValue = m_xmlSheetRange(m_lCurrentRow, iCurrentColumn + 1)
        sXml = sXml & String(iCurrentColumn - 1, vbTab) & "<" & sMyTag & ">" & sMyTagValue & "</" & sMyTag & ">" & vbCrLf
        m_lCurrentRow = m_lCurrentRow + 1
    'Keep looping till you run out of tags with values in this column
    Loop Until m_xmlSheetRange(m_lCurrentRow, iCurrentColumn + 1) = ""

    'Find the next valid column
    iCurrentColumn = FindTagColumn(iCurrentColumn)

    BuildTagWithValue = sXml

    Exit Function
End Function
'Find the cell on the current row which contains a value.
Function FindTagColumn(iCurrentColumn) As Integer
    Dim bValidTagFound As Boolean

    bValidTagFound = False
    Do Until bValidTagFound
        If m_xmlSheetRange(m_lCurrentRow, iCurrentColumn) = "" Then
            If iCurrentColumn = 1 Then
                bValidTagFound = True
            Else
                iCurrentColumn = IIf(iCurrentColumn = 1, 1, iCurrentColumn - 1)
            End If
        Else
            bValidTagFound = True
            If iCurrentColumn = 1 Then
                'Do nothing
            Else
                If m_xmlSheetRange(m_lCurrentRow, iCurrentColumn - 1) <> "" Then
                    iCurrentColumn = iCurrentColumn - 1
                End If
            End If
        End If
    Loop

    FindTagColumn = iCurrentColumn
    Exit Function
End Function

So, it is a bit longer than expected and might be more whacky than elegant...but it works.

0

精彩评论

暂无评论...
验证码 换一张
取 消