I need to use Python 2.4.4 to convert XML to and from a Python dictionary. All I need are the node names and values, I'm not worried about attributes because the XML I'm parsing doesn't have any. I can't use ElementTree
because that isn't available for 2.4.4, and I can't use 3rd party libraries due to my work environment. What's the easiest way for me to do this? Are there any good snippets?
Also, if there isn't an easy way to do this, are there any alternative serialization formats that Python 2.4.4 has native support for?
I recently wrote some code to translate XML into a Python data structure, although I did have to handle attributes. I used xml.dom.minidom rather than ElementTree, for a similar reason. I haven't actually tested this on Python 2.4.4, but I think it will work. I didn't write a reverse XML generator, though you can probably use the 'lispy_string' function I included to do this.
I also included some shortcuts specific to the application I was writing (explained in the docstring), but you might find those shortcuts useful too, from the sounds of it. Essentially, an xml tree technically translates into a dictionary of lists of dictionaries of lists of dictionaries of lists, etc. I omit creating the intermediary lists unless they are necessary, so you can reference elements by dictname[element1][element2]
rather than dictname[element1][0][element2][0]
and so on.
Attribute handling is a little kludgy, I strongly recommend reading the code before doing anything with attributes.
import sys
from xml.dom import minidom
def dappend(dictionary, key, item):
    """Store item under key, creating a list only when a key repeats.

    - key absent: ``dictionary[key] = item``
    - key present with a non-list value: the value becomes ``[old, item]``
    - key present with a list value: ``item`` is appended to that list

    The dictionary is modified in place; nothing is returned.

    NOTE: a list stored as the first value for a key cannot be told apart
    from an accumulation list, so later items are appended to it.
    """
    # Membership test on the dict itself; ``.keys()`` would build a list
    # on every call in Python 2.
    if key not in dictionary:
        dictionary[key] = item
        return
    current = dictionary[key]
    if isinstance(current, list):
        current.append(item)
    else:
        dictionary[key] = [current, item]
def node_attributes(node):
    """Return the node's attributes as a ``{name: value}`` dict of str.

    Returns None when ``node.hasAttributes()`` is false.
    """
    if not node.hasAttributes():
        return None
    attributes = node.attributes
    # dict() over a list of pairs keeps this usable on old interpreters
    # that lack dict comprehensions.
    return dict([(str(name), str(attributes[name].value))
                 for name in attributes.keys()])
def attr_str(node):
    """Return the key used to store a node's attributes: ``"<nodeName>-attrs"``."""
    return "%s-attrs" % str(node.nodeName)
def hasAttributes(node):
    """Return True only for element nodes that carry at least one attribute.

    Non-element nodes always yield False; ``node.hasAttributes()`` is only
    consulted for element nodes.
    """
    if node.nodeType != node.ELEMENT_NODE:
        return False
    if node.hasAttributes():
        return True
    return False
def with_attributes(node, values):
    """Wrap values in a dict keyed by the node name, attaching attributes.

    - Node without attributes: ``{nodeName: values}``.
    - Node with attributes and dict values: the attribute dict is stored
      inside ``values`` under ``'#attributes'`` (``values`` is mutated) and
      ``{nodeName: values}`` is returned.
    - Node with attributes and any other value (text, or None for an empty
      element): ``{nodeName: values, "<nodeName>-attrs": attributes}``.
    """
    name = str(node.nodeName)
    if not hasAttributes(node):
        return {name: values}
    if isinstance(values, dict):
        dappend(values, '#attributes', node_attributes(node))
        return {name: values}
    # Previously only str values were handled here; anything else (notably
    # None for an empty element such as <a x="1"/>) fell through and the
    # function returned None, silently dropping the element.
    return {name: values, attr_str(node): node_attributes(node)}
def xmldom2dict(node):
    """Given an xml dom node tree,
    return a python dictionary corresponding to the tree structure of the XML.

    This parser does not make lists unless they are needed. For example:

    '<list><item>1</item><item>2</item></list>' becomes:
    { 'list' : { 'item' : ['1', '2'] } }
    BUT
    '<list><item>1</item></list>' would be:
    { 'list' : { 'item' : '1' } }

    This is a shortcut for a particular problem and probably not a good
    long-term design.

    Return value by node kind:
    - text node: its stripped text as str, or None if it is only whitespace
    - any other childless node: ``with_attributes(node, None)``
    - node with children: ``with_attributes(node, merged_children)``; text
      children of a mixed-content node are stored under ``'#text'``.
    Comment children are ignored.
    """
    if not node.hasChildNodes():
        if node.nodeType != node.TEXT_NODE:
            return with_attributes(node, None)
        text = node.data.strip()
        if text != '':
            return str(text)
        return None

    # Convert each child exactly once. Evaluating the recursive call both in
    # the filter and in the result doubles the work at every level of the
    # tree, i.e. exponential time in the nesting depth.
    childlist = []
    for child in node.childNodes:
        if child.nodeType == child.COMMENT_NODE:
            continue
        converted = xmldom2dict(child)
        if converted is not None:
            childlist.append(converted)

    if len(childlist) == 1:
        # Single child: no intermediate dict/list is created.
        return with_attributes(node, childlist[0])

    new_dict = {}
    for child in childlist:
        if isinstance(child, dict):
            for k in child:
                dappend(new_dict, k, child[k])
        elif isinstance(child, str):
            dappend(new_dict, '#text', child)
        else:
            # Same output as the former ``print "ERROR"`` statement, but
            # valid syntax on both Python 2 and Python 3.
            sys.stdout.write("ERROR\n")
    return with_attributes(node, new_dict)
def load(fname):
    """Parse the XML source fname with minidom and return its dictionary form.

    fname is passed straight to ``minidom.parse``; the resulting document
    node is converted with ``xmldom2dict``.
    """
    return xmldom2dict(minidom.parse(fname))
def lispy_string(node, lst=None, level=0):
    """Render a nested dict/list/scalar structure as Lisp-like text fragments.

    Returns the list of string fragments (``lst`` itself when one is passed
    in); ``"".join(result)`` gives the final text.

    - dict: each key opens ``(key`` on a new line indented by ``level``
      spaces, followed by its rendered value and a closing ``)``
    - list: items rendered between `` [`` and ``]``
    - anything else: rendered as `` "value"``
    """
    if lst is None:
        lst = []
    if isinstance(node, dict):
        for key in node.keys():
            # The original called an undefined ``spaces(level)`` helper;
            # assumed to mean ``level`` space characters -- TODO confirm.
            lst.append("\n%s(%s" % (" " * level, key))
            # The original recursed through the undefined name
            # ``lispy_print``; the recursion target is this function.
            lispy_string(node[key], lst, level + 2)
            lst.append(")")
    elif isinstance(node, list):
        lst.append(" [")
        for item in node:
            lispy_string(item, lst, level)
        lst.append("]")
    else:
        lst.append(' "%s"' % node)
    return lst
if __name__ == '__main__':
    # Convert the XML file named by the first command-line argument and
    # print the resulting dictionary.
    data = minidom.parse(sys.argv[1])
    d = xmldom2dict(data)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(d)
The question "Serialize Python dictionary to XML" lists some ways of doing XML serialization. As for alternative serialization formats, I think the pickle module is a good tool for this.
Remember that dicts in Python are not ordered. I have some very basic code that is small and does not require any external modules. The downside is that it does not support any kind of XML attributes, but you said "I'm not worried about attributes", so here it is:
def d2x(d, root="root"):
    """Serialize the dictionary d to an XML string (no attribute support).

    Parameters:
        d: dict mapping tag names (str) to values. A dict value becomes a
           nested element, a list value becomes one element per item (all
           with the key as tag name), anything else is written as
           ``str(value)``.
        root: name of the wrapping root element; a false value (None, "")
           emits the children only, which is how nested dicts are rendered.

    Returns:
        The XML text, one element per line, without indentation.

    Text values are XML-escaped (``&``, ``<``, ``>``). Tag names are used
    as given and are not validated.
    """
    from xml.sax.saxutils import escape

    def element(tag, body):
        return '<' + tag + '>' + body + '</' + tag + '>\n'

    def render(tag, value):
        # Dicts nest recursively, including dicts found inside lists.
        if isinstance(value, dict):
            return element(tag, '\n' + d2x(value, None))
        return element(tag, escape(str(value)))

    # Collect fragments and join once instead of repeated concatenation.
    parts = []
    if root:
        parts.append('<' + root + '>\n')
    for key, value in d.items():
        if isinstance(value, list):
            for item in value:
                parts.append(render(key, item))
        else:
            parts.append(render(key, value))
    if root:
        parts.append('</' + root + '>\n')
    return ''.join(parts)
Example of usage:
# Sample input covering scalars, a list, and nested dictionaries.
mydict = {
    "boolean": False,
    "integer": 12,
    "float": 3.1,
    "listitems": ["item1", "item2"],
    "string": "Hello world",
    "dictionary": {
        "key1": 1,
        "key2": 2,
        "dictindict": {
            "a": "aaa",
            "b": "bbb"
        }
    }
}
# Single-argument print(...) behaves the same on Python 2 and 3.
print(d2x(mydict, "superxml"))
This will print:
<superxml>
<string>Hello world</string>
<dictionary>
<key2>2</key2>
<key1>1</key1>
<dictindict>
<a>aaa</a>
<b>bbb</b>
</dictindict>
</dictionary>
<float>3.1</float>
<listitems>item1</listitems>
<listitems>item2</listitems>
<boolean>False</boolean>
<integer>12</integer>
</superxml>
For serializing a Python dict to XML, the following Python class works well for me. Over some other solutions, it has the advantage that it is quite simple and that it does proper XML encoding. The script is based on this answer. It has only one extension: By passing the list_mappings
dictionary to the constructor, you can specify how a single list item (a child
inside the children
attribute in the example below) is named.
from xml.dom.minidom import Document
class DictToXML(object):
    """Build a minidom XML document from a nested dict/list structure.

    ``structure`` must be a dict with exactly one key, which becomes the
    root element. Nested dicts become child elements, lists become repeated
    child elements, and any other value becomes a text node via ``str()``.

    ``list_mappings`` maps a parent tag name to the tag name used for each
    item of a list found directly under that parent; parents without an
    entry use ``default_list_item_name``.

    If ``structure`` does not have exactly one key, the document is left
    empty and ``root`` is None.
    """

    # Tag name for list items whose parent tag has no list_mappings entry.
    default_list_item_name = "item"

    def __init__(self, structure, list_mappings=None):
        self.doc = Document()
        # Always define these so the instance is usable even when the
        # structure is rejected below.
        self.root = None
        if list_mappings is None:
            # Fresh dict per instance instead of a shared mutable default.
            list_mappings = {}
        self.list_mappings = list_mappings
        if len(structure) == 1:
            root_key = list(structure.keys())[0]
            self.root = self.doc.createElement(str(root_key))
            self.doc.appendChild(self.root)
            # Look the value up by the original key: str(root_key) raises
            # KeyError when the key is not already a str.
            self.build(self.root, structure[root_key])

    def build(self, father, structure):
        """Recursively append the XML form of structure under element father."""
        if isinstance(structure, dict):
            for k in structure:
                tag = self.doc.createElement(k)
                father.appendChild(tag)
                self.build(tag, structure[k])
        elif isinstance(structure, list):
            tag_name = self.default_list_item_name
            if father.tagName in self.list_mappings:
                tag_name = self.list_mappings[father.tagName]
            for item in structure:
                tag = self.doc.createElement(tag_name)
                self.build(tag, item)
                father.appendChild(tag)
        else:
            father.appendChild(self.doc.createTextNode(str(structure)))

    def display(self):
        """Print the pretty-printed document to standard output."""
        print(self.get_string())

    def get_string(self):
        """Return the document as pretty-printed XML text."""
        return self.doc.toprettyxml(indent=" ")
if __name__ == '__main__':
    # Each entry of the 'children' list is a dict with a 'name' key, and
    # list_mappings names the list items 'child', so every entry is emitted
    # as <child><name>...</name></child> -- matching the documented output.
    example = {'sibling': {'couple': {'mother': 'mom', 'father': 'dad',
                                      'children': [{'name': 'foo'},
                                                   {'name': 'bar'}]}}}
    xml = DictToXML(example, {'children': 'child'})
    xml.display()
It gives the following output:
<?xml version="1.0" ?>
<sibling>
<couple>
<children>
<child>
<name>foo</name>
</child>
<child>
<name>bar</name>
</child>
</children>
<father>dad</father>
<mother>mom</mother>
</couple>
</sibling>
Grey's link includes some solutions that look pretty robust. If you want to roll your own, though, you could walk each xml.dom node's childNodes member recursively, terminating when a node has no child nodes (node.hasChildNodes() returns False).
精彩评论