XML to/from a Python dictionary

2020-06-23 05:23发布

问题:

I need to use Python 2.4.4 to convert XML to and from a Python dictionary. All I need are the node names and values, I'm not worried about attributes because the XML I'm parsing doesn't have any. I can't use ElementTree because that isn't available for 2.4.4, and I can't use 3rd party libraries due to my work environment. What's the easiest way for me to do this? Are there any good snippets?

Also, if there isn't an easy way to do this, are there any alternative serialization formats that Python 2.4.4 has native support for?

回答1:

The question "Serialize Python dictionary to XML" lists several ways to do XML serialization. As for alternative serialization formats, I think the pickle module is a good tool for that.



回答2:

I recently wrote some code to translate XML into a python data structure, although I did have to handle attributes. I used xml.dom.minidom rather than ElementTree, for a similar reason. I haven't actually tested this on Python 2.4.4, but I think it will work. I didn't write a reverse XML generator, though you can probably use the 'lispy_string' function I included to do this.

I also included some shortcuts specific to the application I was writing (explained in the docstring), but you might find those shortcuts useful too, from the sounds of it. Essentially, an xml tree technically translates into a dictionary of lists of dictionaries of lists of dictionaries of lists, etc. I omit creating the intermediary lists unless they are necessary, so you can reference elements by dictname[element1][element2] rather than dictname[element1][0][element2][0] and so on.

Attribute handling is a little kludgy; I strongly recommend reading the code before doing anything with attributes.

import sys
from xml.dom import minidom

def dappend(dictionary, key, item):
    """Store item under key, switching to a list only when the key repeats.

    The first value seen for a key is stored as-is.  A second value replaces
    the slot with a two-element list, and any later values are appended to
    that list.  Note that a stored value which is already a list is treated
    as the accumulator, so new items are appended to it directly.
    The dictionary is modified in place; nothing is returned.
    """
    if key not in dictionary:
        dictionary[key] = item
        return
    existing = dictionary[key]
    if isinstance(existing, list):
        existing.append(item)
    else:
        dictionary[key] = [existing, item]

def node_attributes(node):
    """Return the node's attributes as a {name: value} dictionary of strings.

    Returns None when node.hasAttributes() reports no attributes.
    """
    if not node.hasAttributes():
        return None
    collected = {}
    for name in node.attributes.keys():
        collected[str(name)] = str(node.attributes[name].value)
    return collected

def attr_str(node):
    """Return the dictionary key under which this node's attributes are stored: '<nodeName>-attrs'."""
    return str(node.nodeName) + "-attrs"

def hasAttributes(node):
    """Return True only for element nodes that carry at least one attribute.

    The node type is checked first, so node.hasAttributes() is only called
    on element nodes.
    """
    return bool(node.nodeType == node.ELEMENT_NODE and node.hasAttributes())

def with_attributes(node, values):
    """Wrap values in a dictionary keyed by the node's name, keeping its attributes.

    node   -- the DOM node the values were extracted from.
    values -- the converted content of the node: a dictionary (child
              elements), a string (text), or None (no content).

    Returns:
      * { name: values } when the node has no attributes;
      * { name: values } with the attributes added to values under the
        '#attributes' key when values is a dictionary (values is modified
        in place);
      * { name: values, '<name>-attrs': attributes } for any other content.

    Previously only str content was handled in the last case, so an element
    with attributes but no content (e.g. <a x="1"/>) made this function
    return None and the element was silently lost by the caller.
    """
    name = str(node.nodeName)
    if not hasAttributes(node):
        return {name: values}
    if isinstance(values, dict):
        dappend(values, '#attributes', node_attributes(node))
        return {name: values}
    # Text or empty content cannot hold an '#attributes' entry itself, so the
    # attributes are stored next to it under a sibling key.
    return {name: values, attr_str(node): node_attributes(node)}

def xmldom2dict(node):
    """Given an xml dom node tree,
    return a python dictionary corresponding to the tree structure of the XML.
    This parser does not make lists unless they are needed.  For example:

    '<list><item>1</item><item>2</item></list>' becomes:
    { 'list' : { 'item' : ['1', '2'] } }
    BUT
    '<list><item>1</item></list>' would be:
    { 'list' : { 'item' : '1' } }

    This is a shortcut for a particular problem and probably not a good long-term design.

    Details:
      * A text node yields its stripped text, or None if it is only whitespace.
      * Comment nodes and children that convert to None are skipped.
      * A node with exactly one remaining child is wrapped directly around
        that child's value; otherwise the children are merged into one
        dictionary, with bare text collected under the '#text' key.
      * Attributes are attached by with_attributes().
      * The result is keyed by node.nodeName, so passing a whole minidom
        Document adds an outer '#document' level.
    """
    if not node.hasChildNodes():
        if node.nodeType == node.TEXT_NODE:
            text = node.data.strip()
            if text != '':
                return str(text)
            return None
        return with_attributes(node, None)

    # Convert every child exactly once.  Converting it once for the filter
    # and once more for the value doubles the work at each level of the tree,
    # which becomes exponential in the tree depth.
    childlist = []
    for child in node.childNodes:
        if child.nodeType == child.COMMENT_NODE:
            continue
        converted = xmldom2dict(child)
        if converted is not None:
            childlist.append(converted)

    if len(childlist) == 1:
        return with_attributes(node, childlist[0])

    new_dict = {}
    for child in childlist:
        if isinstance(child, dict):
            for k in child:
                dappend(new_dict, k, child[k])
        elif isinstance(child, str):
            dappend(new_dict, '#text', child)
        else:
            # Not expected: children convert to dict, str or None.
            # Written via sys.stdout so the snippet runs on Python 2 and 3.
            sys.stdout.write("ERROR\n")
    return with_attributes(node, new_dict)

def load(fname):
    """Parse the XML source fname with minidom and return it converted by xmldom2dict."""
    document = minidom.parse(fname)
    return xmldom2dict(document)

def lispy_string(node, lst=None, level=0):
    """Render a nested dict/list/scalar structure as Lisp-like text fragments.

    node  -- the structure to render: a dictionary, a list, or any other
             value (rendered as a double-quoted string).
    lst   -- list the fragments are appended to; a new list is created when
             omitted.
    level -- current indentation, in spaces, for dictionary entries.

    Returns lst, the list of string fragments; use ''.join(...) on it to get
    the final text.  Each dictionary entry becomes '(key value...)' on its
    own line, indented by level spaces; lists are wrapped in '[' ... ']'.
    """
    if lst is None:
        lst = []
    if isinstance(node, dict):
        for key in node:
            # NOTE(review): the original called an undefined spaces(level)
            # helper; it is assumed to have produced `level` space characters.
            lst.append("\n%s(%s" % (' ' * level, key))
            # Recurse into this function (the original called the undefined
            # name lispy_print and failed with NameError).
            lispy_string(node[key], lst, level + 2)
            lst.append(")")
    elif isinstance(node, list):
        lst.append(" [")
        for item in node:
            lispy_string(item, lst, level)
        lst.append("]")
    else:
        # The one-element tuple keeps tuple values from being unpacked by %.
        lst.append(' "%s"' % (node,))
    return lst

if __name__ == '__main__':
    # Convert the XML file named by the first command-line argument and
    # print the resulting dictionary.
    document = minidom.parse(sys.argv[1])
    print(xmldom2dict(document))


回答3:

Remember that dicts in Python are not ordered. I have some very basic code that is small and does not require any external modules. The downside is that it does not support any kind of XML attributes, but you said

I'm not worried about attributes

so here it is:

def d2x(d, root="root"):
    """Serialize dictionary d to a simple, attribute-free XML string.

    d    -- dictionary mapping element names to values.  A list value emits
            one element per item, all with the same name; a dictionary value
            (directly or as a list item) emits a nested element; any other
            value is converted with str() and XML-escaped.
    root -- name of the wrapping root element.  Pass None (or an empty
            string) to emit the child elements without a wrapper.

    Returns the XML text, one element per line.  Element order follows the
    dictionary's iteration order.  Element names are used as given and are
    not validated.
    """
    # Imported locally so the snippet stays self-contained.
    from xml.sax.saxutils import escape

    def element(tag, value):
        # Render a single <tag>...</tag> line for one value.
        if isinstance(value, dict):
            body = '\n' + d2x(value, None)
        else:
            # Escape &, < and > so text values cannot break the markup.
            body = escape(str(value))
        return '<' + tag + '>' + body + '</' + tag + '>\n'

    parts = []
    if root:
        parts.append('<' + root + '>\n')

    # items() works on both Python 2 and Python 3 (iteritems() is 2-only).
    for key, value in d.items():
        if isinstance(value, list):
            for item in value:
                parts.append(element(key, item))
        else:
            parts.append(element(key, value))

    if root:
        parts.append('</' + root + '>\n')

    return ''.join(parts)

Example of usage:

# Sample input covering scalars, a list and nested dictionaries.
mydict = {
    "boolean": False,
    "integer": 12,
    "float": 3.1,
    "listitems": ["item1", "item2"],
    "string": "Hello world",
    "dictionary": {
        "key1": 1,
        "key2": 2,
        "dictindict": {
            "a": "aaa",
            "b": "bbb",
        },
    },
}
print(d2x(mydict, "superxml"))

This will print:

<superxml>
<string>Hello world</string>
<dictionary>
<key2>2</key2>
<key1>1</key1>
<dictindict>
<a>aaa</a>
<b>bbb</b>
</dictindict>
</dictionary>
<float>3.1</float>
<listitems>item1</listitems>
<listitems>item2</listitems>
<boolean>False</boolean>
<integer>12</integer>
</superxml>


回答4:

For serializing a Python dict to XML, the following Python class works well for me. Compared with some other solutions, it has the advantage that it is quite simple and that it does proper XML encoding. The script is based on this answer. It has only one extension: by passing the list_mappings dictionary to the constructor, you can specify how a single list item (a child inside the children element in the example below) is named.

from xml.dom.minidom import Document


class DictToXML(object):
    """Build an xml.dom.minidom document from a nested dict/list structure.

    Dictionaries become child elements named after their keys, lists become
    repeated item elements, and every other value becomes a text node holding
    str(value).  minidom takes care of escaping the text on output.
    """

    # Element name used for list items when the enclosing element has no
    # entry in list_mappings.
    default_list_item_name = "item"

    def __init__(self, structure, list_mappings=None):
        """Create the document.

        structure     -- dictionary with exactly one key, which names the
                         root element.  For any other number of keys the
                         document is left empty and self.root is None.
        list_mappings -- optional dictionary mapping an element name to the
                         name to use for the items of a list found directly
                         inside that element.
        """
        self.doc = Document()
        # Always defined, so the instance is usable even for an empty document.
        self.root = None
        # A fresh dictionary per instance instead of one shared mutable default.
        if list_mappings is None:
            list_mappings = {}
        self.list_mappings = list_mappings

        if len(structure) == 1:
            root_key = list(structure.keys())[0]
            self.root = self.doc.createElement(str(root_key))
            self.doc.appendChild(self.root)
            # Look the value up by the original key; str(root_key) is only
            # the tag name and may not be a key of structure.
            self.build(self.root, structure[root_key])

    def build(self, father, structure):
        """Recursively append the XML for structure below the element father."""
        if isinstance(structure, dict):
            for key in structure:
                # str() keeps tag names consistent with the root element.
                tag = self.doc.createElement(str(key))
                father.appendChild(tag)
                self.build(tag, structure[key])
        elif isinstance(structure, list):
            tag_name = self.list_mappings.get(father.tagName,
                                              self.default_list_item_name)
            for item in structure:
                tag = self.doc.createElement(tag_name)
                self.build(tag, item)
                father.appendChild(tag)
        else:
            father.appendChild(self.doc.createTextNode(str(structure)))

    def display(self):
        """Print the pretty-printed document."""
        print(self.get_string())

    def get_string(self):
        """Return the document as pretty-printed XML, indented by two spaces."""
        return self.doc.toprettyxml(indent="  ")


if __name__ == '__main__':
    # Demo of list_mappings: each item of the 'children' list is emitted as a
    # <child> element (instead of the default <item>) wrapping a <name> element.
    example = {'sibling': {'couple': {'mother': 'mom',
                                      'father': 'dad',
                                      'children': [{'name': 'foo'},
                                                   {'name': 'bar'}]}}}
    xml = DictToXML(example, {'children': 'child'})
    xml.display()

It gives the following output:

<?xml version="1.0" ?>
<sibling>
  <couple>
    <children>
      <child>
        <name>foo</name>
      </child>
      <child>
        <name>bar</name>
      </child>
    </children>
    <father>dad</father>
    <mother>mom</mother>
  </couple>
</sibling>


回答5:

Grey's link includes some solutions that look pretty robust. If you want to roll your own though, you could walk each xml.dom node's childNodes member recursively, terminating when a node has no children (childNodes is empty, i.e. node.hasChildNodes() returns False).