处理 XML 列表的 Python 函数(Python function for xml list)

2019-09-29 03:46发布

我已经解析了一个 XML 文件,内容大致如下。也许我复制得不够完整,但整体结构是正确的,如下所示:

     <?xml version="1.0" encoding="UTF-8"?>
        <!DOCTYPE raml SYSTEM 'raml20.dtd'>
        <raml version="2.0" xmlns="raml20.xsd">
        <cmData type="actual">
            <managedObject class="LN" distName="PTR" id="2425">
              <p name="aak">220</p>
              <p name="orp">05</p>
              <p name="name">Portro</p>
              <p name="optres">false</p>
              <p name="optblu">false</p>
              <p name="aoptdet">false</p>
              <p name="advcell">false</p>
              <list name="sibList">
                <item>
                  <p name="sibcity">177</p>
                  <p name="sibrep">2</p>
                </item>
                <item>
                  <p name="sibcity">177</p>
                  <p name="sibrep">1</p>
                </item>
              </list>
            </managedObject>
            <managedObject class="LN" distName="KRNS" id="93886">
              <p name="aak">150</p>
              <p name="orp">05</p>
              <p name="name">Portro</p>
              <p name="optres">false</p>
              <p name="optblu">tru</p>
              <p name="aoptdet">false</p>
              <p name="advcell">true</p>
              <list name="sibList">
                <item>
                  <p name="sibcity">177</p>
                  <p name="sibrep">1</p>
                </item>
                <item>
                  <p name="sibcity">180</p>
                  <p name="sibrep">2</p>
                </item>
               </list>
            </managedObject>
             ....
            <managedObject>
             ...
            </managedObject>

            ...
        </cmData>
        </raml>

我需要从第一个 managedObject 开始遍历所有的 managedObject,把它的各个参数(p 的 name,例如 aak、orp 等)与其他 managedObject 的参数进行比较,并输出不同的参数及其值;如果没有不同的参数值,则什么也不做。我已经写了比较参数的代码,但不知道如何遍历列表(名为 “sibList”)并比较其中的参数。我写了下面这个函数,其中键是 p 的 name 属性,值是 p 的文本内容:

# Asker's fragment, reproduced as posted. It is Python 2 style code
# (dict.iteritems) and its indentation is inconsistent, so it does not run
# as-is. `temp_ln` and `ref` are not defined in this snippet -- presumably
# the managedObject elements and the reference object's {name: text} dict;
# confirm against the asker's full script.

# Flat list of [distName, child name, child text] triples.
temp = []
for i in temp_ln:
    # NOTE: `k` is never used; both zip() arguments yield the children of `i`.
    for j, k in zip(i.getchildren(), i):
        temp.append([i.get('distName'), j.get('name'), j.text])

    # Map distName -> {child name: child text} for every element in temp_ln.
    # NOTE(review): as indented this sits inside the loop above and reuses
    # `i` as its loop variable -- likely a paste artefact; confirm.
    tempdict = {}
    for i in temp_ln:
        td = {}
        for j in i.getchildren():
            td.update({j.get('name'): j.text})
        tempdict.update({i.get('distName'): td})


# Collect name -> text for the entries of each item under the child named
# 'sibList'. The same names repeat in every item, so a later item overwrites
# the values stored for an earlier one.
elements_list = {}
   if j.get('name') == 'sibList':
            for item in j.getchildren():
                for w in item.getchildren():
                    elements_list.update({ w.get('name'): w.text})

        # For every object, record (value, reference value) for each key whose
        # value differs from `ref`; the 'name' key is skipped.
        main_dif = {}
        for key, value in tempdict.iteritems():
            dif_k = {}
            for k, v in value.iteritems():
                # Any failure while reading ref[k] (bare except) is treated as
                # "no reference value".
                try: 
                    a = ref[k]
                except:
                    a = None
                if v != a:
                    if k == 'name':
                        pass
                    else:
                        dif_k.update({k:(v, a)})
            main_dif.update({key:dif_k})

Answer 1:

下面的解决方案会解析 XML 文件,将各个 managedObject 相互比较(代码中按文档顺序逐对比较相邻的两个 managedObject),并打印出得到的差异对象。

import json
from itertools import zip_longest
from xml.etree import ElementTree


# Load the RAML export that is going to be compared.
tree = ElementTree.parse('raml20.xml')

# Prefix map for ElementPath queries, plus the fully qualified "{uri}tag"
# names of the three element types that are looked up by tag.
ns = {'ns': 'raml20.xsd'}
nsP, nsList, nsItem = [
    '{%s}%s' % (ns['ns'], tag) for tag in ('p', 'list', 'item')
]


def pkv(o):
    """Return a ``{name: text}`` dict for the ``p`` elements directly under *o*.

    Only immediate children are considered. Walking every descendant (as
    ``Element.iter`` does) would also pick up the ``p`` elements nested in
    ``list``/``item`` children, so their values would leak into the parent's
    dict and could overwrite a same-named parameter of the parent.

    Args:
        o: An ``xml.etree.ElementTree.Element`` whose ``p`` children carry a
            ``name`` attribute.

    Returns:
        A dict mapping each child's ``name`` attribute to its text (``None``
        for an element without text). If a name repeats, the last one wins.

    Raises:
        KeyError: If a ``p`` child has no ``name`` attribute.
    """
    return {p.attrib['name']: p.text for p in o.findall(nsP)}


def parse(tree):
    """Build a ``{distName: parameters}`` mapping from a parsed RAML tree.

    Every ``managedObject`` found under ``cmData`` becomes one entry keyed by
    its ``distName`` attribute. The entry starts as the dict returned by
    ``pkv`` for the object; then, for each ``list`` element found below the
    object, a list holding one ``pkv`` dict per ``item`` is stored under the
    list's ``name`` attribute.

    Args:
        tree: A parsed ``ElementTree`` whose root contains
            ``cmData/managedObject`` elements in the ``ns`` namespace.

    Returns:
        A dict mapping ``distName`` to that object's parameter dict.

    Raises:
        KeyError: If a ``managedObject`` lacks ``distName`` or a ``list``
            lacks ``name``.
    """
    managed_objects = tree.getroot().findall(
        './ns:cmData/ns:managedObject', ns)
    result = {}
    for managed in managed_objects:
        params = pkv(managed)
        for lst in managed.iter(nsList):
            items = []
            for item in lst.iter(nsItem):
                items.append(pkv(item))
            params[lst.attrib['name']] = items
        result[managed.attrib['distName']] = params
    return result


def diff_dicts(d1, d2, ignore_keys=None):
    """Return the differences between two flat dicts.

    Args:
        d1: First mapping.
        d2: Second mapping.
        ignore_keys: Optional iterable of keys to leave out of the
            comparison. Any iterable of hashable keys is accepted (set,
            list, tuple, ...); ``None`` means "ignore nothing".

    Returns:
        A dict mapping each differing key to a ``(d1_value, d2_value)``
        tuple. A key present in only one of the dicts is always reported,
        with ``None`` standing in for the missing side.
    """
    # Copy into a fresh set: this avoids a shared mutable default argument
    # and lets callers pass a list or tuple as well as a set.
    ignored = set(ignore_keys) if ignore_keys is not None else set()
    k1 = set(d1)
    k2 = set(d2)
    # Keys on both sides are reported only when their values differ.
    diff = {
        key: (d1[key], d2[key])
        for key in (k1 & k2) - ignored
        if d1[key] != d2[key]
    }
    # Keys on exactly one side are always reported.
    diff.update(
        {key: (d1.get(key), d2.get(key)) for key in (k1 ^ k2) - ignored})
    return diff


def diff_lists(l1, l2):
    """Return the per-index differences between two lists of dicts.

    Items are compared position by position with ``diff_dicts``. When the
    lists differ in length, each surplus item of the longer list is compared
    against an empty dict, so all of its keys are reported with ``None`` on
    the missing side instead of being silently dropped.

    Args:
        l1: First list of dicts.
        l2: Second list of dicts.

    Returns:
        A dict mapping the index of every differing position to the
        ``diff_dicts`` result for that position. Positions without
        differences are omitted.
    """
    diff = {}
    # The fill value is only read, never mutated, so sharing one dict is safe.
    for i, (d1, d2) in enumerate(zip_longest(l1, l2, fillvalue={})):
        d = diff_dicts(d1, d2)
        if d:
            diff[i] = d
    return diff


def diff_objects(o1, o2):
    """Return the differences between two parsed objects (dicts).

    Keys whose value is a list in either object are compared item by item
    with ``diff_lists``; all other keys are compared with ``diff_dicts``.

    Args:
        o1: First object dict.
        o2: Second object dict.

    Returns:
        A dict of differences. Scalar keys map to ``(o1_value, o2_value)``.
        A list key present in both objects maps to the ``diff_lists`` result;
        a list key missing from one object maps to ``(o1_value, o2_value)``
        with ``None`` for the missing side. The result is empty when the
        objects are equal.
    """
    listkeys = {
        key
        for obj in (o1, o2)
        for key, value in obj.items()
        if isinstance(value, list)
    }
    diff = diff_dicts(o1, o2, listkeys)
    for key in listkeys:
        if key in o1 and key in o2:
            list_diff = diff_lists(o1[key], o2[key])
            # Identical lists give an empty dict. Storing it would make the
            # overall result non-empty (truthy) for objects that are equal.
            if list_diff:
                diff[key] = list_diff
        else:
            diff[key] = (o1.get(key), o2.get(key))
    return diff


def compare_objects(objs):
    """Diff each object against the one that follows it in *objs*.

    Only consecutive pairs are compared, in the iteration order of *objs*.

    Args:
        objs: Mapping of object name to parsed object dict.

    Returns:
        A list of ``(first_name, second_name, diff)`` tuples, one for every
        consecutive pair whose ``diff_objects`` result is non-empty.
    """
    names = list(objs)
    report = []
    for left, right in zip(names, names[1:]):
        delta = diff_objects(objs[left], objs[right])
        if not delta:
            continue
        report.append((left, right, delta))
    return report


# Parse the loaded tree, diff consecutive managed objects and print the
# resulting list as indented JSON.
res = compare_objects(parse(tree))
print(json.dumps(res, indent=2))

我使用了下面的 raml20.xml 测试文件:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE raml SYSTEM 'raml20.dtd'>
<raml version="2.0" xmlns="raml20.xsd">
  <cmData type="actual">
    <managedObject class="LN" distName="PTR" id="2425">
      <p name="aak">220</p>
      <p name="orp">05</p>
      <p name="name">Portro</p>
      <p name="optres">false</p>
      <p name="optblu">false</p>
      <p name="aoptdet">false</p>
      <p name="advcell">false</p>
      <list name="sibList">
        <item>
          <p name="sibcity">177</p>
          <p name="sibrep">2</p>
        </item>
        <item>
          <p name="sibcity">177</p>
          <p name="sibrep">1</p>
        </item>
      </list>
    </managedObject>
    <managedObject class="LN" distName="KRNS" id="93886">
      <p name="aak">150</p>
      <p name="orp">05</p>
      <p name="name">Portro</p>
      <p name="optres">false</p>
      <p name="optblu">tru</p>
      <p name="aoptdet">false</p>
      <p name="advcell">true</p>
      <list name="sibList">
        <item>
          <p name="sibcity">177</p>
          <p name="sibrep">1</p>
        </item>
        <item>
          <p name="sibcity">180</p>
          <p name="sibrep">2</p>
        </item>
       </list>
    </managedObject>
  </cmData>
</raml>

得到的 diff 对象如下:

[
  [
    "PTR",
    "KRNS",
    {
      "advcell": [
        "false",
        "true"
      ],
      "optblu": [
        "false",
        "tru"
      ],
      "sibcity": [
        "177",
        "180"
      ],
      "aak": [
        "220",
        "150"
      ],
      "sibrep": [
        "1",
        "2"
      ],
      "sibList": {
        "0": {
          "sibrep": [
            "2",
            "1"
          ]
        },
        "1": {
          "sibcity": [
            "177",
            "180"
          ],
          "sibrep": [
            "1",
            "2"
          ]
        }
      }
    }
  ]
]


文章来源: Python function for xml list