Convert xml to key-value pair notation

2019-03-20 13:41发布

I use xmlstarlet el -v to display the structure of an xml file, including all the attributes and values. I would like to convert its output to some sort of key-value pairs, i.e. each attribute with its value on a separate line (including the XPath); each line must be unique.

Current result:

topRoot/topSystem/commSvcEp/commSyslog[@descr='Syslog Service' and @name='syslog' and @policyOwner='local' and @severity='critical']
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[@adminState='disabled' and @forwardingFacility='local7' and @hostname='none' and @name='secondary' and @severity='critical']
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[@adminState='disabled' and @forwardingFacility='local7' and @hostname='none' and @name='tertiary' and @severity='critical']
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[@adminState='disabled' and @forwardingFacility='local7' and @hostname='none' and @name='primary' and @severity='critical']

Desired result (could be anything similar; the indices are just an idea):

topRoot/topSystem/commSvcEp/commSyslog@descr='Syslog Service'
topRoot/topSystem/commSvcEp/commSyslog@name='syslog'
topRoot/topSystem/commSvcEp/commSyslog@policyOwner='local'
topRoot/topSystem/commSvcEp/commSyslog@severity='critical'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[0]@adminState='disabled'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[0]@forwardingFacility='local7'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[0]@hostname='none'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[0]@name='secondary'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[0]@severity='critical'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[1]@adminState='disabled'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[1]@forwardingFacility='local7'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[1]@hostname='none'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[1]@name='tertiary'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[1]@severity='critical'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[2]@adminState='disabled'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[2]@forwardingFacility='local7'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[2]@hostname='none'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[2]@name='primary'
topRoot/topSystem/commSvcEp/commSyslog/commSyslogClient[2]@severity='critical'

What I'm trying to accomplish, is being able to run diff over two such files or using grep to filter matching patterns. I'm sure there's a way to create such output, without using sed, awk or anything else but xmlstarlet itself.

I'm pretty much a newbie regarding xmlstarlet and the whole xml world (not only by the fact that I dislike xml due to its complexity and parsing overhead, etc), so I'd really appreciate your help. Thanks!

2条回答
女痞
2楼-- · 2019-03-20 14:20

I decided to write a small program using libxml2, which parses the XML and recursively outputs it in the desired format.

The code can be compiled using the following command, or something similar (I decided to call it xmlkv, kv standing for key-value):

cc -o xmlkv xmlkv.c -s -Os -Wall -Wextra `xml2-config --cflags --libs`

The code:

#include <stdlib.h>
#include <stdio.h>
#include <libxml/parser.h>
#include <libxml/tree.h>

static void print_elements(xmlNode *);

static void
print_elements(xmlNode *node)
{
    xmlNode *n;
    xmlAttrPtr a;
    xmlChar *v, *p, *q;

    for (n = node; n; n = n->next) {
        if (n->type == XML_ELEMENT_NODE) {
            if ((p = xmlGetNodePath(n)) == NULL)
                return;

            for (q = p; *q; q++)
                if (*q == '/')
                    *q = '.';

            for (a = n->properties; a; a = a->next) {
                if ((v = xmlGetProp(n, a->name)) == NULL)
                    return;

                printf("%s.%s=%s\n", p + 1, a->name, v);
                xmlFree(v);
            }

            xmlFree(p);
        }

        print_elements(n->children);
    }
}

int
main(int argc, char **argv)
{
    xmlDoc *doc = NULL;
    xmlNode *root = NULL;

    LIBXML_TEST_VERSION

    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", *argv);
        return EXIT_FAILURE;
    }

    if ((doc = xmlReadFile(argv[1], NULL, 0)) == NULL)
        return EXIT_FAILURE;

    if ((root = xmlDocGetRootElement(doc)) == NULL)
        return EXIT_FAILURE;

    print_elements(root);

    xmlFreeDoc(doc);
    xmlCleanupParser();

    return EXIT_SUCCESS;
}

A test:

$ cat test.xml
<commSyslog descr="Syslog Service" name="syslog" policyOwner="local" severity="critical">
<commSyslogClient adminState="disabled" forwardingFacility="local7" hostname="none" name="secondary" severity="critical"/>
<commSyslogClient adminState="disabled" forwardingFacility="local7" hostname="none" name="tertiary" severity="critical"/>
<commSyslogClient adminState="disabled" forwardingFacility="local7" hostname="none" name="primary" severity="critical"/>
<commSyslogMonitor adminState="disabled" descr="" name="" severity="critical"/>
<commSyslogConsole adminState="disabled" descr="" name="" severity="critical"/>
<commSyslogSource audits="disabled" descr="" events="disabled" faults="enabled" name=""/>
<commSyslogFile adminState="enabled" descr="" name="messages" severity="critical" size="4194304"/>
</commSyslog>

$ ./xmlkv test.xml
commSyslog.descr=Syslog Service
commSyslog.name=syslog
commSyslog.policyOwner=local
commSyslog.severity=critical
commSyslog.commSyslogClient[1].adminState=disabled
commSyslog.commSyslogClient[1].forwardingFacility=local7
commSyslog.commSyslogClient[1].hostname=none
commSyslog.commSyslogClient[1].name=secondary
commSyslog.commSyslogClient[1].severity=critical
commSyslog.commSyslogClient[2].adminState=disabled
commSyslog.commSyslogClient[2].forwardingFacility=local7
commSyslog.commSyslogClient[2].hostname=none
commSyslog.commSyslogClient[2].name=tertiary
commSyslog.commSyslogClient[2].severity=critical
commSyslog.commSyslogClient[3].adminState=disabled
commSyslog.commSyslogClient[3].forwardingFacility=local7
commSyslog.commSyslogClient[3].hostname=none
commSyslog.commSyslogClient[3].name=primary
commSyslog.commSyslogClient[3].severity=critical
commSyslog.commSyslogMonitor.adminState=disabled
commSyslog.commSyslogMonitor.descr=
commSyslog.commSyslogMonitor.name=
commSyslog.commSyslogMonitor.severity=critical
commSyslog.commSyslogConsole.adminState=disabled
commSyslog.commSyslogConsole.descr=
commSyslog.commSyslogConsole.name=
commSyslog.commSyslogConsole.severity=critical
commSyslog.commSyslogSource.audits=disabled
commSyslog.commSyslogSource.descr=
commSyslog.commSyslogSource.events=disabled
commSyslog.commSyslogSource.faults=enabled
commSyslog.commSyslogSource.name=
commSyslog.commSyslogFile.adminState=enabled
commSyslog.commSyslogFile.descr=
commSyslog.commSyslogFile.name=messages
commSyslog.commSyslogFile.severity=critical
commSyslog.commSyslogFile.size=4194304

Seems nice, just what I wanted. Of course, replacing / with . is pretty much useless, but I prefer this notation since it's better on the eyes.

查看更多
干净又极端
3楼-- · 2019-03-20 14:38

Since you're already using xmlstarlet, you could also use XSLT.

XSLT 1.0 (can be run with xmlstarlet by using the tr command)

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="text"/>
    <xsl:strip-space elements="*"/>

    <xsl:param name="sep" select="'.'"/>

    <xsl:template match="text()"/>

    <xsl:template match="*">
        <xsl:variable name="path">
            <xsl:for-each select="ancestor-or-self::*">
                <xsl:variable name="predicate">
                    <xsl:call-template name="genPredicate"/>
                </xsl:variable>
                <xsl:if test="ancestor::*">
                    <xsl:value-of select="$sep"/>
                </xsl:if>
                <xsl:value-of select="concat(local-name(),$predicate)"/>
            </xsl:for-each>
        </xsl:variable>
        <xsl:for-each select="@*">
            <xsl:value-of select="concat($path,$sep,name(),'=',.)"/>
            <xsl:text>&#xA;</xsl:text>
        </xsl:for-each>
        <xsl:if test="not(@*)">
            <xsl:text>&#xA;</xsl:text>          
        </xsl:if>
        <xsl:apply-templates select="node()"/>
    </xsl:template>

    <xsl:template name="genPredicate">
        <xsl:if test="preceding-sibling::*[local-name()=local-name(current())] or following-sibling::*[local-name()=local-name(current())]">
            <xsl:value-of select="concat('[',count(preceding-sibling::*[local-name()=local-name(current())])+1,']')"/>          
        </xsl:if>
    </xsl:template>

</xsl:stylesheet>

Note: There is a parameter named sep so you can change the separator from the command line (using -p sep="/" or whatever separator you want). It's currently set to your preferred ".".

Output

commSyslog.descr=Syslog Service
commSyslog.name=syslog
commSyslog.policyOwner=local
commSyslog.severity=critical
commSyslog.commSyslogClient[1].adminState=disabled
commSyslog.commSyslogClient[1].forwardingFacility=local7
commSyslog.commSyslogClient[1].hostname=none
commSyslog.commSyslogClient[1].name=secondary
commSyslog.commSyslogClient[1].severity=critical
commSyslog.commSyslogClient[2].adminState=disabled
commSyslog.commSyslogClient[2].forwardingFacility=local7
commSyslog.commSyslogClient[2].hostname=none
commSyslog.commSyslogClient[2].name=tertiary
commSyslog.commSyslogClient[2].severity=critical
commSyslog.commSyslogClient[3].adminState=disabled
commSyslog.commSyslogClient[3].forwardingFacility=local7
commSyslog.commSyslogClient[3].hostname=none
commSyslog.commSyslogClient[3].name=primary
commSyslog.commSyslogClient[3].severity=critical
commSyslog.commSyslogMonitor.adminState=disabled
commSyslog.commSyslogMonitor.descr=
commSyslog.commSyslogMonitor.name=
commSyslog.commSyslogMonitor.severity=critical
commSyslog.commSyslogConsole.adminState=disabled
commSyslog.commSyslogConsole.descr=
commSyslog.commSyslogConsole.name=
commSyslog.commSyslogConsole.severity=critical
commSyslog.commSyslogSource.audits=disabled
commSyslog.commSyslogSource.descr=
commSyslog.commSyslogSource.events=disabled
commSyslog.commSyslogSource.faults=enabled
commSyslog.commSyslogSource.name=
commSyslog.commSyslogFile.adminState=enabled
commSyslog.commSyslogFile.descr=
commSyslog.commSyslogFile.name=messages
commSyslog.commSyslogFile.severity=critical
commSyslog.commSyslogFile.size=4194304
查看更多
登录 后发表回答