Extract content between XML tags

2019-04-17 18:21发布

I have this XML file:

<ApiHeader>
    <OperationName>findEntitiesResponse</OperationName>
</ApiHeader>
<ResponseHeader>
    <CompletedSuccessfully>true</CompletedSuccessfully>
</ResponseHeader>
<Page>
    <StartAtRow>0</StartAtRow>
    <MaxRows>999999</MaxRows>
    <TotalRowCount>44</TotalRowCount>
</Page>
<Entity>
    <Carrier>xd
        <Id>11460</Id>
        <CarrierCode>11460</CarrierCode>
        <CarrierDescription>11460 LOGIS COUTTER</CarrierDescription>
        <LanguageCode>en</LanguageCode>
        <LanguageCodeDescr>Inglés</LanguageCodeDescr>
        <CarrierTypeCode>GENERAL</CarrierTypeCode>
        <CarrierTypeCodeDescr>GENERAL</CarrierTypeCodeDescr>
        <SCACCode>Default</SCACCode>
        </Memo>
    </Carrier>
</Entity>
<Entity>

There are a lot of <Entity>CONTENT</Entity> elements like the one in the example, but I kept it simple.

What I'm trying to do is extract everything between the <Entity></Entity> tags. I've done a lot of research but the closest thing I've found is extracting content from just one tag.

And the result would be this

<Entity>
    <Carrier>xd
        <Id>11460</Id>
        <CarrierCode>11460</CarrierCode>
        <CarrierDescription>11460 LOGIS COUTTER</CarrierDescription>
        <LanguageCode>en</LanguageCode>
        <LanguageCodeDescr>Inglés</LanguageCodeDescr>
        <CarrierTypeCode>GENERAL</CarrierTypeCode>
        <CarrierTypeCodeDescr>GENERAL</CarrierTypeCodeDescr>
        <SCACCode>Default</SCACCode>
        </Memo>
    </Carrier>
</Entity>

Remember that there could be one or more <Entity></Entity> tags.

Thank you very much.

EDIT

/**
 * Reads an XML file and prints every {@code <Entity>} element that is a child
 * of an element named {@code root}, one pretty-printed fragment per element.
 */
public class ReadXMLFile {

    /**
     * Default input file. Backslashes must be doubled inside a Java string
     * literal; single backslashes such as {@code \U} are illegal escapes.
     */
    private static final String FILE_PATH = "C:\\Users\\AGOJSO\\Desktop\\jordi\\test.xml";

    public static void main(String[] args) {
        printXml();
    }

    /** Prints the entities found in the default file ({@link #FILE_PATH}). */
    public static void printXml() {
        printXml(FILE_PATH);
    }

    /**
     * Parses the given XML file and prints each selected {@code <Entity>} element.
     *
     * @param path location of the XML file to read
     * @throws RuntimeException wrapping any failure to open, parse, query or serialize the file
     */
    public static void printXml(String path) {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try (InputStream in = new FileInputStream(path)) {
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document doc = db.parse(in);
            // This expression selects Entity elements only when their parent element is
            // named "root". For any other parent the node list is empty, the loop body
            // never runs, and nothing is printed (no error is raised either).
            NodeList list = filterNodesByXPath(doc, "//root/Entity");
            for (int i = 0; i < list.getLength(); i++) {
                printNode(list.item(i));
            }
        } catch (Exception e) {
            // Keep the cause and say which file failed.
            throw new RuntimeException("Failed to print entities from " + path, e);
        }
    }

    /**
     * Evaluates an XPath expression against a document.
     *
     * @param doc       document used as the evaluation context
     * @param xpathExpr XPath expression to compile and evaluate
     * @return the node set selected by the expression (possibly empty)
     * @throws RuntimeException wrapping any failure to compile or evaluate the expression
     */
    private static NodeList filterNodesByXPath(Document doc, String xpathExpr) {
        try {
            XPathFactory xPathFactory = XPathFactory.newInstance();
            XPath xpath = xPathFactory.newXPath();
            XPathExpression expr = xpath.compile(xpathExpr);
            return (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Serializes a node as indented XML and writes it to standard output.
     *
     * @param node node to serialize
     * @throws TransformerFactoryConfigurationError if no transformer factory can be created
     * @throws TransformerException                 if the node cannot be serialized
     */
    private static void printNode(Node node) throws TransformerFactoryConfigurationError, TransformerException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        // Each node is printed as a fragment, so suppress the per-fragment
        // "<?xml ...?>" declaration the transformer would otherwise emit.
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        StringWriter writer = new StringWriter();
        transformer.transform(new DOMSource(node), new StreamResult(writer));
        System.out.println(writer.toString());
    }

}

It doesn't print any errors; it just seems to be doing nothing.

标签: java xml parsing
1条回答
淡お忘
2楼-- · 2019-04-17 18:42

You could do it the good old way.

  1. Read XML to DOM
  2. Use XPath to extract the proper part
  3. Print it out ... or do whatever you like

Code:

/**
 * Loads the sample XML from the classpath and prints every {@code <Entity>}
 * element it contains, one pretty-printed fragment per element.
 *
 * @throws RuntimeException wrapping any failure to locate, parse, query or serialize the resource
 */
@Test
public void printXml() {
    String yourSampleFile = "52720162.xml";
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    try (InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream(yourSampleFile)) {
        // getResourceAsStream returns null instead of throwing when the resource
        // is missing, so fail with a message that names it.
        if (in == null) {
            throw new IllegalStateException("Resource not found on classpath: " + yourSampleFile);
        }
        DocumentBuilder db = dbf.newDocumentBuilder();
        Document doc = db.parse(in);
        // "//Entity" selects Entity elements wherever they appear, so the result
        // does not depend on what the enclosing element happens to be called.
        NodeList list = filterNodesByXPath(doc, "//Entity");
        for (int i = 0; i < list.getLength(); i++) {
            printNode(list.item(i));
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

/**
 * Runs an XPath query over a DOM document.
 *
 * @param doc       document used as the evaluation context
 * @param xpathExpr XPath expression to compile and evaluate
 * @return the node set the expression selects
 * @throws RuntimeException wrapping any exception raised while compiling or evaluating
 */
private NodeList filterNodesByXPath(Document doc, String xpathExpr) {
    try {
        XPath evaluator = XPathFactory.newInstance().newXPath();
        return (NodeList) evaluator.compile(xpathExpr).evaluate(doc, XPathConstants.NODESET);
    } catch (Exception failure) {
        throw new RuntimeException(failure);
    }
}

/**
 * Serializes a node as indented XML and writes it to standard output.
 *
 * @param node node to serialize
 * @throws TransformerFactoryConfigurationError if no transformer factory can be created
 * @throws TransformerException                 if the node cannot be serialized
 */
private void printNode(Node node) throws TransformerFactoryConfigurationError, TransformerException {
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    // Each node is printed as a fragment, so suppress the per-fragment
    // "<?xml ...?>" declaration the transformer would otherwise emit.
    transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
    StringWriter writer = new StringWriter();
    transformer.transform(new DOMSource(node), new StreamResult(writer));
    System.out.println(writer.toString());
}

A somewhat generalized form can be found at: How to read XML using XPath in Java

查看更多
登录 后发表回答