How to read and modify fragments of XML using StAX in Java

2019-07-18 20:33发布

问题:

My goal is to read objects (featureMember) into DOM, change them and write back into new XML. XML is too big to use DOM itself. I figured what I need is StAX and TransformerFactory, but I can't make it work.

This is what I've done till now:

/**
 * Attempts to stream the XML in {@code pathIn} to {@code pathOut} with StAX,
 * loading each {@code featureMember} element into a DOM node on the way.
 *
 * NOTE: as posted, this method does not work. The call to {@code t.transform()}
 * fails with the IllegalStateException quoted below the code, and the
 * {@code println} refers to an undeclared name.
 *
 * @param pathIn  XML file to read
 * @param pathOut XML file to write
 */
private void change(File pathIn, File pathOut) {
    try {

        XMLInputFactory factory = XMLInputFactory.newInstance();
        XMLOutputFactory factoryOut = XMLOutputFactory.newInstance();

        // No stylesheet is supplied, so this is the default identity transformer.
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer t = tf.newTransformer();

        XMLEventReader in = factory.createXMLEventReader(new FileReader(pathIn));
        XMLEventWriter out = factoryOut.createXMLEventWriter(new FileWriter(pathOut));

        while (in.hasNext()) {
            // nextTag() consumes the event it returns, so once a start tag is in `e`
            // the reader is already positioned on whatever follows it.
            XMLEvent e = in.nextTag();
            if (e.getEventType() == XMLStreamConstants.START_ELEMENT) {
                if (((StartElement) e).getName().getLocalPart().equals("featureMember")) {
                    DOMResult result = new DOMResult();
                    // This is the failing line: StAXSource requires the reader to be at
                    // START_DOCUMENT or START_ELEMENT, but the featureMember start tag
                    // was already consumed by nextTag() above.
                    t.transform(new StAXSource(in), result);
                    Node domNode = result.getNode();
                    // NOTE(review): `domnode` does not match the declared `domNode`;
                    // this line does not compile as written.
                    System.out.println(domnode);
                }
            }
            // Every event returned by nextTag() is forwarded to the output unchanged.
            out.add(e);
        }
        in.close();
        out.close();

    // All failures are only printed; nothing is propagated to the caller.
    } catch (FileNotFoundException e1) {
        e1.printStackTrace();
    } catch (IOException e1) {
        e1.printStackTrace();
    } catch (TransformerConfigurationException e1) {
        e1.printStackTrace();
    } catch (XMLStreamException e1) {
        e1.printStackTrace();
    } catch (TransformerException e1) {
        e1.printStackTrace();
    }
}

I get exception (on t.transform()):

Exception in thread "AWT-EventQueue-0" java.lang.IllegalStateException: StAXSource(XMLEventReader) with XMLEventReader not in XMLStreamConstants.START_DOCUMENT or XMLStreamConstants.START_ELEMENT state

Simplified version of my xml looks like (it has namespaces):

<?xml version="1.0" encoding="UTF-8"?>
<gml:FeatureCollection xmlns:gml="http://www.opengis.net/gml/3.2" gml:id="featureCollection">
  <gml:featureMember>
    </eg:RST>
    <eg:pole>Krakow</eg:pole>
    <eg:localId>id1234</eg:localId>
  </gml:featureMember>
  <gml:featureMember>
    <eg:RST>1002</eg:RST>
    <eg:pole>Rzeszow</eg:pole>
    <eg:localId>id1235</eg:localId>
  </gml:featureMember>
</gml:FeatureCollection>

I have a list of localIds of the objects (featureMember) that I want to change, together with the corresponding new RST or pole value (which of the two is changed depends on the user):

localId (id1234) RST (1001)

localId (id1236) RST (1003)

...

回答1:

The problem you're having is that when you create the StAXSource, your START_ELEMENT event has already been consumed. So the XMLEventReader is probably at some whitespace text node event, or something else that can't be an XML document source. You can use the peek() method to view the next event without consuming it. Make sure there is an event with hasNext() first, though.

I'm not 100% sure what you wish to accomplish, so here are some things you could do depending on the scenario.

EDIT: I just read some of the comments on your question which make things a bit more clear. The below could still help you to achieve the desired result with some adjustment. Also note that Java XSLT processors allow for extension functions and extension elements, which can call into Java code from an XSLT stylesheet. This can be a powerful method to extend basic XSLT functionality with external resources such as database queries.


In case you want the input XML to be transformed into one output XML, you might be better off simply using an XML stylesheet transformation. In your code, you create a transformer without any templates, so it becomes the default "identity transformer" which just copies input to output. Suppose your input XML is as follows:

<?xml version="1.0" encoding="UTF-8"?>
<gml:FeatureCollection xmlns:gml="http://www.opengis.net/gml/3.2" gml:id="featureCollection" xmlns:eg="acme.com">
  <gml:featureMember>
    <eg:RST/>
    <eg:pole>Krakow</eg:pole>
    <eg:localId>id1234</eg:localId>
  </gml:featureMember>
  <gml:featureMember>
    <eg:RST>1002</eg:RST>
    <eg:pole>Rzeszow</eg:pole>
    <eg:localId>id1235</eg:localId>
  </gml:featureMember>
</gml:FeatureCollection>

I've bound the eg prefix to some dummy namespace since it was missing from your sample and fixed the malformed RST element.

The following program runs an XSLT transformation on your input and writes the result to an output file.

package xsltplayground;

import java.io.File;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

/**
 * Small demo that applies an XSLT stylesheet to an XML file and writes the
 * result to another file, using only file-based stream sources and results.
 */
public class XSLTplayground {

    private static final Logger LOG = Logger.getLogger(XSLTplayground.class.getName());

    /**
     * Locates {@code sample.xml} and {@code stylesheet.xsl} next to this class
     * and writes the transformed document to {@code output.xml} in the same
     * directory.
     *
     * @param args ignored
     * @throws Exception if a resource URL cannot be converted to a file
     */
    public static void main(String[] args) throws Exception {

        Class<XSLTplayground> anchor = XSLTplayground.class;
        File sampleXml = new File(anchor.getResource("sample.xml").toURI());
        File stylesheet = new File(anchor.getResource("stylesheet.xsl").toURI());
        File packageDir = new File(anchor.getResource(".").toURI());

        change(sampleXml, new File(packageDir, "output.xml"), stylesheet);

    }

    /**
     * Transforms {@code pathIn} with the stylesheet in {@code xsltFile} and
     * stores the result in {@code pathOut}. Transformer failures are logged at
     * SEVERE level and not rethrown.
     *
     * @param pathIn   XML document to transform
     * @param pathOut  file that receives the transformation result
     * @param xsltFile XSLT stylesheet to apply
     */
    private static void change(File pathIn, File pathOut, File xsltFile) {
        try {

            // Compile the stylesheet first, then stream input -> output through it.
            Transformer stylesheetTransformer =
                    TransformerFactory.newInstance().newTransformer(new StreamSource(xsltFile));

            stylesheetTransformer.transform(new StreamSource(pathIn), new StreamResult(pathOut));

        } catch (TransformerException ex) {
            // TransformerConfigurationException is a TransformerException, so this
            // single handler covers both stylesheet-compilation and runtime errors.
            LOG.log(Level.SEVERE, null, ex);
        }
    }

}

Here's a sample stylesheet.xsl file, which for convenience I just dumped into the same package as the input XML and class.

<?xml version="1.0" encoding="UTF-8"?>
<!-- Copies the input document unchanged, except that every gml:featureMember
     element is replaced by a gml:member element with the same content. -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:eg="acme.com">

    <xsl:output method="xml" indent="yes"/>

    <!-- Default rule: copy any node or attribute and recurse into its
         children and attributes. -->
    <xsl:template match="node()|@*">
        <xsl:copy>
            <xsl:apply-templates select="node()|@*" />
        </xsl:copy>
    </xsl:template>

    <!-- Override for gml:featureMember: emit gml:member instead of copying the
         element itself, then process its children and attributes as usual. -->
    <xsl:template match="gml:featureMember">
        <gml:member>
            <xsl:apply-templates select="node()|@*" />
        </gml:member>
    </xsl:template>

</xsl:stylesheet>

The above stylesheet will copy everything by default, but when it gets to a <gml:featureMember> element it will wrap the contents into a new <gml:member> element. Just a very simple example of what you can do with XSLT.

The output would be:

<?xml version="1.0" encoding="UTF-8"?>
<gml:FeatureCollection xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:eg="acme.com" gml:id="featureCollection">
  <gml:member>
    <eg:RST/>
    <eg:pole>Krakow</eg:pole>
    <eg:localId>id1234</eg:localId>
  </gml:member>
  <gml:member>
    <eg:RST>1002</eg:RST>
    <eg:pole>Rzeszow</eg:pole>
    <eg:localId>id1235</eg:localId>
  </gml:member>
</gml:FeatureCollection>

Since both input and output are file streams, you don't need the entire DOM in memory. XSLT in Java is pretty fast and efficient, so this might suffice.


Maybe you actually want to split every occurrence of some element into its own output file, with some changes to it. Here's an example of code that uses StAX for splitting off the <gml:featureMember> elements as separate documents. You could then iterate over the created files and transform them however you want (XSLT would again be a good choice). Obviously the error handling would need to be a bit more robust. This is just for demonstration.

package xsltplayground;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.stream.XMLEventFactory;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
import javax.xml.transform.stream.StreamSource;

/**
 * Small demo that uses StAX to split every {@code featureMember} element of a
 * large XML file into its own stand-alone XML document
 * ({@code output_1.xml}, {@code output_2.xml}, ...), without building a DOM of
 * the whole input.
 */
public class XSLTplayground {

    /** Local name of the elements that are split off into separate files. */
    private static final String SPLIT_ELEMENT = "featureMember";

    /** Encoding used for both the output byte stream and its XML declaration. */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /**
     * Locates {@code sample.xml} and {@code stylesheet.xsl} next to this class
     * and writes the split documents into the same directory.
     *
     * @param args ignored
     * @throws Exception if a resource URL cannot be converted to a file
     */
    public static void main(String[] args) throws Exception {

        URL url = XSLTplayground.class.getResource("sample.xml");
        File input = new File(url.toURI());
        URL url2 = XSLTplayground.class.getResource("stylesheet.xsl");
        File xslt = new File(url2.toURI());
        URL url3 = XSLTplayground.class.getResource(".");
        File output = new File(url3.toURI());
        change(input, output, xslt);

    }

    /**
     * Streams through {@code pathIn} and writes each {@code featureMember}
     * element (matched by local name) to {@code output_N.xml} inside
     * {@code directoryOut}, numbering from 1. Everything outside those elements
     * is skipped. Stream and I/O failures are logged at SEVERE level and not
     * rethrown.
     *
     * @param pathIn       XML file to split
     * @param directoryOut existing directory that receives the output files
     * @param xsltFile     currently unused; kept so existing callers compile
     * @throws InterruptedException never thrown by this implementation; kept
     *                              for signature compatibility
     */
    private static void change(File pathIn, File directoryOut, File xsltFile) throws InterruptedException {
        XMLInputFactory xmlIf = XMLInputFactory.newFactory();

        XMLOutputFactory xmlOf = XMLOutputFactory.newInstance();
        // The gml:/eg: prefixes are declared on the root element, which is not
        // copied. Repairing mode makes the writer re-declare any prefix that is
        // not in scope, so each split file is namespace-well-formed on its own.
        xmlOf.setProperty(XMLOutputFactory.IS_REPAIRING_NAMESPACES, Boolean.TRUE);

        // XMLEventReader.close() does not close the underlying input, so the
        // file stream is owned (and closed) here.
        try (InputStream ips = new FileInputStream(pathIn)) {

            // The system id keeps relative references in the input resolvable.
            XMLEventReader reader = xmlIf.createXMLEventReader(pathIn.toURI().toASCIIString(), ips);
            try {
                int counter = 1;
                // Keep going until no more events
                while (reader.hasNext()) {
                    // Peek, so that the start tag is still unread when copy() takes over
                    XMLEvent next = reader.peek();
                    if (next.isStartElement()
                            && next.asStartElement().getName().getLocalPart().equals(SPLIT_ELEMENT)) {
                        File output = new File(directoryOut, "output_" + counter + ".xml");
                        try (OutputStream ops = new FileOutputStream(output)) {
                            // Explicit encoding: without it the writer may fall back to the
                            // platform charset, contradicting the UTF-8 XML declaration.
                            XMLEventWriter writer = xmlOf.createXMLEventWriter(ops, OUTPUT_ENCODING);
                            copy(reader, writer);
                            writer.flush();
                            writer.close();
                        }
                        counter++;
                    } else {
                        // Not in a featureMember element: ignore
                        reader.nextEvent();
                    }
                }
            } finally {
                reader.close();
            }

        } catch (XMLStreamException ex) {
            Logger.getLogger(XSLTplayground.class.getName()).log(Level.SEVERE, null, ex);
        } catch (IOException ex) {
            Logger.getLogger(XSLTplayground.class.getName()).log(Level.SEVERE, null, ex);
        }
    }

    /**
     * Copies one complete element from {@code reader} to {@code writer},
     * wrapped in start-document / end-document events. The reader is expected
     * to be positioned immediately before the element's start tag; on return
     * it is positioned immediately after the matching end tag.
     *
     * @param reader source of events
     * @param writer destination for the stand-alone document
     * @throws XMLStreamException if reading or writing an event fails
     */
    private static void copy(XMLEventReader reader, XMLEventWriter writer) throws XMLStreamException {

        XMLEventFactory ef = XMLEventFactory.newFactory();
        writer.add(ef.createStartDocument(OUTPUT_ENCODING, "1.0"));

        // Nesting depth relative to the element being copied; the copy is
        // complete when an end tag brings it back to zero.
        int depth = 0;
        while (true) {
            XMLEvent next = reader.nextEvent();
            writer.add(next);
            if (next.isStartElement()) {
                depth++;
            } else if (next.isEndElement()) {
                depth--;
                if (depth == 0) {
                    break;
                }
            }
        }
        writer.add(ef.createEndDocument());

    }

}


标签: java xml dom stax