JAXB should ignore element

2019-08-10 20:45发布

问题:

The structure

<html>
   <span><h1>test</h1></span>
   <table>

   </table>
</html>

How can I get the text "test" when the wrapper element might be either a <span> or a <div>?

/**
 * JAXB mapping attempted in the question for the sample {@code <html>} document.
 * {@code XmlAccessType.FIELD} makes JAXB bind the fields of this class rather than
 * getter/setter pairs.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class HtmlTag {
    // Mapped to an <h1> child element. In the sample document <h1> is nested inside
    // a <span> wrapper, and the question reports that this field unmarshals to null.
    @XmlElement(name = "h1")
    String h1;
}

With this mapping, the h1 field unmarshals to null.

回答1:

/**
 * Alternative mapping proposed by the first answer: rather than binding {@code <h1>}
 * directly, keep the child elements as DOM nodes so the caller can search inside them.
 */
@XmlAccessorType(XmlAccessType.FIELD)
public class HtmlTag 
{
    // Child elements are collected here as DOM elements; for the sample document the
    // first entry is the <span> wrapper.
    @XmlAnyElement
    List<org.w3c.dom.Element> elements;
}  

To get the "test" string:

// The unmarshalling call that produces the HtmlTag is elided in the original answer.
HtmlTag htmlTag = //...  
// First collected child element: the wrapper, which is <span> in the sample document.
Element firstElement = htmlTag.elements.get(0);
// Find the first <h1> inside the wrapper and read its text; for the sample document this
// evaluates to "test". The value is not stored here.
// NOTE(review): item(0) would be null if the wrapper contained no <h1> - confirm inputs.
firstElement.getElementsByTagName("h1").item(0).getTextContent();


回答2:

You can leverage a StAX StreamFilter on an XMLStreamReader so that the elements you want to ignore are not reported as events. Then you can unmarshal from the XMLStreamReader with JAXB.

import javax.xml.bind.*;
import javax.xml.stream.*;
import javax.xml.transform.stream.StreamSource;

/**
 * Demonstrates unmarshalling {@code HtmlTag} while hiding wrapper elements
 * ({@code <span>} and {@code <div>}) from JAXB by means of a StAX {@link StreamFilter}.
 */
public class Demo {

    /** Document that is read when no path is given on the command line. */
    private static final String DEFAULT_INPUT = "src/forum17613060/input.xml";

    /**
     * Unmarshals the input document through a filtered {@link XMLStreamReader} and prints
     * the value bound to {@code HtmlTag.h1}.
     *
     * @param args optional; when present, {@code args[0]} is the system identifier of the
     *             XML document to read, otherwise {@code src/forum17613060/input.xml} is used
     * @throws Exception if the document cannot be parsed or unmarshalled
     */
    public static void main(String[] args) throws Exception {
        String input = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

        JAXBContext jc = JAXBContext.newInstance(HtmlTag.class);

        // NOTE(review): if the document can come from an untrusted source, consider
        // disabling DTDs / external entities on the factory before creating the reader.
        XMLInputFactory xif = XMLInputFactory.newFactory();
        StreamSource xml = new StreamSource(input);
        XMLStreamReader xsr = xif.createXMLStreamReader(xml);
        try {
            // JAXB only sees the events the filter accepts, so the wrapper element is
            // invisible to it and <h1> appears as a direct child of the root.
            XMLStreamReader filtered = xif.createFilteredReader(xsr, new WrapperElementFilter());

            Unmarshaller unmarshaller = jc.createUnmarshaller();
            HtmlTag htmlTag = unmarshaller.unmarshal(filtered, HtmlTag.class).getValue();
            System.out.println(htmlTag.h1);
        } finally {
            // XMLStreamReader is not AutoCloseable, so release its resources explicitly.
            xsr.close();
        }
    }

    /**
     * Rejects the start and end events of {@code <span>} and {@code <div>} elements and
     * accepts every other event, including the content nested inside those elements.
     */
    private static final class WrapperElementFilter implements StreamFilter {

        @Override
        public boolean accept(XMLStreamReader reader) {
            // getLocalName() is only queried for element start/end events.
            if (!reader.isStartElement() && !reader.isEndElement()) {
                return true;
            }
            String localName = reader.getLocalName();
            return !"span".equals(localName) && !"div".equals(localName);
        }
    }

}