-->

XML parsing : encoding utf-8 & UTF-8

2019-03-31 06:27发布

问题:

I am trying to parse the values from this LINK, whose xml encoding is like this

<?xml version="1.0" encoding="utf-8"?>

When I try to fetch the response, the following messages appear in logcat:

11-19 17:25:13.350: W/System.err(3360): This is not valid URL
11-19 17:25:13.350: W/System.err(3360): java.lang.NullPointerException

When I tried with some other LINK ,whose encoding is like this

<?xml version="1.0" encoding="UTF-8"?> It works fine, I can parse the values.

Is the XML parsing failing because the declared encoding is written as lowercase utf-8 rather than uppercase UTF-8?

How should I deal with this? I have searched Google, but I am new to XML parsing; this is the first parser I am trying to write. Please let me know how to fix it.

updated with code :

/**
 * Sends an HTTP POST request to {@code url} and returns the response body as text.
 *
 * <p>If the response does not declare a charset, the body is decoded as UTF-8.
 * The single-argument {@code EntityUtils.toString(entity)} would fall back to
 * ISO-8859-1 in that case, which corrupts non-ASCII characters in UTF-8 XML.
 *
 * @param url address to request
 * @return the response body, or {@code null} if the request failed or the
 *         response carried no body
 */
public String getXmlFromUrl(String url) {

    String xml = null;

    try {
        HttpClient httpClient = new DefaultHttpClient();
        HttpPost httpPost = new HttpPost(url);
        HttpResponse httpResponse = httpClient.execute(httpPost);
        HttpEntity httpEntity = httpResponse.getEntity();
        if (httpEntity != null) {
            // "UTF-8" is only the default; a charset declared by the server still wins.
            xml = EntityUtils.toString(httpEntity, "UTF-8");
        }
        System.out.println("response -- " + xml);

    } catch (IOException e) {
        // UnsupportedEncodingException and ClientProtocolException are both
        // IOExceptions and were handled identically, so one clause covers them.
        e.printStackTrace();
    }
    // return XML (null when nothing could be read)
    return xml;
}

回答1:

Straight away it looks like the problem is with Encoding of XML in your response.

URL url = new URL("http://myurl.com");
InputSource is = new InputSource(url.openStream());
is.setEncoding("ISO-8859-1"); // Also try UTF-8 or UTF-16
// setEncoding() is only a hint for an XML parser reading this InputSource; it
// does not decode getByteStream(). The charset therefore has to be passed to
// the reader explicitly, otherwise the platform default charset is used.
BufferedReader br = new BufferedReader(
        new InputStreamReader(is.getByteStream(), is.getEncoding()));
StringBuilder buffer = new StringBuilder();
try {
    String line;
    while ((line = br.readLine()) != null) {
        // readLine() strips the line terminator; put one back so that tokens
        // split across lines (e.g. attributes) are not fused together.
        buffer.append(line).append('\n');
    }
} finally {
    // Closing the reader also closes the underlying URL stream.
    br.close();
}
String str = buffer.toString();
Log.i(TAG, str);


回答2:

Check charset of content type header of the response. It can either be ISO-8859-1 or UTF-8. Encode accordingly.

EDIT: For the link you provided, I did not set any encoding for the response. Since you mentioned that you are new to XML parsing, I am posting my parser implementation.

My Parser:

/**
 * SAX-based parser that converts an XML response into a {@link Menus} object.
 *
 * <p>Each {@code SiteMenu} element becomes one {@link SiteMenu}; its child
 * elements ({@code menuID}, {@code menuTitle}, {@code menuIsCustom},
 * {@code menuCustomPageURL}, {@code iosMenuID}) populate that object. Element
 * names are matched case-insensitively.
 */
public final class SampleParser {

    final static String ROOT_NODE = "Menus";
    final static String ELEMENT_SITEMENU = "SiteMenu";
    final static String ELEMENT_ID = "menuID";
    final static String ELEMENT_TITLE = "menuTitle";
    final static String ELEMENT_CUSTOM = "menuIsCustom";
    final static String ELEMENT_PAGE_URL = "menuCustomPageURL";
    final static String ELEMENT_IOS_ID = "iosMenuID";

    private static final String TAG = "SampleParser";

    /**
     * Intentionally private: this class only exposes static methods.
     */
    private SampleParser() {}

    /**
     * Parses an XML document held in a string.
     *
     * @param response the XML document; may be {@code null}
     * @return the parsed data, or {@code null} if {@code response} is
     *         {@code null} or the document could not be parsed
     */
    public static Menus parse(String response) {
        if (response == null) {
            // new StringReader(null) would throw a NullPointerException; a failed
            // download is reported the same way as any other parse failure.
            Log.e(TAG, "parse: response is null");
            return null;
        }
        SAXParserFactory spf = SAXParserFactory.newInstance();
        try {
            SAXParser sp = spf.newSAXParser();
            XMLReader xr = sp.getXMLReader();
            SampleDataHandler dataHandler = new SampleDataHandler();
            xr.setContentHandler(dataHandler);
            // The response is already decoded text, so the encoding named in the
            // XML declaration plays no part when reading from a character stream.
            InputSource source = new InputSource(new StringReader(response));
            xr.parse(source);
            return dataHandler.getData();
        } catch (ParserConfigurationException e) {
            Log.e(TAG, "parse", e);
        } catch (SAXException e) {
            Log.e(TAG, "parse", e);
        } catch (IOException e) {
            Log.e(TAG, "parse", e);
        }
        return null;
    }

    /**
     * Content handler that builds the {@link Menus} result while the document
     * is being read.
     *
     * <p>Text is buffered between the start and end tag of a field element and
     * applied in {@link #endElement}, because a SAX parser is allowed to deliver
     * the text of a single element through several {@link #characters} calls.
     */
    static class SampleDataHandler extends DefaultHandler {

        private static final String TAG = "SampleDataHandler";

        /** Result object; created in {@link #startDocument()}. */
        private Menus data;
        /** The SiteMenu currently being filled, or null outside a SiteMenu element. */
        private SiteMenu tempElement;

        /** One flag per field element; true while the parser is inside that element. */
        private boolean readingIosId;
        private boolean readingTitle;
        private boolean readingID;
        private boolean readingCustom;
        private boolean readingCustomURL;

        /** Text collected for the field element currently being read. */
        private final StringBuilder text = new StringBuilder();
        /** True once characters() has delivered text for the current field element. */
        private boolean textSeen;

        /**
         * @return the data collected so far; {@code null} before parsing has started
         */
        public Menus getData() {
            return data;
        }

        @Override
        public void startDocument() throws SAXException {
            data = new Menus();
            Log.d(TAG, "startDocument Started parsing response");
        }

        @Override
        public void endDocument() throws SAXException {
            Log.d(TAG, "endDocument Finished parsing response");
        }

        @Override
        public void startElement(String uri, String localName, String qName,
                Attributes attributes) throws SAXException {
            // The root element needs no handling; any other unknown element is ignored.
            if (qName.equalsIgnoreCase(ELEMENT_SITEMENU)) {
                tempElement = new SiteMenu();
            } else if (qName.equalsIgnoreCase(ELEMENT_IOS_ID)) {
                readingIosId = true;
                resetText();
            } else if (qName.equalsIgnoreCase(ELEMENT_ID)) {
                readingID = true;
                resetText();
            } else if (qName.equalsIgnoreCase(ELEMENT_TITLE)) {
                readingTitle = true;
                resetText();
            } else if (qName.equalsIgnoreCase(ELEMENT_CUSTOM)) {
                readingCustom = true;
                resetText();
            } else if (qName.equalsIgnoreCase(ELEMENT_PAGE_URL)) {
                readingCustomURL = true;
                resetText();
            }
        }

        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            // Only buffer here. The value may arrive in several pieces, so it is
            // converted and stored once the closing tag is reached.
            if (readingID || readingIosId || readingTitle
                    || readingCustom || readingCustomURL) {
                text.append(ch, start, length);
                textSeen = true;
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName)
                throws SAXException {
            if (qName.equalsIgnoreCase(ELEMENT_SITEMENU)) {
                data.addMathematician(tempElement);
                // Field elements appearing after this point belong to no SiteMenu.
                tempElement = null;
            } else if (qName.equalsIgnoreCase(ELEMENT_ID)) {
                if (hasValue()) {
                    try {
                        tempElement.setId(Integer.parseInt(takeText()));
                    } catch (NumberFormatException e) {
                        Log.e(TAG, "characters[Parsing ID]", e);
                        tempElement.setId(-1);
                    }
                }
                readingID = false;
            } else if (qName.equalsIgnoreCase(ELEMENT_TITLE)) {
                if (hasValue()) {
                    tempElement.setTitle(takeText());
                }
                readingTitle = false;
            } else if (qName.equalsIgnoreCase(ELEMENT_IOS_ID)) {
                if (hasValue()) {
                    try {
                        tempElement.setiOSID(Integer.parseInt(takeText()));
                    } catch (NumberFormatException e) {
                        Log.e(TAG, "characters[Parsing iOSID]", e);
                        tempElement.setiOSID(-1);
                    }
                }
                readingIosId = false;
            } else if (qName.equalsIgnoreCase(ELEMENT_CUSTOM)) {
                if (hasValue()) {
                    // Boolean.parseBoolean never throws; anything but "true" yields false.
                    tempElement.setCustom(Boolean.parseBoolean(takeText()));
                }
                readingCustom = false;
            } else if (qName.equalsIgnoreCase(ELEMENT_PAGE_URL)) {
                if (hasValue()) {
                    tempElement.setCustomUrl(takeText());
                }
                readingCustomURL = false;
            }
        }

        /** Discards any buffered text before a new field element is read. */
        private void resetText() {
            text.setLength(0);
            textSeen = false;
        }

        /**
         * @return true if the element that just ended delivered text and there is
         *         a SiteMenu to store it in; empty elements leave the field untouched
         */
        private boolean hasValue() {
            return textSeen && tempElement != null;
        }

        /** Returns the buffered text with surrounding whitespace removed and clears the buffer. */
        private String takeText() {
            String value = text.toString().trim();
            resetText();
            return value;
        }
    }
}

My utils methods for network call. [Same as yours]

    /**
     * Executes an HTTP GET request for {@code url} and returns the response entity.
     *
     * @param url address to request
     * @return the entity of the response as reported by the HTTP client, or
     *         {@code null} if the request could not be built or executed
     */
    private static HttpEntity getResponseEntity(String url) {

        DefaultHttpClient httpClient = getHttpClient();

        try {
            // Built inside the try so that a malformed URL is logged and reported
            // as null, like every other failure of this method.
            HttpGet getMethod = new HttpGet(url);
            HttpResponse httpResponse = httpClient.execute(getMethod);
            return httpResponse.getEntity();
        } catch (Exception e) {
            // I/O errors and unexpected runtime errors were logged identically,
            // so a single clause handles both.
            Log.e(TAG, "getResponseEntity", e);
        }
        return null;
    }

    /**
     * Fetches {@code url} and returns the response body as text.
     *
     * <p>If the response does not declare a charset, the body is decoded as
     * UTF-8 instead of the ISO-8859-1 default that
     * {@code EntityUtils.toString(entity)} would apply.
     *
     * @param url address to request
     * @return the response body, or {@code null} if
     *         {@code ApplicationInfo.networkStatus} is false, the request
     *         produced no entity, or reading the entity failed
     */
    public static String getRespAsString(String url) {

        if (!ApplicationInfo.networkStatus) {
            // No Internet Connection
            return null;
        }

        try {
            HttpEntity responseEntity = getResponseEntity(url);
            if (responseEntity != null) {
                // "UTF-8" is only the default; a charset declared by the server still wins.
                return EntityUtils.toString(responseEntity, "UTF-8");
            }
        } catch (Exception e) {
            Log.e(TAG, "getRespAsString", e);
        }

        return null;
    }

Just get the response and invoke the parser, eg. SampleParser.parse(response);