Jsoup超时

2019-07-10 08:29发布

 1 package cn.xls.util;
 2 
 3 import cn.xls.pojo.City;
 4 import org.jsoup.Jsoup;
 5 import org.jsoup.nodes.Document;
 6 import org.jsoup.nodes.Element;
 7 import org.jsoup.select.Elements;
 8 
 9 import java.io.IOException;
10 import java.util.ArrayList;
11 import java.util.List;
12 
13 /**
14  * @program: 
15  * @description: 获取城市信息
16  * @author: l
17  */
18 public class CityInfoUtil {
19 
20     public static List<City> getProvinceData(String requestUrl) {
21         List<City> lists = new ArrayList<City>();
22         try {
23             Document document = Jsoup.connect(requestUrl).timeout(50000).maxBodySize(0).get();
24             //获取所有的省份
25             Elements provinceList = document.select("tr[class='provincetr']").select("td").select("a");
26             //遍历省份
27             for (Element element : provinceList) {
28                 //拼接当前省份下的城市地址
29                 String url1 = requestUrl.replace("index.html", element.attr("href"));
30                 Document document1 = Jsoup.connect(url1).timeout(50000).maxBodySize(0).get();
31                 
32                 //获取该省份下所有城市(第一列为城市区号,第二列为城市名)
33                 Elements citys = document1.select("tr[class='citytr']").select("td:eq(1)").select("a");
34                 String province = element.html();
35                 System.out.println("当前省份 : " + province);
36                 System.out.println("当前省份下有 : " + citys.size() + " 个城市");
37                 //遍历城市
38                 for (Element element1 : citys) {
39 //                    System.out.println("城市 : " + element1.html());
40                     //拼接当前城市下的区域地址
41                     String url2 = requestUrl.replace("index.html", element1.attr("href"));
42                     Document document2 = Jsoup.connect(url2).timeout(50000).maxBodySize(0).get();
43                     
44                     //获取该城市下所有区域(第一列为区域区号,第二列为区域名)
45                     Elements areas = document2.select("tr[class=countytr] td:eq(1)").select("a");
46                     String city = element1.html();
47                     System.out.println("当前城市 : " + city);
48                     System.out.println("当前城市下有 : " + areas.size() + " 个区");
49                     for (Element element2 : areas) {
50                         System.out.println("区域 : " + element2.html());
51                     }
52                 }
53                 System.out.println("-----------");
54             }
55         } catch (IOException e) {
56             e.printStackTrace();
57         }
58         return lists;
59     }
60     
61     //测试
62     public static void main(String[] args) {
63         List<City> lists = getProvinceData("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/index.html");
64     }
65 }

 

复制代码
区域 : 呼和浩特金海工业园区
区域 : 呼和浩特经济技术开发区
城市 : 包头市
java.net.SocketTimeoutException: Read timed out
    at java.net.SocketInputStream.socketRead0(Native Method)
    at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
    at java.net.SocketInputStream.read(SocketInputStream.java:170)
    at java.net.SocketInputStream.read(SocketInputStream.java:141)
    at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
    at java.io.BufferedInputStream.read1(BufferedInputStream.java:286)
    at java.io.BufferedInputStream.read(BufferedInputStream.java:345)
    at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:704)
    at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:647)
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1535)
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1440)
    at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:750)
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:722)
    at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:306)
    at org.jsoup.helper.HttpConnection.get(HttpConnection.java:295)
    at cn.xls.util.CityInfoUtil.getProvinceData(CityInfoUtil.java:49)
    at cn.xls.util.CityInfoUtil.main(CityInfoUtil.java:78)
复制代码

每次打印了四分之一左右的数据后就会超时,请问该怎么解决啊?

正在学习怎么用jsoup爬取数据,请多指教

http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/index.html

1条回答
地球回转人心会变
2楼-- · 2019-07-10 09:10

解决了。网站响应太慢,我把超时时间又加大了十倍,最后慢慢地全部打印完了。

查看更多
登录 后发表回答