网络爬虫

    xiaoxiao  2021-12-14  20

    package test;

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    import org.apache.http.Consts;
    import org.apache.http.HttpEntity;
    import org.apache.http.HttpResponse;
    import org.apache.http.client.ClientProtocolException;
    import org.apache.http.client.ResponseHandler;
    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.client.methods.HttpPost;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;
    import org.apache.http.util.EntityUtils;
    import org.junit.Test;
    import org.htmlparser.*;
    import org.htmlparser.filters.HasAttributeFilter;
    import org.htmlparser.filters.NodeClassFilter;
    import org.htmlparser.util.NodeList;
    import org.htmlparser.tags.LinkTag;
    import com.ssitcloud.common.util.HttpClientUtil;

    /**
     * Minimal web-crawler smoke test: downloads one page over HTTP, prints its
     * body, then prints the text of every hyperlink found in it.
     */
    public class HttpTest {

        /** Page fetched by {@link #test()}. */
        private static final String TARGET_URL = "http://www.baidu.com";

        /** Charset used both to decode the response body and to parse it. */
        private static final String CHARSET = "UTF-8";

        /**
         * Fetches {@link #TARGET_URL} with a GET request, prints the response body,
         * and prints the link text of each {@code <a>} element in that body.
         *
         * @throws Exception if the request fails, the server answers with a
         *                   non-2xx status, or the HTML cannot be parsed; failures
         *                   are propagated so the test is reported as failed
         *                   instead of being printed and ignored
         */
        @Test
        public void test() throws Exception {
            HttpGet httpGet = new HttpGet(TARGET_URL);

            // Turns a 2xx response into its body text; any other status is an error.
            ResponseHandler<String> responseHandler = new ResponseHandler<String>() {
                @Override
                public String handleResponse(final HttpResponse response)
                        throws ClientProtocolException, IOException {
                    int status = response.getStatusLine().getStatusCode();
                    if (status >= 200 && status < 300) {
                        HttpEntity entity = response.getEntity();
                        return entity != null ? EntityUtils.toString(entity, CHARSET) : null;
                    }
                    throw new ClientProtocolException("Unexpected response status: " + status);
                }
            };

            String responseBody;
            // try-with-resources closes the client even when execute() throws.
            // execute(request, handler) manages the underlying response itself,
            // so there is no separate response object to close here.
            try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
                responseBody = httpClient.execute(httpGet, responseHandler);
            }
            System.out.println(responseBody);

            // The handler returns null when the response carries no entity;
            // there is nothing to parse in that case.
            if (responseBody == null) {
                return;
            }

            // createParser always treats its argument as in-memory HTML, whereas
            // the Parser(String) constructor takes a "resource" that may be
            // interpreted as a URL or file name.
            Parser parser = Parser.createParser(responseBody, CHARSET);

            // Select <a> elements by node class. HasAttributeFilter("a") would
            // instead match tags that have an *attribute* named "a".
            NodeList list = parser.parse(new NodeClassFilter(LinkTag.class));
            int count = list.size();

            // Process every link on this page.
            for (int i = 0; i < count; i++) {
                Node node = list.elementAt(i);
                if (node instanceof LinkTag) {
                    LinkTag lt = (LinkTag) node;
                    System.out.println(lt.getLinkText());
                }
            }
        }
    }
    转载请注明原文地址: https://ju.6miu.com/read-962188.html

    最新回复(0)