// Reposted from http://blog.csdn.net/qq_17505335/article/details/51767364
package com.dx.util; import java.io.BufferedReader; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.net.URL; import java.net.URLConnection; import java.util.Date; import java.util.HashSet; import java.util.Iterator; import java.util.Set;
import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements;
public class CrawlerUtil {
public static void main(String[] args) {
//网址 String url2 = “http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=index&fr=&sf=1&fmq=&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=美女&f=3&oq=meinv&rsp=0#z=0&pn=&ic=0&st=-1&face=0&s=0&lm=-1”; Set set = getImgUrls(url2); downImg(set); }
/* * 获取图片地址 */ public static Set getImgUrls(String url){ Set set = new HashSet(); try { //获取文本对象 Document dom = Jsoup.parse(new URL(url),500000);
//根据标签名获取 Elements es3 = dom.getElementsByTag(“img”); System.out.println(es3.size()); for(Element e:es3){ //获取图片地址 String img_url = e.attr(“src”); //筛选出以.jpg格式的图片 if(img_url.endsWith(“.jpg”)){ set.add(img_url); } } } catch (Exception e) { e.printStackTrace(); } return set; }
/* * 下载图片 */ public static void downImg(Setset){
InputStream inputStream = null; OutputStream outputStream = null; File file = new File(“D:\pic”); try {
if(set.size() != 0){ Iterator it = set.iterator(); while(it.hasNext()){ //1.获取网址 URL u = new URL(it.next()); //2.打开连接 URLConnection conn = u.openConnection(); //3.获取输入流与写出流 inputStream = conn.getInputStream(); //判断文件是否存在 if(!file.exists()){ file.mkdir(); } outputStream = new FileOutputStream(new File(“D:\pic\”+new Date().getTime()+”.jpg”)); //4.将源代码写入内存(设置编码) byte[] b= new byte[2048]; int len = 0; while((len = inputStream.read(b)) != -1){ outputStream.write(b, 0, len); } } } } catch (Exception e) { e.printStackTrace(); }finally{ //关闭I/o try { if(outputStream != null)outputStream.close(); if(inputStream != null)inputStream.close(); } catch (IOException e) { e.printStackTrace(); }
} } }
// Reference: http://www.open-open.com/