百度图片是动态加载的,本例只是抓取了网页上的 JS 源码,再用正则表达式从中匹配出图片地址。
#encoding=utf-8
"""Download the images referenced by a Baidu image-search result page.

The result page is fetched once, every ``"objURL":"..."`` value is pulled out
of its source with a regular expression, and each one that downloads
successfully is saved into ``imgPath`` as ``1.jpg``, ``2.jpg``, ... in order.
"""
import os
import re
import urllib

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen() into urllib.request; alias it so the rest of
    # the script reads the same on both interpreters.
    import urllib.request as urllib2

# Search-result page to scrape. The query term (word=) is the UTF-8
# percent-encoding of "鱼": a raw non-ASCII character in the URL makes
# Python 3's urlopen() fail while encoding the request line.
url = r'http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&fm=result&fr=&sf=1&fmq=1492068395730_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word=%E9%B1%BC'

# Directory the downloaded images are written to.
imgPath = r'/home/lhy/PycharmProjects/images/imgs/fish'


def extract_image_urls(html):
    """Return every ``objURL`` value found in *html*, in document order.

    Args:
        html: Decoded source of the search-result page.

    Returns:
        A list of the strings captured between ``"objURL":"`` and the next
        double quote; empty when nothing matches.
    """
    return re.findall(r'"objURL":"(.*?)"', html)


def download_images(image_urls, target_dir):
    """Download each URL into *target_dir* and return how many were saved.

    Files are named ``<n>.jpg`` where ``n`` counts successful downloads only,
    so the numbering has no gaps. A URL that cannot be fetched is reported
    and skipped; it never aborts the remaining downloads.

    Args:
        image_urls: Iterable of image URLs to fetch.
        target_dir: Directory to write into; created (including missing
            parents) when it does not exist yet.

    Returns:
        The number of images written to disk.
    """
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    saved = 0
    for image_url in image_urls:
        print("下载:", image_url)
        try:
            res = urllib2.urlopen(image_url)
            try:
                # getcode() exists on both Python 2 and Python 3 responses,
                # unlike the Python 3-only ``status`` attribute.
                data = res.read() if res.getcode() == 200 else None
            finally:
                res.close()
        except Exception:
            # Deliberately broad: any failure for one image (bad URL, network
            # error, truncated body) must only skip that image.
            print('未下载成功:', image_url)
            continue
        if data is None:
            print('未下载成功:', image_url)
            continue

        # The body is fully read before the file is opened, so a failed
        # transfer never leaves a partial file behind.
        filename = os.path.join(target_dir, str(saved + 1) + '.jpg')
        with open(filename, 'wb') as f:
            f.write(data)
        print('下载完成\n')
        saved += 1
    return saved


def main():
    """Fetch the search page, download every image it lists, print a summary."""
    page = urllib2.urlopen(url)
    try:
        imgHtml = page.read().decode('utf-8')
    finally:
        page.close()
    # test html
    # print(imgHtml)
    urls = extract_image_urls(imgHtml)
    count = download_images(urls, imgPath)
    print("下载结束,一共下载了 %s 张图片" % count)


if __name__ == '__main__':
    main()