首先要导入的库当然是 selenium;另外,为了在页面加载时暂停等待(time.sleep),还需要导入 time。
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# With the imports in place, open the NetEase Cloud Music home page.
# NOTE: find_element(By..., ...) and switch_to.frame(...) are used instead of
# the find_element_by_* / switch_to_frame helpers, which newer Selenium
# releases no longer provide.
driver = webdriver.Chrome()
driver.get(u"http://music.163.com/")
# Single-argument print(...) is valid on both Python 2 and Python 3.
print(u"正在打开网页 " + driver.title)

# Wait for the page to load, then search for the artist whose lyrics we want
# (this walkthrough scrapes the first page of results for 陈粒).
time.sleep(2)
name = u"陈粒"
search_box = driver.find_element(By.CSS_SELECTOR, "input[class=\"txt j-flag\"]")
# Type the artist name and press ENTER in one call, so the element is only
# looked up once.
search_box.send_keys(name, Keys.ENTER)

# Switch into the frame named "contentFrame"; the element lookups that follow
# are made from inside this frame.
driver.switch_to.frame("contentFrame")

# Next: scrape the first thirty songs. The helper fun() is defined first.
def fun(a):
    """Save the lyrics of the song page that is currently open, then go back.

    Relies on the module-level ``driver`` (already showing a song page inside
    the "contentFrame" frame) and the module-level ``time`` import.

    Steps: click the first ``javascript:void(0)`` link on the page (scrolling
    down and retrying once if that fails), read the ``lyric-content`` element
    and the ``tit`` element, write the lyrics to
    ``C:/Users/user/Desktop/lyric/<first line of title>.txt`` as UTF-8, then
    navigate back and re-enter the "contentFrame" frame.

    Args:
        a: 1-based index of the song in the result list. It is not used by
            this function; it is kept so existing callers keep working.

    Raises:
        selenium.common.exceptions.WebDriverException: if the link still
            cannot be clicked after scrolling, or an element is missing.
        IOError / OSError: if the output file cannot be created (the target
            directory is hard-coded and must already exist).
    """
    # Local imports keep this function self-contained.
    import io
    import re

    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    # NOTE(review): presumably the control that expands the full lyric text;
    # confirm against the live page.
    js_link = "a[href^='javascript:void(0)']"

    print("正在打开歌曲页面")
    time.sleep(2)
    try:
        driver.find_element(By.CSS_SELECTOR, js_link).click()
    except WebDriverException:
        # The link could not be clicked where the page currently is: scroll
        # down and try exactly once more. A second failure propagates.
        driver.execute_script("var q=document.body.scrollTop=500")
        time.sleep(1)
        driver.find_element(By.CSS_SELECTOR, js_link).click()

    # NOTE(review): the original listing was collapsed onto one line, so it is
    # unclear whether these two calls belonged only to the try's `else`
    # branch. They are harmless after either click path, so they always run.
    driver.implicitly_wait(10)  # seconds
    driver.execute_script('window.stop()')

    print("正在获取歌词")
    # The last two characters of the element text are dropped, as before.
    # NOTE(review): presumably trailing UI text rather than lyrics; confirm.
    lyric_text = driver.find_element(By.ID, "lyric-content").text[:-2]
    title = driver.find_element(By.CLASS_NAME, "tit").text

    print("正在新建文件保存")
    # Only the first line of the title element names the file.
    first_line = title.split("\n", 1)[0]
    # Characters that Windows forbids in file names would make open() fail.
    safe_name = re.sub(r'[\\/:*?"<>|]', "_", first_line)
    path = u'C:/Users/user/Desktop/lyric/%s.txt' % safe_name
    # io.open behaves the same on Python 2 and 3: it takes text, encodes it
    # as UTF-8, and the `with` block closes the file even if the write fails.
    with io.open(path, 'w', encoding='utf8') as lyric_file:
        lyric_file.write(lyric_text)
        print(u"正在保存歌词 " + title)

    # Return to the search results and re-enter the frame that holds them.
    driver.back()
    driver.switch_to.frame("contentFrame")
    time.sleep(2)
此函数负责打开歌曲页面之后的操作;然而在打开歌曲页面之前,还有很多其他的操作需要处理。
首先确认该歌曲的歌手是不是我们搜索的名字。然后试图点击歌曲链接:点击是在界面刷新之后的页面上进行的,当歌曲链接不在当前界面上时,需要先下拉页面再点击。另外还可能出现网易的登录界面(如下所示),此时必须先回到主窗口把它关闭掉,再回到刚才的 contentFrame 框架继续爬取。具体代码如下。另外,中间可能会有一些错误出现,可能是由于页面加载不及时等原因,此时重新运行即可。
# Imported here so this section does not depend on the import block above.
from selenium.webdriver.common.by import By

# CSS selector templates for the n-th (1-based) row of the search results.
_ROW = 'div#m-search>div:nth-child(3)>div>div>div:nth-child(%d)'
_ARTIST_CELL = _ROW + '>div:nth-child(4)'
_SONG_LINK = _ROW + '>div:nth-child(2)>div>div>a'

# Click strategies tried in order: (progress message, scroll offset or None).
_ATTEMPTS = (
    ("试图点击", None),
    ("试图下拉点击", 600),
    ("试图二次下拉点击", 1000),
)


def _scroll_results(offset):
    """Scroll the document body to `offset` pixels and pause for one second."""
    driver.execute_script("var q=document.body.scrollTop=%d" % offset)
    time.sleep(1)


def _open_and_save(row):
    """Click the song link in result row `row`, then save its lyrics with fun()."""
    driver.find_element(By.CSS_SELECTOR, _SONG_LINK % row).click()
    fun(row)


def _close_popup():
    """Leave the frame, click the 'zcls' element, and re-enter "contentFrame".

    NOTE(review): 'zcls' is presumably the close button of the login dialog
    that sits in the main document; confirm against the live page.
    """
    driver.switch_to.default_content()
    driver.find_element(By.CLASS_NAME, 'zcls').click()
    driver.switch_to.frame("contentFrame")


# Walk the first thirty result rows (1-based, matching CSS :nth-child).
for a in range(1, 31):
    print("第 %d 首" % a)

    # Only rows whose artist cell contains the searched name are scraped.
    if name not in driver.find_element(By.CSS_SELECTOR, _ARTIST_CELL % a).text:
        continue

    for message, offset in _ATTEMPTS:
        try:
            print(message)
            if offset is not None:
                _scroll_results(offset)
            _open_and_save(a)
        except Exception:
            # Deliberately broad: any failure of the click or of fun() moves
            # on to the next strategy, as the original bare `except:` did.
            # Unlike a bare except, KeyboardInterrupt still stops the script.
            continue
        break
    else:
        # Every strategy failed. Close the dialog in the main document, then
        # make one last attempt. This one is not guarded: if it fails the
        # script stops and has to be re-run.
        print("试图回到主页面关闭窗口")
        _close_popup()
        _scroll_results(1000)
        _open_and_save(a)
