A quick rundown of commonly used Python code snippets


Check whether a file or directory exists, create a directory, run a system command, loop over a dict:

```python
import os

# Check whether a file or directory exists
if not os.path.exists(rootdir):
    # Create the directory
    os.mkdir(rootdir)

# Run a system command
os.system(cmd)

# Loop over a dict
for key, value in my_dict.items():
    print(key, value)
```

Open a file and process its contents line by line:

```python
fd = open('xxxx.txt', encoding='utf-8')
for line in fd:
    print(line)
fd.close()
```

Create a file and append content:

```python
fd = open('xxxx.txt', 'a+', encoding='utf-8')
fd.write('aaaaa' + '\n')
fd.close()
```

Reading Excel files with xlrd

```python
import xlrd

# Open a workbook (note the lowercase "w" in open_workbook)
data = xlrd.open_workbook('demo.xls')

# List the sheet names contained in the file
data.sheet_names()

# Get the first worksheet -- by position, by index, or by sheet name
table = data.sheets()[0]
table = data.sheet_by_index(0)
table = data.sheet_by_name(u'Sheet1')

# Row and column counts
nrows = table.nrows
ncols = table.ncols

# Whole-row and whole-column values (as lists)
table.row_values(i)
table.col_values(i)

# Loop over the rows by index
for rownum in range(table.nrows):
    print(table.row_values(rownum))

# Single cells: cell(rowx, colx)
cell_A1 = table.cell(0, 0).value
cell_C4 = table.cell(3, 2).value

# The same via row/column accessors
cell_A1 = table.row(0)[0].value
cell_B1 = table.col(1)[0].value

# A simple write (only modifies the in-memory copy)
row = 0
col = 0
ctype = 1   # cell type: 0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error
value = 'lixiaoluo'
xf = 0      # extended formatting (0 is the default)
table.put_cell(row, col, ctype, value, xf)
table.cell(0, 0)        # text: u'lixiaoluo'
table.cell(0, 0).value  # 'lixiaoluo'
```

Writing Excel files with xlwt

```python
import xlwt

# Create a new workbook (note the capital "W" in Workbook,
# unlike xlrd's open_workbook)
file = xlwt.Workbook()

# Add a sheet
table = file.add_sheet('sheet name')

# Write data: table.write(row, column, value)
table.write(0, 0, 'test')

# Writing the same cell twice raises:
#   Exception: Attempt to overwrite cell:
#   sheetname=u'sheet 1' rowx=0 colx=0
# Pass cell_overwrite_ok=True when adding the sheet to allow it
table = file.add_sheet('sheet name', cell_overwrite_ok=True)

# Save the file
file.save('demo.xls')

# Styles
style = xlwt.XFStyle()            # initialize a style
font = xlwt.Font()                # create a font for the style
font.name = 'Times New Roman'
font.bold = True
style.font = font                 # attach the font to the style
table.write(0, 0, 'some bold Times text', style)  # write with the style
```

Command-line options with getopt

```python
import getopt
import sys

try:
    options, args = getopt.getopt(sys.argv[1:], "hp:i:", ["help", "ip=", "port="])
except getopt.GetoptError:
    sys.exit()
for name, value in options:
    if name in ("-h", "--help"):
        usage()   # usage() prints help text, defined elsewhere
    if name in ("-i", "--ip"):
        print(value)
    if name in ("-p", "--port"):
        print(value)
```

A simple crawler

```python
import requests

AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
HEADERS = {
    'User-Agent': AGENT,
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Accept': '*/*'
}

session = requests.session()

# Simulate a login
def login():
    postdata = {
        'defaults': 'xxx',
        'fromLogin': 'xxx',
        'userName': 'xxx',
        'password': 'xxxx'
    }
    url = 'xxxxxxxx'
    login_info = session.post(url, headers=HEADERS, data=postdata, verify=False)
    if login_info.status_code == requests.codes.ok:
        print('login success')
        return True
    else:
        print('login err')
        return False

# Download an HTML page
def downloadUrl(rootdir, url, orgid, page):
    html = session.get(url, headers=HEADERS, verify=False)
    if html.text[1:7] == 'script':
        print(html.text)
        return "err"
    if len(html.text) < 60:
        return "err"
    sample = open(rootdir + "/" + str(orgid) + '_' + str(page) + ".html", "w", encoding='utf-8')
    sample.write(html.text)
    sample.close()
    return 'ok'
```

Parsing JSON file content

```python
import json

def scrapy_by_row(row):
    try:
        orgid = row['organization']['id']
        familyid = row['censusRegisterFamily']['id']
    except KeyError:
        print('errrr')
        return

def scrapy_by_file(json_file_name):
    # Read the contents of the JSON file
    text = open(json_file_name, encoding='utf-8').read()
    # Special handling: strip the BOM that files created on Windows may carry
    if text.startswith(u'\ufeff'):
        text = text.encode('utf8')[3:].decode('utf8')
    # Parse the text into a JSON object
    try:
        json_data = json.loads(text)
    except ValueError:
        print(json_file_name)
        return
    for row in json_data['rows']:
        scrapy_by_row(row)
```
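On Python 3 the BOM handling can be simpler: opening the file with the `utf-8-sig` codec strips a leading BOM automatically and behaves like plain UTF-8 otherwise. A minimal sketch (the file name is only an example):

```python
import json

# utf-8-sig silently consumes a leading BOM if one is present
with open('data.json', encoding='utf-8-sig') as f:
    json_data = json.load(f)
```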
Walking a directory tree

```python
import os

# Walk rootdir and call dirFunc on every matching file
def walkThroughDir(rootdir, dirFunc):
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            print(filename)
            # Only process files with the .html extension
            if filename.split('.')[-1] == 'html':
                dirFunc(os.path.join(parent, filename))
```

Scraping basic listings from the Wenzhou real-estate site

```python
# -*- coding: utf-8 -*-
import re
import requests
import time

# ---- Regular-expression constants used for parsing ----
# Total number of matching records (drives the page count)
PAGE_NUM = '共找到 (.*?) 符合条件的记录'
# Residential-complex name
NAME = 'texttext_title"><ahref(.*?)</a></div><divclass="texttext_moreinfo">'
# Price
PRICE = 'class="hot_price">(.*?)</span>'
# Address
ADDRESS = 'text_moreinfo">(.*?)</div><divclass="texttext_moreinfo"><span>'
# Output directory
ROOTDIR = 'F:\\test\\'

# ---- Request headers; without them the site flags the requests as a bot and blocks them ----
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Host': 'www.0577home.net',
    'Upgrade-Insecure-Requests': '1'
}

# ---- Fetch one page of listings; pageNo is the page number ----
def getHouseListByPageno(pageNo):
    # Open a session for the request
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/list_0_0_0_0_0_0_0_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    # Write the raw page to a file
    fh = open(ROOTDIR + "houseList_pageNo" + str(pageNo) + ".txt", 'w', encoding='utf-8')
    fh.write(houseList.text)
    fh.close()

# ---- Work out how many pages need to be fetched ----
def getPageNum():
    # Open the already-downloaded first page
    f = open(ROOTDIR + 'houseList_pageNo1.txt', encoding='utf-8')
    rawContent = f.read()
    # Extract the record count with the regex
    pageNum = re.findall(PAGE_NUM, rawContent)
    # 20 records per page
    return int(pageNum[0]) / 20 + 1

def parseHouseListToFile(srcFile, dstFile):
    # Open the downloaded page
    f = open(srcFile, encoding='utf-8')
    rawContent = f.read()
    f.close()
    # Strip all whitespace so the regexes can match across line breaks
    p = re.compile('\s+')
    content = re.sub(p, '', rawContent)
    dnames = re.findall(NAME, content)
    names = []
    for dname in dnames:
        idx = dname.rfind('>')
        names.append(dname[idx + 1:])
    prices = re.findall(PRICE, content)
    daddress = re.findall(ADDRESS, content)
    address = []
    for daddr in daddress:
        id = daddr.rfind('>')
        address.append(daddr[id + 1:])
    i = 0
    for x in names:
        # '$'-separated fields, one record per line
        dstFile.write(names[i] + '$' + prices[i] + '$' + address[i] + '\n')
        i = i + 1

# ---- Main: download and parse the listings ----
if __name__ == '__main__':
    # ---- Fetch the pages ----
    # Fetch the first page
    getHouseListByPageno(1)
    # Use it to work out the total number of pages
    pageNum = getPageNum()
    # Fetch the remaining pages
    for i in range(2, int(pageNum) + 1):
        getHouseListByPageno(str(i))
    # ---- Parse the pages ----
    # Current date as YYYYMMDD
    localtime = time.strftime('%Y%m%d', time.localtime(time.time()))
    # Create the output file, prefixed with the date
    f = open(ROOTDIR + localtime + '_houseList.txt', 'a+', encoding='utf-8')
    # Parse every page
    # for k in range(1, int(pageNum) + 1):
    for k in range(1, 115):
        parseHouseListToFile(ROOTDIR + "houseList_pageNo" + str(k) + ".txt", f)
    f.close()
```
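One thing to watch in `getPageNum`: `int(pageNum[0]) / 20 + 1` is float division on Python 3, and it over-counts by one page whenever the record count is an exact multiple of 20. A hedged alternative with `math.ceil` (still assuming 20 records per page, as the script does):

```python
import math

def page_count(record_count, per_page=20):
    # Round up, without adding a spurious page when record_count
    # divides evenly by per_page
    return math.ceil(record_count / per_page)

assert page_count(100) == 5
assert page_count(101) == 6
```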
Scraping detailed information from the Wenzhou real-estate site

```python
# -*- coding: utf-8 -*-
import re
import requests
import time
import os

# ---- Regular-expression constants used for parsing ----
# Total number of matching records (drives the page count)
PAGE_NUM = '共找到 (.*?) 符合条件的记录'
# Residential-complex name
NAME = 'texttext_title"><ahref(.*?)</a></div><divclass="texttext_moreinfo">'
# Price
PRICE = 'class="hot_price">(.*?)</span>'
# Address
ADDRESS = 'text_moreinfo">(.*?)</div><divclass="texttext_moreinfo"><span>'
# Complex id
ID = 'class="picdiv_left"><ahref="http://www.0577home.net/xiaoqu/(.*?).html'
# District
LOCATION = '<div><a>所属区域:</a><span>(.*?)</span></div>'
# Land area
AREA = '<div><a>占地面积:</a><span>(.*?)</span></div>'
# Greening rate
GREENINGRATE = '<div><a>绿化率:</a><span>(.*?)</span></div>'
# Number of buildings
LAYER = '<div><a>楼总数:</a><span>(.*?)</span></div>'
# Property type
TYPE = '<div><a>物业类型:</a><span>(.*?)</span></div>'
# Assigned primary school
PRIMARYSCHOOL = '<div><a>所属小学:</a><span>(.*?)</span></div>'
# Total floor area
BUILDINGAREA = '<div><a>总建筑面积:</a><span>(.*?)</span></div>'
# Plot ratio
PLOTRATIO = '<div><a>容积率:</a><span>(.*?)</span></div>'
# Developer
DEVEPLOPER = '<div><a>开发商:</a><span>(.*?)</span></div>'
# Output directory
ROOTDIR = 'F:\\test\\'

# ---- Request headers; without them the site flags the requests as a bot and blocks them ----
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Host': 'www.0577home.net',
    'Upgrade-Insecure-Requests': '1'
}

# ---- Fetch one page of listings; pageNo is the page number ----
def getHouseListByPageno(pageNo):
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/list_0_0_0_0_0_0_0_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    fh = open(ROOTDIR + "houseList_pageNo" + str(pageNo) + ".txt", 'w', encoding='utf-8')
    fh.write(houseList.text)
    fh.close()

# ---- Fetch the detail page of one complex, skipping already-downloaded ones ----
def getHouseInfoByPageno(pageNo, k):
    if os.path.exists(ROOTDIR + "houseInfo_pageNo" + str(pageNo) + ".html"):
        return
    print('downloading !, count %s, page %s' % (str(k), str(pageNo)))
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/detail_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    fh = open(ROOTDIR + "houseInfo_pageNo" + str(pageNo) + ".html", 'w', encoding='utf-8')
    fh.write(houseList.text)
    fh.close()

# ---- Work out how many pages need to be fetched ----
def getPageNum():
    f = open(ROOTDIR + 'houseList_pageNo1.txt', encoding='utf-8')
    rawContent = f.read()
    pageNum = re.findall(PAGE_NUM, rawContent)
    return int(pageNum[0]) / 20 + 1

# ---- Parse one detail page into a tuple of fields ----
def parseHouseInfo(srcFile):
    f = open(srcFile, encoding='utf-8')
    content = f.read()
    location = re.findall(LOCATION, content)[0]
    location = location.split(' ')
    category1 = location[0]
    category2 = location[1]
    area = re.findall(AREA, content)[0]
    greeningrate = re.findall(GREENINGRATE, content)[0]
    layer = re.findall(LAYER, content)[0]
    type = re.findall(TYPE, content)[0]
    primaryschool = re.findall(PRIMARYSCHOOL, content)[0]
    buildingarea = re.findall(BUILDINGAREA, content)[0]
    plotratio = re.findall(PLOTRATIO, content)[0]
    developer = re.findall(DEVEPLOPER, content)[0]
    f.close()
    return (category1, category2, area, greeningrate, layer, type,
            primaryschool, buildingarea, plotratio, developer)

def parseHouseListToFile(srcFile, dstFile):
    f = open(srcFile, encoding='utf-8')
    rawContent = f.read()
    f.close()
    # Strip all whitespace so the regexes can match across line breaks
    p = re.compile('\s+')
    content = re.sub(p, '', rawContent)
    dnames = re.findall(NAME, content)
    names = []
    for dname in dnames:
        idx = dname.rfind('>')
        names.append(dname[idx + 1:])
    prices = re.findall(PRICE, content)
    daddress = re.findall(ADDRESS, content)
    ids = re.findall(ID, content)
    address = []
    for daddr in daddress:
        id = daddr.rfind('>')
        address.append(daddr[id + 1:])
    i = 0
    for x in names:
        # '$'-separated fields, one record per line
        dstFile.write(names[i] + '$' + prices[i] + '$' + address[i] + '$' + ids[i] + '\n')
        i = i + 1

# ---- Main: download and parse the detail pages ----
if __name__ == '__main__':
    # ---- Fetch the list pages (already done by the previous script) ----
    # getHouseListByPageno(1)
    # pageNum = getPageNum()
    # for i in range(2, int(pageNum) + 1):
    #     getHouseListByPageno(str(i))
    # ---- Parse the list pages ----
    # Current date as YYYYMMDD
    localtime = time.strftime('%Y%m%d', time.localtime(time.time()))
    f = open(ROOTDIR + localtime + '_houseList.txt', 'a+', encoding='utf-8')
    # for k in range(1, int(pageNum) + 1):
    for k in range(1, 115):
        parseHouseListToFile(ROOTDIR + "houseList_pageNo" + str(k) + ".txt", f)
    f.close()
    # ---- Fetch and parse the detail page for every record ----
    f = open(ROOTDIR + localtime + '_houseList.txt', encoding='utf-8')
    fd = open(ROOTDIR + localtime + '_houseInfo.txt', 'w', encoding='utf-8')
    k = 0
    for line in f:
        data = line.strip('\n')
        data = data.split('$')
        idx = data[3]
        getHouseInfoByPageno(idx, k)
        houseInfo = parseHouseInfo(ROOTDIR + "houseInfo_pageNo" + str(idx) + ".html")
        print(str(k) + '$' + "$".join(data) + '$' + "$".join(houseInfo))
        fd.write("$".join(data) + '$' + "$".join(houseInfo) + '\n')
        k += 1
    f.close()
    fd.close()
```
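`parseHouseInfo` indexes `[0]` on every `findall`, so a single missing field on a detail page aborts the whole record with an `IndexError`. A hedged helper sketch that falls back to a default instead (`first_match` is a name introduced here for illustration, not part of the original script):

```python
import re

def first_match(pattern, text, default=''):
    # Return the first capture of the pattern, or a default
    # when the field is absent from the page
    matches = re.findall(pattern, text)
    return matches[0] if matches else default

# e.g. inside parseHouseInfo:
#     area = first_match(AREA, content)
```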
Reading a CSV file

```python
import csv

with open('job.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)
```

Writing a CSV file

```python
import csv
import os

# Create the CSV file and write the header row on first use
def createCsv(file):
    if not os.path.exists(file):
        csvfile = open(file, 'a+', encoding='utf-8', newline='')
        writer = csv.writer(csvfile)
        writer.writerow(paramname)  # paramname: the header row, defined elsewhere
    else:
        csvfile = open(file, 'a+', newline='')
        writer = csv.writer(csvfile)
    return writer
```
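When the CSV has a header row, `csv.DictReader` and `csv.DictWriter` let you address columns by name instead of position. A minimal sketch (the file and field names are only examples):

```python
import csv

# Read rows as dicts keyed by the header row
with open('job.csv', newline='', encoding='utf-8') as f:
    for row in csv.DictReader(f):
        print(row)

# Write dicts, emitting the header once
with open('out.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['title', 'salary'])
    writer.writeheader()
    writer.writerow({'title': 'engineer', 'salary': '10k'})
```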
Calling Java from Python

```python
import sys
import jpype

name = sys.argv[1]
jarpath = '/home/dsadm/why/python'
# Start a JVM with the jar directory on the extension path
jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.ext.dirs=%s" % jarpath)
DECRYPT = jpype.JClass('why.fmrt.decrypt.DECRYPT')
upperName = DECRYPT.decrypt(name)
print(upperName)
jpype.shutdownJVM()
```

Simple CAPTCHA cracking

```python
from urllib.request import urlretrieve
from urllib.request import urlopen
from bs4 import BeautifulSoup
import subprocess
import requests
from PIL import Image
from PIL import ImageOps

def cleanImage(imagePath):
    image = Image.open(imagePath)
    # Threshold to black and white, then add a white border
    image = image.point(lambda x: 0 if x < 143 else 255)
    borderImage = ImageOps.expand(image, border=20, fill='white')
    borderImage.save(imagePath)

html = urlopen("http://www.pythonscraping.com/humans-only")
bsObj = BeautifulSoup(html, "html.parser")

# Gather prepopulated form values
imageLocation = bsObj.find("img", {"title": "Image CAPTCHA"})["src"]
formBuildId = bsObj.find("input", {"name": "form_build_id"})["value"]
captchaSid = bsObj.find("input", {"name": "captcha_sid"})["value"]
captchaToken = bsObj.find("input", {"name": "captcha_token"})["value"]

captchaUrl = "http://pythonscraping.com" + imageLocation
urlretrieve(captchaUrl, "captcha.jpg")
cleanImage("captcha.jpg")

# Run the tesseract OCR binary on the cleaned image
p = subprocess.Popen(["tesseract", "captcha.jpg", "captcha"],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p.wait()
f = open("captcha.txt", "r")

# Clean any whitespace characters
captchaResponse = f.read().replace(" ", "").replace("\n", "")
print("Captcha solution attempt: " + captchaResponse)

if len(captchaResponse) == 5:
    params = {"captcha_token": captchaToken, "captcha_sid": captchaSid,
              "form_id": "comment_node_page_form", "form_build_id": formBuildId,
              "captcha_response": captchaResponse, "name": "Ryan Mitchell",
              "subject": "I come to seek the Grail",
              "comment_body[und][0][value]":
                  "...and I am definitely not a bot"}
    r = requests.post("http://www.pythonscraping.com/comment/reply/10",
                      data=params)
    responseObj = BeautifulSoup(r.text, "html.parser")
    if responseObj.find("div", {"class": "messages"}) is not None:
        print(responseObj.find("div", {"class": "messages"}).get_text())
else:
    print("There was a problem reading the CAPTCHA correctly!")
```
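Shelling out to the `tesseract` binary means writing and re-reading `captcha.txt`; if the `pytesseract` wrapper is installed (and the tesseract binary is on `PATH`), the OCR call can stay in-process. A minimal sketch under those assumptions:

```python
import pytesseract
from PIL import Image

# image_to_string runs tesseract on the image and returns the text
captchaResponse = pytesseract.image_to_string(Image.open("captcha.jpg"))
captchaResponse = captchaResponse.replace(" ", "").replace("\n", "")
```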
Slider CAPTCHA cracking

```python
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
import PIL.Image as image
import time, re, random
import requests
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

# Browser headers for the image download
agent = 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0'
headers = {
    'User-Agent': agent
}

# Merge the scrambled tiles back into the original image.
# filename: the downloaded tile sheet; location_list: the tile positions.
# crop() takes (left, upper, right, lower); paste() takes (image, (x, y)).
def get_merge_image(filename, location_list):
    # Open the tile sheet
    im = image.open(filename)
    # Create a new 260x116 image
    new_im = image.new('RGB', (260, 116))
    im_list_upper = []
    im_list_down = []
    # Cut out the tiles
    for location in location_list:
        # Tiles belonging to the upper half
        if location['y'] == -58:
            im_list_upper.append(im.crop((abs(location['x']), 58, abs(location['x']) + 10, 166)))
        # Tiles belonging to the lower half
        if location['y'] == 0:
            im_list_down.append(im.crop((abs(location['x']), 0, abs(location['x']) + 10, 58)))
    new_im = image.new('RGB', (260, 116))
    x_offset = 0
    # Paste the tiles back together
    for im in im_list_upper:
        new_im.paste(im, (x_offset, 0))
        x_offset += im.size[0]
    x_offset = 0
    for im in im_list_down:
        new_im.paste(im, (x_offset, 58))
        x_offset += im.size[0]
    return new_im

# Download and restore one image
# driver: the webdriver; div: xpath of the div holding the image
def get_image(driver, div):
    # Find the divs holding the tiles
    background_images = driver.find_elements_by_xpath(div)
    location_list = []
    imageurl = ''
    # The tiles are scrambled with CSS offsets; collect those offsets
    # so the image can be reassembled later
    for background_image in background_images:
        style = background_image.get_attribute('style')
        # Parse the tile URL and the x/y offsets out of the style attribute
        match = re.findall('background-image: url\("(.*)"\); background-position: (.*)px (.*)px;', style)[0]
        imageurl = match[0]
        location = {'x': int(match[1]), 'y': int(match[2])}
        location_list.append(location)
    # Swap the extension to get the real image URL
    imageurl = imageurl.replace("webp", "jpg")
    # Image file name
    imageName = imageurl.split('/')[-1]
    # Fetch the image
    session = requests.session()
    r = session.get(imageurl, headers=headers, verify=False)
    # Save it to disk
    with open(imageName, 'wb') as f:
        f.write(r.content)
    # Reassemble and return the original image
    return get_merge_image(imageName, location_list)

# Compare the RGB values of one pixel in two images
def is_similar(image1, image2, x, y):
    # Pixel at (x, y) in both images
    pixel1 = image1.getpixel((x, y))
    pixel2 = image2.getpixel((x, y))
    for i in range(0, 3):
        # A channel difference of 50 or more marks the gap
        if abs(pixel1[i] - pixel2[i]) >= 50:
            return False
    return True

# Locate the gap
def get_diff_location(image1, image2):
    # Both images are 260x116; compare every pixel's RGB values and
    # return the first column with a large difference
    for i in range(0, 260):
        for j in range(0, 116):
            if is_similar(image1, image2, i, j) == False:
                return i

# Build the x-axis movement track toward the gap
def get_track(length):
    track = []
    # Random step size: one to three pixels per move
    x = random.randint(1, 3)
    # Build the track
    while length - x >= 5:
        track.append(x)
        length = length - x
        x = random.randint(1, 3)
    # Cover the last five pixels one at a time
    for i in range(length):
        track.append(1)
    return track

# Slider-CAPTCHA solver
def main():
    # Launch Firefox
    driver = webdriver.Firefox()
    # Open the demo page
    driver.get("http://www.geetest.com/exp_embed")
    # Wait for the slider elements to render
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']").is_displayed())
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_bg gt_show']").is_displayed())
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_fullbg gt_show']").is_displayed())
    # Download the image with the gap and the full image
    image1 = get_image(driver, "//div[@class='gt_cut_bg gt_show']/div")
    image2 = get_image(driver, "//div[@class='gt_cut_fullbg gt_show']/div")
    # Locate the gap
    loc = get_diff_location(image1, image2)
    # Build the x-axis track
    track_list = get_track(loc)
    # Find the slider knob
    element = driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']")
    location = element.location
    # The knob's y position
    y = location['y']
    # Click the knob and hold it
    print("Step 1: click and hold the knob")
    ActionChains(driver).click_and_hold(on_element=element).perform()
    time.sleep(0.15)
    print("Step 2: drag the knob")
    track_string = ""
    for track in track_list:
        track_string = track_string + "{%d,%d}," % (track, y - 445)
        # xoffset=track+22: the offset is relative to the knob's top-left
        # corner, while the track describes its center, so add half the
        # knob's width.
        # yoffset=y-445: same idea; browsers render differently, so make
        # sure the computed value comes out to 22, half the knob's height.
        ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=y - 445).perform()
        # Randomize the pauses too; moving at a fixed rate is flagged as automation
        time.sleep(random.randint(10, 50) / 100)
    print(track_string)
    # xoffset=21 steps one pixel back; do it five times because the knob
    # starts five pixels from the left edge of the track
    for _ in range(5):
        ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y - 445).perform()
        time.sleep(0.1)
    print("Step 3: release the mouse")
    # Release the mouse
    ActionChains(driver).release(on_element=element).perform()
    time.sleep(3)
    # Click the verify button
    # submit = driver.find_element_by_xpath("//div[@class='gt_ajax_tip success']")
    # print(submit.location)
    # time.sleep(5)
    # Close the browser (left open here to make the demo easier to watch)
    # driver.quit()

if __name__ == '__main__':
    main()
```
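The uniform one-to-three-pixel steps above are easy to fingerprint; real drags tend to start fast and slow down near the target. A hedged sketch of an ease-out track generator (`get_eased_track` is a name introduced here, not part of the original script):

```python
import random

def get_eased_track(length):
    # Distribute the distance along a cubic ease-out curve: large steps
    # early, small steps near the gap, with a little jitter mixed in
    steps = 20
    track, covered = [], 0
    for i in range(1, steps + 1):
        eased = 1 - (1 - i / steps) ** 3            # ease-out in [0, 1]
        target = round(length * eased)
        move = target - covered + random.randint(-1, 1)
        move = max(0, min(move, length - covered))  # stay within bounds
        track.append(move)
        covered += move
    if covered < length:
        track.append(length - covered)              # make up any remainder
    return track

assert sum(get_eased_track(120)) == 120
```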
Building a web page with tornado

```python
import os
import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web
from view import *   # Indexhandler is defined in view.py
from tornado.options import define, options

define("port", default=8000, help="run on the given port", type=int)

class Application(tornado.web.Application):
    def __init__(self):
        handlers = [
            (r"/", Indexhandler),
        ]
        settings = dict(
            template_path=os.path.join(os.path.dirname(__file__), 'templates'),
            autoescape=None,
            debug=False,
        )
        tornado.web.Application.__init__(self, handlers, **settings)

if __name__ == "__main__":
    tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(Application(), xheaders=True)
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
```

Scheduled tasks

```python
#! /usr/bin/env python
# coding=utf-8
import time, os, sched

# First argument: a clock returning seconds elapsed since a fixed epoch
# Second argument: a function used to wait between events
schedule = sched.scheduler(time.time, time.sleep)

def perform_command(cmd, inc):
    # Re-schedule itself to run again in inc seconds, i.e. run periodically
    schedule.enter(inc, 0, perform_command, (cmd, inc))
    os.system(cmd)

def timming_exe(cmd, inc=60):
    # enter() schedules an event inc seconds from now
    schedule.enter(inc, 0, perform_command, (cmd, inc))
    # Run until the event queue is empty
    schedule.run()

# Run getMovieList.py once a day
timming_exe("getMovieList.py", 60 * 60 * 24)
```

Normalizing addresses with the Baidu Maps API

```python
from urllib.request import urlopen
from urllib.parse import urlencode
from urllib.error import URLError
import json

class xBaiduMap:
    def __init__(self, key='mgf2Gxr7EgnfPVQnpClZnsug'):
        self.host = 'http://api.map.baidu.com'
        self.path = '/geocoder?'
        self.param = {'address': None, 'output': 'json', 'key': key, 'location': None, 'city': None}

    def getLocation(self, address, city=None):
        rlt = self.geocoding('address', address, city)
        if rlt != None:
            l = rlt['result']
            if isinstance(l, list):
                return None
            return l['location']['lat'], l['location']['lng']

    def getAddress(self, lat, lng):
        rlt = self.geocoding('location', "{0},{1}".format(lat, lng))
        if rlt != None:
            l = rlt['result']
            # return l['formatted_address']
            # More detail is available under the 'addressComponent' key
            ld = rlt['result']['addressComponent']
            return (ld['city'] + ';' + ld['district'] + ';' + ld['street'] + ";" + ld['street_number'])

    def geocoding(self, key, value, city=None):
        if key == 'location':
            if 'city' in self.param:
                del self.param['city']
            if 'address' in self.param:
                del self.param['address']
        elif key == 'address':
            if 'location' in self.param:
                del self.param['location']
            if city == None and 'city' in self.param:
                del self.param['city']
            else:
                self.param['city'] = city
        self.param[key] = value
        try:
            r = urlopen(self.host + self.path + urlencode(self.param)).read()
        except URLError:
            print("URLError")
            return None
        str_response = r.decode('utf-8')
        rlt = json.loads(str_response)
        if rlt['status'] == 'OK':
            return rlt
        else:
            print("Decoding Failed")
            return None
```
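A hedged usage sketch for `xBaiduMap` (the address and coordinates are only examples, the bundled demo key may no longer be valid, and the old `/geocoder` endpoint may since have been replaced by newer versions of the API):

```python
m = xBaiduMap()

# Address -> (lat, lng)
print(m.getLocation('温州市人民路1号'))

# (lat, lng) -> "city;district;street;street number"
print(m.getAddress(27.994, 120.699))
```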
Multiprocessing

```python
import multiprocessing

# worker and PROCESS_NUM are defined elsewhere
jobs = []
for process_id in range(PROCESS_NUM):
    p = multiprocessing.Process(target=worker, args=(process_id,))
    jobs.append(p)
    p.start()
```

A small file splitter

```python
import os

def split_file(file_name, file_num):
    # Already split
    if os.path.exists("split_0.txt"):
        return
    # Count the total number of lines
    count = -1
    file = open(file_name, encoding='utf-8')
    for count, line in enumerate(file):
        pass
    count += 1
    file.close()
    # Lines per output file
    count_per_file = count / file_num
    # Create file_num empty output files
    for i in range(file_num):
        file = open("split_" + str(i) + ".txt", 'w', encoding='utf-8')
        file.close()
    # Distribute the lines across the output files
    file = open(file_name, encoding='utf-8')
    count = -1
    for count, line in enumerate(file):
        file_index = int(count / count_per_file)
        sub_file = open("split_" + str(file_index) + ".txt", "a+", encoding='utf-8')
        sub_file.write(line)
        sub_file.close()
    file.close()
```

Operating DB2 from Python

```python
import ibm_db

con = ibm_db.connect("DATABASE=FMRT;HOSTNAME=XX.XX.XX.XX;PORT=60000;PROTOCOL=TCPIP;UID=db2inst1;PWD=db2inst1;", "", "")
sql = getSql(inputfile)  # getSql and inputfile are defined elsewhere
stmt = ibm_db.exec_immediate(con, sql)
result = ibm_db.fetch_both(stmt)
rowidx = 0
while result:
    # do something with the row
    result = ibm_db.fetch_both(stmt)
ibm_db.close(con)
```

Chinese word segmentation with jieba

```python
import jieba

seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
print("Full Mode: " + "/ ".join(seg_list))      # full mode

seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
print("Default Mode: " + "/ ".join(seg_list))   # accurate mode

seg_list = jieba.cut("他来到了网易杭研大厦")      # accurate mode is the default
print(", ".join(seg_list))

seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")  # search-engine mode
print(", ".join(seg_list))
```

Month-end check

```python
import calendar
import sys

def isMonthEnd(datetime):
    # datetime is a string like '20160731'
    year = int(datetime[0:4])
    month = int(datetime[4:6])
    day = int(datetime[6:8])
    # monthrange returns (weekday of the 1st, days in the month)
    wday, monthrange = calendar.monthrange(year, month)
    if day == monthrange:
        return 1
    else:
        return 0

isMonthEnd(sys.argv[1])
```
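The same check works without `calendar`: a date is the last day of its month exactly when the following day falls in a different month. A minimal sketch with the standard `datetime` module:

```python
from datetime import datetime, timedelta

def is_month_end(yyyymmdd):
    # Parse 'YYYYMMDD' and test whether the next day starts a new month
    d = datetime.strptime(yyyymmdd, '%Y%m%d')
    return (d + timedelta(days=1)).month != d.month

assert is_month_end('20160229')        # leap-year February
assert not is_month_end('20160228')
```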
Removing line separators

```python
import os

# Join all lines by deleting the CRLF pairs across the whole file
cmd = "sed ':a;N;$ s/\\r\\n//g;ba' " + oldfile + " > " + newfile
os.system(cmd)
```

Multithreading

```python
# -*- coding: utf-8 -*-
"""
    thread
    ~~~~~~~~~~~~~~~~

    Thread framework

    :copyright: (c) 2016 by why.
    :license: MIT, see LICENSE for more details.
"""
import threading

class Threadconfig():
    def __init__(self, thread_size):
        self.thread_size = thread_size

    def topen(self):
        self.thread_tasks = []

    def build(self, func, **kwargs):
        self.thread_task = threading.Thread(target=func, kwargs=(kwargs))
        self.thread_tasks.append(self.thread_task)

    def run(self):
        for thread_task in self.thread_tasks:
            thread_task.setDaemon(True)
            thread_task.start()
        # Poll until every thread has finished
        while 1:
            alive = False
            for thread_num in range(0, self.thread_size):
                alive = alive or self.thread_tasks[thread_num].isAlive()
            if not alive:
                break

    def __del__(self):
        self.thread_tasks = []
```
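A hedged usage sketch for `Threadconfig` (the worker function and its arguments are examples; note that `topen()` must be called before `build()`, and `thread_size` should match the number of tasks because `run()` polls exactly that many):

```python
import time

def worker(task_id=0):
    # Stand-in workload
    time.sleep(0.1)
    print('task %d done' % task_id)

cfg = Threadconfig(thread_size=3)
cfg.topen()                      # initialize the task list
for i in range(3):
    cfg.build(worker, task_id=i)
cfg.run()                        # start all threads and wait for them
```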
