A quick rundown of commonly used Python code snippets


Check whether a file or directory exists, create a directory, run a system command, loop over a dict:

```python
import os

# Check whether a file or directory exists
if not os.path.exists(rootdir):
    # Create the directory
    os.mkdir(rootdir)

# Run a system command
os.system(cmd)

# Loop over a dict
for key, value in my_dict.items():
    print(key, value)
```

Open a file and process its contents line by line:

```python
fd = open('xxxx.txt', encoding='utf-8')
for line in fd:
    print(line)
fd.close()
```

Create a file and append content:

```python
fd = open('xxxx.txt', 'a+', encoding='utf-8')
fd.write('aaaaa' + '\n')
fd.close()
```

Reading Excel files with xlrd

```python
import xlrd

# Open a workbook (note the lowercase "w" in open_workbook)
data = xlrd.open_workbook('demo.xls')

# List the sheet names contained in the file
data.sheet_names()

# Get the first worksheet -- by position, by index, or by sheet name
table = data.sheets()[0]
table = data.sheet_by_index(0)
table = data.sheet_by_name(u'Sheet1')

# Row and column counts
nrows = table.nrows
ncols = table.ncols

# Whole-row and whole-column values (as lists)
table.row_values(i)
table.col_values(i)

# Loop over the rows by index
for rownum in range(table.nrows):
    print(table.row_values(rownum))

# Single cells: cell(rowx, colx)
cell_A1 = table.cell(0, 0).value
cell_C4 = table.cell(3, 2).value

# The same via row/column accessors
cell_A1 = table.row(0)[0].value
cell_B1 = table.col(1)[0].value

# A simple write (only modifies the in-memory copy)
row = 0
col = 0
ctype = 1   # cell type: 0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error
value = 'lixiaoluo'
xf = 0      # extended formatting (0 is the default)
table.put_cell(row, col, ctype, value, xf)
table.cell(0, 0)        # text: u'lixiaoluo'
table.cell(0, 0).value  # 'lixiaoluo'
```

Writing Excel files with xlwt

```python
import xlwt

# Create a new workbook (note the capital "W" in Workbook,
# unlike xlrd's open_workbook)
file = xlwt.Workbook()

# Add a sheet
table = file.add_sheet('sheet name')

# Write data: table.write(row, column, value)
table.write(0, 0, 'test')

# Writing the same cell twice raises:
#   Exception: Attempt to overwrite cell:
#   sheetname=u'sheet 1' rowx=0 colx=0
# Pass cell_overwrite_ok=True when adding the sheet to allow it
table = file.add_sheet('sheet name', cell_overwrite_ok=True)

# Save the file
file.save('demo.xls')

# Styles
style = xlwt.XFStyle()            # initialize a style
font = xlwt.Font()                # create a font for the style
font.name = 'Times New Roman'
font.bold = True
style.font = font                 # attach the font to the style
table.write(0, 0, 'some bold Times text', style)  # write with the style
```

Command-line options with getopt

```python
import getopt
import sys

try:
    options, args = getopt.getopt(sys.argv[1:], "hp:i:", ["help", "ip=", "port="])
except getopt.GetoptError:
    sys.exit()
for name, value in options:
    if name in ("-h", "--help"):
        usage()   # usage() prints help text, defined elsewhere
    if name in ("-i", "--ip"):
        print(value)
    if name in ("-p", "--port"):
        print(value)
```

A simple crawler

```python
import requests

AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
HEADERS = {
    'User-Agent': AGENT,
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'Accept': '*/*'
}

session = requests.session()

# Simulate a login
def login():
    postdata = {
        'defaults': 'xxx',
        'fromLogin': 'xxx',
        'userName': 'xxx',
        'password': 'xxxx'
    }
    url = 'xxxxxxxx'
    login_info = session.post(url, headers=HEADERS, data=postdata, verify=False)
    if login_info.status_code == requests.codes.ok:
        print('login success')
        return True
    else:
        print('login err')
        return False

# Download an HTML page
def downloadUrl(rootdir, url, orgid, page):
    html = session.get(url, headers=HEADERS, verify=False)
    if html.text[1:7] == 'script':
        print(html.text)
        return "err"
    if len(html.text) < 60:
        return "err"
    sample = open(rootdir + "/" + str(orgid) + '_' + str(page) + ".html", "w", encoding='utf-8')
    sample.write(html.text)
    sample.close()
    return 'ok'
```

Parsing JSON file content

```python
import json

def scrapy_by_row(row):
    try:
        orgid = row['organization']['id']
        familyid = row['censusRegisterFamily']['id']
    except KeyError:
        print('errrr')
        return

def scrapy_by_file(json_file_name):
    # Read the contents of the JSON file
    text = open(json_file_name, encoding='utf-8').read()
    # Special handling: strip the BOM that files created on Windows may carry
    if text.startswith(u'\ufeff'):
        text = text.encode('utf8')[3:].decode('utf8')
    # Parse the text into a JSON object
    try:
        json_data = json.loads(text)
    except ValueError:
        print(json_file_name)
        return
    for row in json_data['rows']:
        scrapy_by_row(row)
```
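On Python 3 the BOM handling can be simpler: opening the file with the `utf-8-sig` codec strips a leading BOM automatically and behaves like plain UTF-8 otherwise. A minimal sketch (the file name is only an example):

```python
import json

# utf-8-sig silently consumes a leading BOM if one is present
with open('data.json', encoding='utf-8-sig') as f:
    json_data = json.load(f)
```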
Walking a directory tree

```python
import os

# Walk rootdir and call dirFunc on every matching file
def walkThroughDir(rootdir, dirFunc):
    for parent, dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            print(filename)
            # Only process files with the .html extension
            if filename.split('.')[-1] == 'html':
                dirFunc(os.path.join(parent, filename))
```

Scraping basic listings from the Wenzhou real-estate site

```python
# -*- coding: utf-8 -*-
import re
import requests
import time

# ---- Regular-expression constants used for parsing ----
# Total number of matching records (drives the page count)
PAGE_NUM = '共找到 (.*?) 符合条件的记录'
# Residential-complex name
NAME = 'texttext_title"><ahref(.*?)</a></div><divclass="texttext_moreinfo">'
# Price
PRICE = 'class="hot_price">(.*?)</span>'
# Address
ADDRESS = 'text_moreinfo">(.*?)</div><divclass="texttext_moreinfo"><span>'
# Output directory
ROOTDIR = 'F:\\test\\'

# ---- Request headers; without them the site flags the requests as a bot and blocks them ----
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Host': 'www.0577home.net',
    'Upgrade-Insecure-Requests': '1'
}

# ---- Fetch one page of listings; pageNo is the page number ----
def getHouseListByPageno(pageNo):
    # Open a session for the request
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/list_0_0_0_0_0_0_0_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    # Write the raw page to a file
    fh = open(ROOTDIR + "houseList_pageNo" + str(pageNo) + ".txt", 'w', encoding='utf-8')
    fh.write(houseList.text)
    fh.close()

# ---- Work out how many pages need to be fetched ----
def getPageNum():
    # Open the already-downloaded first page
    f = open(ROOTDIR + 'houseList_pageNo1.txt', encoding='utf-8')
    rawContent = f.read()
    # Extract the record count with the regex
    pageNum = re.findall(PAGE_NUM, rawContent)
    # 20 records per page
    return int(pageNum[0]) / 20 + 1

def parseHouseListToFile(srcFile, dstFile):
    # Open the downloaded page
    f = open(srcFile, encoding='utf-8')
    rawContent = f.read()
    f.close()
    # Strip all whitespace so the regexes can match across line breaks
    p = re.compile('\s+')
    content = re.sub(p, '', rawContent)
    dnames = re.findall(NAME, content)
    names = []
    for dname in dnames:
        idx = dname.rfind('>')
        names.append(dname[idx + 1:])
    prices = re.findall(PRICE, content)
    daddress = re.findall(ADDRESS, content)
    address = []
    for daddr in daddress:
        id = daddr.rfind('>')
        address.append(daddr[id + 1:])
    i = 0
    for x in names:
        # '$'-separated fields, one record per line
        dstFile.write(names[i] + '$' + prices[i] + '$' + address[i] + '\n')
        i = i + 1

# ---- Main: download and parse the listings ----
if __name__ == '__main__':
    # ---- Fetch the pages ----
    # Fetch the first page
    getHouseListByPageno(1)
    # Use it to work out the total number of pages
    pageNum = getPageNum()
    # Fetch the remaining pages
    for i in range(2, int(pageNum) + 1):
        getHouseListByPageno(str(i))
    # ---- Parse the pages ----
    # Current date as YYYYMMDD
    localtime = time.strftime('%Y%m%d', time.localtime(time.time()))
    # Create the output file, prefixed with the date
    f = open(ROOTDIR + localtime + '_houseList.txt', 'a+', encoding='utf-8')
    # Parse every page
    # for k in range(1, int(pageNum) + 1):
    for k in range(1, 115):
        parseHouseListToFile(ROOTDIR + "houseList_pageNo" + str(k) + ".txt", f)
    f.close()
```
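One thing to watch in `getPageNum`: `int(pageNum[0]) / 20 + 1` is float division on Python 3, and it over-counts by one page whenever the record count is an exact multiple of 20. A hedged alternative with `math.ceil` (still assuming 20 records per page, as the script does):

```python
import math

def page_count(record_count, per_page=20):
    # Round up, without adding a spurious page when record_count
    # divides evenly by per_page
    return math.ceil(record_count / per_page)

assert page_count(100) == 5
assert page_count(101) == 6
```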
Scraping detailed information from the Wenzhou real-estate site

```python
# -*- coding: utf-8 -*-
import re
import requests
import time
import os

# ---- Regular-expression constants used for parsing ----
# Total number of matching records (drives the page count)
PAGE_NUM = '共找到 (.*?) 符合条件的记录'
# Residential-complex name
NAME = 'texttext_title"><ahref(.*?)</a></div><divclass="texttext_moreinfo">'
# Price
PRICE = 'class="hot_price">(.*?)</span>'
# Address
ADDRESS = 'text_moreinfo">(.*?)</div><divclass="texttext_moreinfo"><span>'
# Complex id
ID = 'class="picdiv_left"><ahref="http://www.0577home.net/xiaoqu/(.*?).html'
# District
LOCATION = '<div><a>所属区域:</a><span>(.*?)</span></div>'
# Land area
AREA = '<div><a>占地面积:</a><span>(.*?)</span></div>'
# Greening rate
GREENINGRATE = '<div><a>绿化率:</a><span>(.*?)</span></div>'
# Number of buildings
LAYER = '<div><a>楼总数:</a><span>(.*?)</span></div>'
# Property type
TYPE = '<div><a>物业类型:</a><span>(.*?)</span></div>'
# Assigned primary school
PRIMARYSCHOOL = '<div><a>所属小学:</a><span>(.*?)</span></div>'
# Total floor area
BUILDINGAREA = '<div><a>总建筑面积:</a><span>(.*?)</span></div>'
# Plot ratio
PLOTRATIO = '<div><a>容积率:</a><span>(.*?)</span></div>'
# Developer
DEVEPLOPER = '<div><a>开发商:</a><span>(.*?)</span></div>'
# Output directory
ROOTDIR = 'F:\\test\\'

# ---- Request headers; without them the site flags the requests as a bot and blocks them ----
HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Host': 'www.0577home.net',
    'Upgrade-Insecure-Requests': '1'
}

# ---- Fetch one page of listings; pageNo is the page number ----
def getHouseListByPageno(pageNo):
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/list_0_0_0_0_0_0_0_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    fh = open(ROOTDIR + "houseList_pageNo" + str(pageNo) + ".txt", 'w', encoding='utf-8')
    fh.write(houseList.text)
    fh.close()

# ---- Fetch the detail page of one complex, skipping already-downloaded ones ----
def getHouseInfoByPageno(pageNo, k):
    if os.path.exists(ROOTDIR + "houseInfo_pageNo" + str(pageNo) + ".html"):
        return
    print('downloading !, count %s, page %s' % (str(k), str(pageNo)))
    session = requests.session()
    url = 'http://www.0577home.net/xiaoqu/detail_' + str(pageNo) + '.html'
    houseList = session.get(url, headers=HEADERS, verify=False)
    fh = open(ROOTDIR + "houseInfo_pageNo" + str(pageNo) + ".html", 'w', encoding='utf-8')
    fh.write(houseList.text)
    fh.close()

# ---- Work out how many pages need to be fetched ----
def getPageNum():
    f = open(ROOTDIR + 'houseList_pageNo1.txt', encoding='utf-8')
    rawContent = f.read()
    pageNum = re.findall(PAGE_NUM, rawContent)
    return int(pageNum[0]) / 20 + 1

# ---- Parse one detail page into a tuple of fields ----
def parseHouseInfo(srcFile):
    f = open(srcFile, encoding='utf-8')
    content = f.read()
    location = re.findall(LOCATION, content)[0]
    location = location.split(' ')
    category1 = location[0]
    category2 = location[1]
    area = re.findall(AREA, content)[0]
    greeningrate = re.findall(GREENINGRATE, content)[0]
    layer = re.findall(LAYER, content)[0]
    type = re.findall(TYPE, content)[0]
    primaryschool = re.findall(PRIMARYSCHOOL, content)[0]
    buildingarea = re.findall(BUILDINGAREA, content)[0]
    plotratio = re.findall(PLOTRATIO, content)[0]
    developer = re.findall(DEVEPLOPER, content)[0]
    f.close()
    return (category1, category2, area, greeningrate, layer, type,
            primaryschool, buildingarea, plotratio, developer)

def parseHouseListToFile(srcFile, dstFile):
    f = open(srcFile, encoding='utf-8')
    rawContent = f.read()
    f.close()
    # Strip all whitespace so the regexes can match across line breaks
    p = re.compile('\s+')
    content = re.sub(p, '', rawContent)
    dnames = re.findall(NAME, content)
    names = []
    for dname in dnames:
        idx = dname.rfind('>')
        names.append(dname[idx + 1:])
    prices = re.findall(PRICE, content)
    daddress = re.findall(ADDRESS, content)
    ids = re.findall(ID, content)
    address = []
    for daddr in daddress:
        id = daddr.rfind('>')
        address.append(daddr[id + 1:])
    i = 0
    for x in names:
        # '$'-separated fields, one record per line
        dstFile.write(names[i] + '$' + prices[i] + '$' + address[i] + '$' + ids[i] + '\n')
        i = i + 1

# ---- Main: download and parse the detail pages ----
if __name__ == '__main__':
    # ---- Fetch the list pages (already done by the previous script) ----
    # getHouseListByPageno(1)
    # pageNum = getPageNum()
    # for i in range(2, int(pageNum) + 1):
    #     getHouseListByPageno(str(i))
    # ---- Parse the list pages ----
    # Current date as YYYYMMDD
    localtime = time.strftime('%Y%m%d', time.localtime(time.time()))
    f = open(ROOTDIR + localtime + '_houseList.txt', 'a+', encoding='utf-8')
    # for k in range(1, int(pageNum) + 1):
    for k in range(1, 115):
        parseHouseListToFile(ROOTDIR + "houseList_pageNo" + str(k) + ".txt", f)
    f.close()
    # ---- Fetch and parse the detail page for every record ----
    f = open(ROOTDIR + localtime + '_houseList.txt', encoding='utf-8')
    fd = open(ROOTDIR + localtime + '_houseInfo.txt', 'w', encoding='utf-8')
    k = 0
    for line in f:
        data = line.strip('\n')
        data = data.split('$')
        idx = data[3]
        getHouseInfoByPageno(idx, k)
        houseInfo = parseHouseInfo(ROOTDIR + "houseInfo_pageNo" + str(idx) + ".html")
        print(str(k) + '$' + "$".join(data) + '$' + "$".join(houseInfo))
        fd.write("$".join(data) + '$' + "$".join(houseInfo) + '\n')
        k += 1
    f.close()
    fd.close()
```
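`parseHouseInfo` indexes `[0]` on every `findall`, so a single missing field on a detail page aborts the whole record with an `IndexError`. A hedged helper sketch that falls back to a default instead (`first_match` is a name introduced here for illustration, not part of the original script):

```python
import re

def first_match(pattern, text, default=''):
    # Return the first capture of the pattern, or a default
    # when the field is absent from the page
    matches = re.findall(pattern, text)
    return matches[0] if matches else default

# e.g. inside parseHouseInfo:
#     area = first_match(AREA, content)
```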
Reading a CSV file

```python
import csv

with open('job.csv', 'r') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)
```

Writing a CSV file

```python
import csv
import os

# Create the CSV file and write the header row on first use
def createCsv(file):
    if not os.path.exists(file):
        csvfile = open(file, 'a+', encoding='utf-8', newline='')
        writer = csv.writer(csvfile)
        writer.writerow(paramname)  # paramname: the header row, defined elsewhere
    else:
        csvfile = open(file, 'a+', newline='')
        writer = csv.writer(csvfile)
    return writer
```
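When the CSV has a header row, `csv.DictReader` and `csv.DictWriter` let you address columns by name instead of position. A minimal sketch (the file and field names are only examples):

```python
import csv

# Read rows as dicts keyed by the header row
with open('job.csv', newline='', encoding='utf-8') as f:
    for row in csv.DictReader(f):
        print(row)

# Write dicts, emitting the header once
with open('out.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=['title', 'salary'])
    writer.writeheader()
    writer.writerow({'title': 'engineer', 'salary': '10k'})
```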
Calling Java from Python

```python
import sys
import jpype

name = sys.argv[1]
jarpath = '/home/dsadm/why/python'
# Start a JVM with the jar directory on the extension path
jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.ext.dirs=%s" % jarpath)
DECRYPT = jpype.JClass('why.fmrt.decrypt.DECRYPT')
upperName = DECRYPT.decrypt(name)
print(upperName)
jpype.shutdownJVM()
```

Simple CAPTCHA cracking

```python
from urllib.request import urlretrieve
from urllib.request import urlopen
from bs4 import BeautifulSoup
import subprocess
import requests
from PIL import Image
from PIL import ImageOps

def cleanImage(imagePath):
    image = Image.open(imagePath)
    # Threshold to black and white, then add a white border
    image = image.point(lambda x: 0 if x < 143 else 255)
    borderImage = ImageOps.expand(image, border=20, fill='white')
    borderImage.save(imagePath)

html = urlopen("http://www.pythonscraping.com/humans-only")
bsObj = BeautifulSoup(html, "html.parser")

# Gather prepopulated form values
imageLocation = bsObj.find("img", {"title": "Image CAPTCHA"})["src"]
formBuildId = bsObj.find("input", {"name": "form_build_id"})["value"]
captchaSid = bsObj.find("input", {"name": "captcha_sid"})["value"]
captchaToken = bsObj.find("input", {"name": "captcha_token"})["value"]

captchaUrl = "http://pythonscraping.com" + imageLocation
urlretrieve(captchaUrl, "captcha.jpg")
cleanImage("captcha.jpg")

# Run the tesseract OCR binary on the cleaned image
p = subprocess.Popen(["tesseract", "captcha.jpg", "captcha"],
                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
p.wait()
f = open("captcha.txt", "r")

# Clean any whitespace characters
captchaResponse = f.read().replace(" ", "").replace("\n", "")
print("Captcha solution attempt: " + captchaResponse)

if len(captchaResponse) == 5:
    params = {"captcha_token": captchaToken, "captcha_sid": captchaSid,
              "form_id": "comment_node_page_form", "form_build_id": formBuildId,
              "captcha_response": captchaResponse, "name": "Ryan Mitchell",
              "subject": "I come to seek the Grail",
              "comment_body[und][0][value]":
                  "...and I am definitely not a bot"}
    r = requests.post("http://www.pythonscraping.com/comment/reply/10",
                      data=params)
    responseObj = BeautifulSoup(r.text, "html.parser")
    if responseObj.find("div", {"class": "messages"}) is not None:
        print(responseObj.find("div", {"class": "messages"}).get_text())
else:
    print("There was a problem reading the CAPTCHA correctly!")
```
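Shelling out to the `tesseract` binary means writing and re-reading `captcha.txt`; if the `pytesseract` wrapper is installed (and the tesseract binary is on `PATH`), the OCR call can stay in-process. A minimal sketch under those assumptions:

```python
import pytesseract
from PIL import Image

# image_to_string runs tesseract on the image and returns the text
captchaResponse = pytesseract.image_to_string(Image.open("captcha.jpg"))
captchaResponse = captchaResponse.replace(" ", "").replace("\n", "")
```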
Slider CAPTCHA cracking

```python
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
import PIL.Image as image
import time, re, random
import requests
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

# Browser headers for the image download
agent = 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0'
headers = {
    'User-Agent': agent
}

# Merge the scrambled tiles back into the original image.
# filename: the downloaded tile sheet; location_list: the tile positions.
# crop() takes (left, upper, right, lower); paste() takes (image, (x, y)).
def get_merge_image(filename, location_list):
    # Open the tile sheet
    im = image.open(filename)
    # Create a new 260x116 image
    new_im = image.new('RGB', (260, 116))
    im_list_upper = []
    im_list_down = []
    # Cut out the tiles
    for location in location_list:
        # Tiles belonging to the upper half
        if location['y'] == -58:
            im_list_upper.append(im.crop((abs(location['x']), 58, abs(location['x']) + 10, 166)))
        # Tiles belonging to the lower half
        if location['y'] == 0:
            im_list_down.append(im.crop((abs(location['x']), 0, abs(location['x']) + 10, 58)))
    new_im = image.new('RGB', (260, 116))
    x_offset = 0
    # Paste the tiles back together
    for im in im_list_upper:
        new_im.paste(im, (x_offset, 0))
        x_offset += im.size[0]
    x_offset = 0
    for im in im_list_down:
        new_im.paste(im, (x_offset, 58))
        x_offset += im.size[0]
    return new_im

# Download and restore one image
# driver: the webdriver; div: xpath of the div holding the image
def get_image(driver, div):
    # Find the divs holding the tiles
    background_images = driver.find_elements_by_xpath(div)
    location_list = []
    imageurl = ''
    # The tiles are scrambled with CSS offsets; collect those offsets
    # so the image can be reassembled later
    for background_image in background_images:
        style = background_image.get_attribute('style')
        # Parse the tile URL and the x/y offsets out of the style attribute
        match = re.findall('background-image: url\("(.*)"\); background-position: (.*)px (.*)px;', style)[0]
        imageurl = match[0]
        location = {'x': int(match[1]), 'y': int(match[2])}
        location_list.append(location)
    # Swap the extension to get the real image URL
    imageurl = imageurl.replace("webp", "jpg")
    # Image file name
    imageName = imageurl.split('/')[-1]
    # Fetch the image
    session = requests.session()
    r = session.get(imageurl, headers=headers, verify=False)
    # Save it to disk
    with open(imageName, 'wb') as f:
        f.write(r.content)
    # Reassemble and return the original image
    return get_merge_image(imageName, location_list)

# Compare the RGB values of one pixel in two images
def is_similar(image1, image2, x, y):
    # Pixel at (x, y) in both images
    pixel1 = image1.getpixel((x, y))
    pixel2 = image2.getpixel((x, y))
    for i in range(0, 3):
        # A channel difference of 50 or more marks the gap
        if abs(pixel1[i] - pixel2[i]) >= 50:
            return False
    return True

# Locate the gap
def get_diff_location(image1, image2):
    # Both images are 260x116; compare every pixel's RGB values and
    # return the first column with a large difference
    for i in range(0, 260):
        for j in range(0, 116):
            if is_similar(image1, image2, i, j) == False:
                return i

# Build the x-axis movement track toward the gap
def get_track(length):
    track = []
    # Random step size: one to three pixels per move
    x = random.randint(1, 3)
    # Build the track
    while length - x >= 5:
        track.append(x)
        length = length - x
        x = random.randint(1, 3)
    # Cover the last five pixels one at a time
    for i in range(length):
        track.append(1)
    return track

# Slider-CAPTCHA solver
def main():
    # Launch Firefox
    driver = webdriver.Firefox()
    # Open the demo page
    driver.get("http://www.geetest.com/exp_embed")
    # Wait for the slider elements to render
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']").is_displayed())
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_bg gt_show']").is_displayed())
    WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_fullbg gt_show']").is_displayed())
    # Download the image with the gap and the full image
    image1 = get_image(driver, "//div[@class='gt_cut_bg gt_show']/div")
    image2 = get_image(driver, "//div[@class='gt_cut_fullbg gt_show']/div")
    # Locate the gap
    loc = get_diff_location(image1, image2)
    # Build the x-axis track
    track_list = get_track(loc)
    # Find the slider knob
    element = driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']")
    location = element.location
    # The knob's y position
    y = location['y']
    # Click the knob and hold it
    print("Step 1: click and hold the knob")
    ActionChains(driver).click_and_hold(on_element=element).perform()
    time.sleep(0.15)
    print("Step 2: drag the knob")
    track_string = ""
    for track in track_list:
        track_string = track_string + "{%d,%d}," % (track, y - 445)
        # xoffset=track+22: the offset is relative to the knob's top-left
        # corner, while the track describes its center, so add half the
        # knob's width.
        # yoffset=y-445: same idea; browsers render differently, so make
        # sure the computed value comes out to 22, half the knob's height.
        ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=y - 445).perform()
        # Randomize the pauses too; moving at a fixed rate is flagged as automation
        time.sleep(random.randint(10, 50) / 100)
    print(track_string)
    # xoffset=21 steps one pixel back; do it five times because the knob
    # starts five pixels from the left edge of the track
    for _ in range(5):
        ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=21, yoffset=y - 445).perform()
        time.sleep(0.1)
    print("Step 3: release the mouse")
    # Release the mouse
    ActionChains(driver).release(on_element=element).perform()
    time.sleep(3)
    # Click the verify button
    # submit = driver.find_element_by_xpath("//div[@class='gt_ajax_tip success']")
    # print(submit.location)
    # time.sleep(5)
    # Close the browser (left open here to make the demo easier to watch)
    # driver.quit()

if __name__ == '__main__':
    main()
```
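The uniform one-to-three-pixel steps above are easy to fingerprint; real drags tend to start fast and slow down near the target. A hedged sketch of an ease-out track generator (`get_eased_track` is a name introduced here, not part of the original script):

```python
import random

def get_eased_track(length):
    # Distribute the distance along a cubic ease-out curve: large steps
    # early, small steps near the gap, with a little jitter mixed in
    steps = 20
    track, covered = [], 0
    for i in range(1, steps + 1):
        eased = 1 - (1 - i / steps) ** 3            # ease-out in [0, 1]
        target = round(length * eased)
        move = target - covered + random.randint(-1, 1)
        move = max(0, min(move, length - covered))  # stay within bounds
        track.append(move)
        covered += move
    if covered < length:
        track.append(length - covered)              # make up any remainder
    return track

assert sum(get_eased_track(120)) == 120
```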
Building a web page with tornado

```python
import os
import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web
from view import *   # Indexhandler is defined in view.py
from tornado.options import define, options

define("port", default=8000, help="run on the given port", type=int)

class Application(tornado.web.Application):
    def __init__(self):
        handlers = [
            (r"/", Indexhandler),
        ]
        settings = dict(
            template_path=os.path.join(os.path.dirname(__file__), 'templates'),
            autoescape=None,
            debug=False,
        )
        tornado.web.Application.__init__(self, handlers, **settings)

if __name__ == "__main__":
    tornado.options.parse_command_line()
    http_server = tornado.httpserver.HTTPServer(Application(), xheaders=True)
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
```

Scheduled tasks

```python
#! /usr/bin/env python
# coding=utf-8
import time, os, sched

# First argument: a clock returning seconds elapsed since a fixed epoch
# Second argument: a function used to wait between events
schedule = sched.scheduler(time.time, time.sleep)

def perform_command(cmd, inc):
    # Re-schedule itself to run again in inc seconds, i.e. run periodically
    schedule.enter(inc, 0, perform_command, (cmd, inc))
    os.system(cmd)

def timming_exe(cmd, inc=60):
    # enter() schedules an event inc seconds from now
    schedule.enter(inc, 0, perform_command, (cmd, inc))
    # Run until the event queue is empty
    schedule.run()

# Run getMovieList.py once a day
timming_exe("getMovieList.py", 60 * 60 * 24)
```

Normalizing addresses with the Baidu Maps API

```python
from urllib.request import urlopen
from urllib.parse import urlencode
from urllib.error import URLError
import json

class xBaiduMap:
    def __init__(self, key='mgf2Gxr7EgnfPVQnpClZnsug'):
        self.host = 'http://api.map.baidu.com'
        self.path = '/geocoder?'
        self.param = {'address': None, 'output': 'json', 'key': key, 'location': None, 'city': None}

    def getLocation(self, address, city=None):
        rlt = self.geocoding('address', address, city)
        if rlt != None:
            l = rlt['result']
            if isinstance(l, list):
                return None
            return l['location']['lat'], l['location']['lng']

    def getAddress(self, lat, lng):
        rlt = self.geocoding('location', "{0},{1}".format(lat, lng))
        if rlt != None:
            l = rlt['result']
            # return l['formatted_address']
            # More detail is available under the 'addressComponent' key
            ld = rlt['result']['addressComponent']
            return (ld['city'] + ';' + ld['district'] + ';' + ld['street'] + ";" + ld['street_number'])

    def geocoding(self, key, value, city=None):
        if key == 'location':
            if 'city' in self.param:
                del self.param['city']
            if 'address' in self.param:
                del self.param['address']
        elif key == 'address':
            if 'location' in self.param:
                del self.param['location']
            if city == None and 'city' in self.param:
                del self.param['city']
            else:
                self.param['city'] = city
        self.param[key] = value
        try:
            r = urlopen(self.host + self.path + urlencode(self.param)).read()
        except URLError:
            print("URLError")
            return None
        str_response = r.decode('utf-8')
        rlt = json.loads(str_response)
        if rlt['status'] == 'OK':
            return rlt
        else:
            print("Decoding Failed")
            return None
```
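A hedged usage sketch for `xBaiduMap` (the address and coordinates are only examples, the bundled demo key may no longer be valid, and the old `/geocoder` endpoint may since have been replaced by newer versions of the API):

```python
m = xBaiduMap()

# Address -> (lat, lng)
print(m.getLocation('温州市人民路1号'))

# (lat, lng) -> "city;district;street;street number"
print(m.getAddress(27.994, 120.699))
```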
Multiprocessing

```python
import multiprocessing

# worker and PROCESS_NUM are defined elsewhere
jobs = []
for process_id in range(PROCESS_NUM):
    p = multiprocessing.Process(target=worker, args=(process_id,))
    jobs.append(p)
    p.start()
```

A small file splitter

```python
import os

def split_file(file_name, file_num):
    # Already split
    if os.path.exists("split_0.txt"):
        return
    # Count the total number of lines
    count = -1
    file = open(file_name, encoding='utf-8')
    for count, line in enumerate(file):
        pass
    count += 1
    file.close()
    # Lines per output file
    count_per_file = count / file_num
    # Create file_num empty output files
    for i in range(file_num):
        file = open("split_" + str(i) + ".txt", 'w', encoding='utf-8')
        file.close()
    # Distribute the lines across the output files
    file = open(file_name, encoding='utf-8')
    count = -1
    for count, line in enumerate(file):
        file_index = int(count / count_per_file)
        sub_file = open("split_" + str(file_index) + ".txt", "a+", encoding='utf-8')
        sub_file.write(line)
        sub_file.close()
    file.close()
```

Operating DB2 from Python

```python
import ibm_db

con = ibm_db.connect("DATABASE=FMRT;HOSTNAME=XX.XX.XX.XX;PORT=60000;PROTOCOL=TCPIP;UID=db2inst1;PWD=db2inst1;", "", "")
sql = getSql(inputfile)  # getSql and inputfile are defined elsewhere
stmt = ibm_db.exec_immediate(con, sql)
result = ibm_db.fetch_both(stmt)
rowidx = 0
while result:
    # do something with the row
    result = ibm_db.fetch_both(stmt)
ibm_db.close(con)
```

Chinese word segmentation with jieba

```python
import jieba

seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
print("Full Mode: " + "/ ".join(seg_list))      # full mode

seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
print("Default Mode: " + "/ ".join(seg_list))   # accurate mode

seg_list = jieba.cut("他来到了网易杭研大厦")      # accurate mode is the default
print(", ".join(seg_list))

seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造")  # search-engine mode
print(", ".join(seg_list))
```

Month-end check

```python
import calendar
import sys

def isMonthEnd(datetime):
    # datetime is a string like '20160731'
    year = int(datetime[0:4])
    month = int(datetime[4:6])
    day = int(datetime[6:8])
    # monthrange returns (weekday of the 1st, days in the month)
    wday, monthrange = calendar.monthrange(year, month)
    if day == monthrange:
        return 1
    else:
        return 0

isMonthEnd(sys.argv[1])
```
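The same check works without `calendar`: a date is the last day of its month exactly when the following day falls in a different month. A minimal sketch with the standard `datetime` module:

```python
from datetime import datetime, timedelta

def is_month_end(yyyymmdd):
    # Parse 'YYYYMMDD' and test whether the next day starts a new month
    d = datetime.strptime(yyyymmdd, '%Y%m%d')
    return (d + timedelta(days=1)).month != d.month

assert is_month_end('20160229')        # leap-year February
assert not is_month_end('20160228')
```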
Removing line separators

```python
import os

# Join all lines by deleting the CRLF pairs across the whole file
cmd = "sed ':a;N;$ s/\\r\\n//g;ba' " + oldfile + " > " + newfile
os.system(cmd)
```

Multithreading

```python
# -*- coding: utf-8 -*-
"""
    thread
    ~~~~~~~~~~~~~~~~

    Thread framework

    :copyright: (c) 2016 by why.
    :license: MIT, see LICENSE for more details.
"""
import threading

class Threadconfig():
    def __init__(self, thread_size):
        self.thread_size = thread_size

    def topen(self):
        self.thread_tasks = []

    def build(self, func, **kwargs):
        self.thread_task = threading.Thread(target=func, kwargs=(kwargs))
        self.thread_tasks.append(self.thread_task)

    def run(self):
        for thread_task in self.thread_tasks:
            thread_task.setDaemon(True)
            thread_task.start()
        # Poll until every thread has finished
        while 1:
            alive = False
            for thread_num in range(0, self.thread_size):
                alive = alive or self.thread_tasks[thread_num].isAlive()
            if not alive:
                break

    def __del__(self):
        self.thread_tasks = []
```
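A hedged usage sketch for `Threadconfig` (the worker function and its arguments are examples; note that `topen()` must be called before `build()`, and `thread_size` should match the number of tasks because `run()` polls exactly that many):

```python
import time

def worker(task_id=0):
    # Stand-in workload
    time.sleep(0.1)
    print('task %d done' % task_id)

cfg = Threadconfig(thread_size=3)
cfg.topen()                      # initialize the task list
for i in range(3):
    cfg.build(worker, task_id=i)
cfg.run()                        # start all threads and wait for them
```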
