# -*- coding: utf-8 -*-
"""Scrape job postings from lagou.com and store them in an Excel sheet.

Created on Mon Sep 7 21:44:39 2020

@author: ASUS
"""
# NOTE(review): the original paste contained the following stray text, which
# is unrelated to this script: "Forex FAQ https://www.kaifx.cn/lists/question/"
import json
import time

import requests
import xlwt

# Keys read from every job record, in spreadsheet column order.
# 'positionId' and 'positionName' appear twice because the original sheet
# layout has them in columns 0/23 and 1/25; kept for compatibility.
FIELDS = (
    'positionId',
    'positionName',
    'companyId',
    'companyFullName',
    'city',
    'companyLabelList',
    'companyLogo',
    'companyShortName',
    'companySize',
    'createTime',
    'district',
    'education',
    'financeStage',
    'firstType',
    'formatCreateTime',
    'industryField',
    'jobNature',
    'lastLogin',
    'latitude',
    'linestaion',
    'longitude',
    'matchScore',
    'positionAdvantage',
    'positionId',
    'positionLables',
    'positionName',
    'secondType',
    'skillLables',
    'stationname',
    'subwayline',
    'thirdType',
    'workYear',
)

workbook = xlwt.Workbook(encoding='utf-8')
mysheet = workbook.add_sheet('mysheet')

# Header row: one column title per field.
for _col, _name in enumerate(FIELDS):
    mysheet.write(0, _col, _name)


def main(kd, pages, row):
    """Fetch ``pages`` result pages for keyword ``kd`` and write them out.

    Each job record returned by the site is written to ``mysheet`` as one
    row, starting at row index ``row``. The workbook is saved to
    ``py3.xls`` when the function finishes, including when a request or a
    response lookup raises, so rows collected so far are not lost.

    Args:
        kd: Search keyword sent as the ``kd`` form field.
        pages: Number of result pages to fetch (pages ``1..pages``).
        row: Index of the first sheet row to write (row 0 is the header).
    """
    # The listing page is visited first to obtain the session cookies.
    url1 = 'https://www.lagou.com/jobs/list_python?city=%E5%85%A8%E5%9B%BD&cl=false&fromSearch=true&labelWords=&suginput='
    # The ajax endpoint that returns the JSON data.
    url = "https://www.lagou.com/jobs/positionAjax.json?px=default&needAddtionalResult=false"
    # Request headers.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Referer': 'https://www.lagou.com/jobs/list_python?px=default&city=%E5%85%A8%E5%9B%BD',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36',
        'Host': 'www.lagou.com'
    }
    try:
        # ``pages + 1`` so that exactly ``pages`` pages are requested; the
        # previous ``range(1, pages)`` stopped one page short.
        for page in range(1, pages + 1):
            data = {
                'first': 'false',
                'pn': page,
                'kd': kd
            }
            # A fresh session per page; ``with`` closes it afterwards.
            with requests.Session() as s:
                s.get(url=url1, headers=headers, timeout=1)
                respon = s.post(url=url, headers=headers, data=data,
                                cookies=s.cookies, timeout=3)
            time.sleep(1)
            result = json.loads(respon.text)
            # Raises KeyError/TypeError if the reply carries no job data;
            # the ``finally`` below still saves what was written so far.
            info = result["content"]["positionResult"]["result"]
            print(len(info))
            for j in info:
                for col, field in enumerate(FIELDS):
                    # ``.get`` leaves the cell blank when a record lacks
                    # the field instead of aborting the whole run.
                    mysheet.write(row, col, j.get(field))
                row += 1
    finally:
        workbook.save('py3.xls')


if __name__ == '__main__':
    # Ask for the keyword and the number of pages, then start at row 1
    # (row 0 holds the header).
    kd = input('输出关键字:')
    pages = int(input('输出要爬取多少页:'))
    main(kd, pages, 1)

# Sample response:
# {"resubmitToken":null,"requestId":null,"msg":null,"success":true,"content":{"hrInfoMap":{"6187967":{"userId":11765418,"phone":null,"positionName":"招聘经理",........."pageSize":15},"code":0}