共计 3132 个字符,预计需要花费 8 分钟才能阅读完成。
由于轻餐邦的会员导出功能已被关闭,这里改用爬虫(Selenium)的方式把会员数据导出。
代码如下
#-*- coding:utf-8 -*-
from selenium import webdriver
from time import sleep
import json
import xlwt
import sys
# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module is reloaded to get it back, then the default codec is switched to
# UTF-8 so implicit str/unicode conversions of the Chinese text do not fail.
# Python 3 has no builtin reload() and already defaults to UTF-8, so the
# hack is skipped there instead of raising NameError on import.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')
# Entry page of the back office; it shows the login form.
LOGIN_URL = "http://co.qingcanbang.com/"
# Member list URL template; %d is the 1-based page number.
# NOTE(review): the URL asks for pageSize=10 while up to 20 table rows are
# read per page -- confirm which page size the server actually returns.
MEMBER_LIST_URL = 'http://co.qingcanbang.com/member/memberInfo/init?page=%d&pageSize=10'
# CSS selector template for one <tr> of the member table (%d = nth-child index).
ROW_SELECTOR = ('#topMain > div.main_right > div.main_table > '
                'div.table_content > table > tbody > tr:nth-child(%d)')
# XPath template for the first link in column 15 of a table row ("edit info").
EDIT_LINK_XPATH = '//*[@id="topMain"]/div[12]/div[2]/div[2]/table/tbody/tr[%d]/td[15]/a[1]'

# Table rows that are read on each page (nth-child indexes, inclusive).
FIRST_ROW = 2
LAST_ROW = 21
# Table columns copied verbatim into the sheet (inclusive). Labels noted by
# the script's author for columns 2..13, in order: member name, mobile number,
# card number, growth value, total stored balance, points, total stored
# amount, member privileges, home store, creation time, source, card status.
# Column 14 was not labelled.
FIRST_COLUMN = 2
LAST_COLUMN = 14
# Sheet column that receives the birthday read from the edit dialog.
BIRTHDAY_COLUMN = 15
# Highest page number that is requested.
LAST_PAGE = 2297
OUTPUT_PATH = "/users/wangqi/ldb.xls"


def member_list_url(page):
    """Return the member list URL for the given 1-based page number."""
    return MEMBER_LIST_URL % page


def row_selector(row):
    """Return the CSS selector of table row ``row`` (nth-child index)."""
    return ROW_SELECTOR % row


def cell_selector(row, column):
    """Return the CSS selector of cell ``column`` in table row ``row``."""
    return row_selector(row) + ' > td:nth-child(%d)' % column


def login(dr, bid, username, password):
    """Open the login page, fill in the three form fields and submit."""
    dr.get(LOGIN_URL)
    form_values = (
        ('chainCode', bid),
        ('accountOrBusiness', username),
        ('password', password),
    )
    for element_id, value in form_values:
        field = dr.find_element_by_xpath('//*[@id="%s"]' % element_id)
        field.clear()
        field.send_keys(value)
    login_btn = dr.find_element_by_xpath('//*[@id="login"]')
    sleep(3)
    login_btn.click()


def open_member_list(dr):
    """Click through the top menu entry and the left menu entry of the member list."""
    dr.find_element_by_xpath('/html/body/div[1]/div/div[2]/div[3]/ul/li[4]/a').click()
    sleep(3)
    dr.find_element_by_xpath('//*[@id="layouts_left_member_memberInfo"]/a').click()


def read_birthday(dr, row):
    """Open the edit dialog of table row ``row``, read the birthday, cancel.

    Returns the ``value`` attribute of the dialog's ``#birthday`` input.
    """
    edit_link = dr.find_element_by_xpath(EDIT_LINK_XPATH % row)
    # The link is scrolled into view before it is clicked.
    dr.execute_script("arguments[0].scrollIntoView();", edit_link)
    edit_link.click()
    birthday = dr.find_element_by_xpath('//*[@id="birthday"]').get_attribute('value')
    # The grey button of the edit form dismisses the dialog.
    dr.find_element_by_css_selector('#editMemberForm > div > div.button > a.gray').click()
    return birthday


def export_page(dr, worksheet, next_row):
    """Copy the member table of the currently loaded page into ``worksheet``.

    Rows are written starting at sheet row ``next_row``. Reading stops at the
    first table row that does not exist, so a page holding fewer rows than
    ``LAST_ROW - FIRST_ROW + 1`` is exported instead of raising.

    Returns the number of rows written.
    """
    written = 0
    for table_row in range(FIRST_ROW, LAST_ROW + 1):
        # find_elements_* returns an empty list instead of raising.
        if not dr.find_elements_by_css_selector(row_selector(table_row)):
            break
        sheet_row = next_row + written
        for column in range(FIRST_COLUMN, LAST_COLUMN + 1):
            cell = dr.find_element_by_css_selector(cell_selector(table_row, column))
            # Sheet columns mirror the table column numbers (0 and 1 stay empty).
            worksheet.write(sheet_row, column, cell.text)
        worksheet.write(sheet_row, BIRTHDAY_COLUMN, read_birthday(dr, table_row))
        written += 1
        print("已经导出 %d 条会员数据..." % (sheet_row + 1))
    return written


def export_members(dr, worksheet, last_page=LAST_PAGE):
    """Export pages 1..``last_page`` of the member list into ``worksheet``.

    Page 1 is expected to be already displayed; every later page is loaded
    through its URL and then scraped, so the reported page number is the page
    that was just exported. The export stops early at the first page without
    any rows.

    Returns the total number of rows written.
    """
    total = 0
    for page in range(1, last_page + 1):
        if page > 1:
            dr.get(member_list_url(page))
        written = export_page(dr, worksheet, total)
        if not written:
            print("第 %d 页没有数据,停止导出" % page)
            break
        total += written
        print("已完成 第 %d 页" % page)
    return total


def main():
    """Log in, walk the member list and save everything to ``OUTPUT_PATH``."""
    # Placeholders: chain code, account name and password of the back office.
    bid = 'xxxx'
    username = 'xxxx'
    password = 'xxxx'

    dr = webdriver.Chrome()
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('mysheet', cell_overwrite_ok=True)
    try:
        login(dr, bid, username, password)
        open_member_list(dr)
        export_members(dr, worksheet)
    finally:
        # Save whatever was collected even when a lookup fails half-way, so a
        # long run is not lost to a single missing element.
        workbook.save(OUTPUT_PATH)
    # NOTE: the browser session is not closed (the close call was disabled in
    # this script); add dr.quit() if the window should not outlive the run.


if __name__ == '__main__':
    main()
正文完