关于人工智能:Python 笔记——使用 Python 的 pyquery 简单爬取数据 demo

9次阅读

共计 1108 个字符,预计需要花费 3 分钟才能阅读完成。

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from pyquery import PyQuery as pq
import time
import random


def get_appinfo_from_yyb(app_pack,storenum):
    """Fetch one app's store page and return its info as a tab-separated row.

    Args:
        app_pack: Package name of the app; it becomes the first field of the
            returned row.
        storenum: Which page layout to parse. ``1`` reads the name, download
            count and description from the ``.det-*`` elements; ``2`` reads
            the name from ``<title>`` and the description from
            ``.app-text .pslide`` (no download count is available there, so
            that field is left empty).

    Returns:
        ``"app_pack\\tname\\tdownload_count\\tdescription"``, or ``""`` when
        the page yields no description.

    Raises:
        ValueError: If ``storenum`` is neither 1 nor 2.
    """
    # Reject unknown layouts before touching the network; previously this
    # path fetched the page and then failed on an unbound local variable.
    if storenum not in (1, 2):
        raise ValueError('unsupported storenum: %r' % (storenum,))

    # NOTE(review): the URL expression was redacted in the published source
    # and is left as the original placeholder. It must be replaced with the
    # real store URL (presumably built from app_pack) before this can run.
    url=xxxxxxxxxxxxxx
    data = pq(url)

    if storenum == 1:
        app_name = data('.det-name-int').text()
        # Drop the "下载" label so only the count text remains.
        app_down_cnt = data('.det-ins-num').text().replace(u'下载' ,'')
        app_desc = data('.det-app-data-info').text()
    else:
        # Drops the last 7 characters of the page title -- presumably a
        # fixed site-name suffix; TODO confirm against the live page.
        app_name = data('title').text()[:-7]
        app_down_cnt = ""
        app_desc = data('.app-text .pslide').text().replace('\n','')

    # An empty description is treated as "nothing usable on this page".
    if not app_desc:
        return ""
    return '\t'.join([app_pack, app_name, app_down_cnt, app_desc])

if __name__ == '__main__':
    # Batch driver: read one package name per line from the input file, look
    # each one up, and append every non-empty result row to the output file.
    storenum = 2
    file_num = "11999.csv"
    input_path= "1filename"+file_num
    output_path = "1result"+file_num
    # Uncomment to delay the start of the run by six hours.
    #time.sleep(3600*6)
    # utf-8-sig reads plain UTF-8 and also swallows a leading BOM, so the
    # first package name is not corrupted by spreadsheet-exported files.
    with open(input_path, encoding='utf-8-sig') as fr, \
            open(output_path, "w", encoding='utf-8') as fw:
        print(input_path,output_path)
        for num, line in enumerate(fr, start=1):
            # strip() also removes '\r' and stray spaces, which would
            # otherwise end up in the lookup and in the output row.
            app_pack_name = line.strip()
            if not app_pack_name:
                # Blank line: nothing to look up, so skip the fetch and the delay.
                continue
            app_info = get_appinfo_from_yyb(app_pack_name,storenum)
            print(num, app_pack_name, app_info)
            if app_info:
                fw.write('%s\n' % app_info)
            # Random 0.1-3.0 s pause between requests to avoid hammering the site.
            sleep_num = random.randint(100, 3000)/1000
            time.sleep(sleep_num)
""""""
正文完
 0