关于python:acwing查看做题总数

74次阅读

共计 1479 个字符,预计需要花费 4 分钟才能阅读完成。

import re
import requests
from lxml import etree
class Spider(object):
    """Scrape the AcWing problem list and count the problems marked as passed.

    Each list page is fetched from ``https://www.acwing.com/problem/<n>/``
    with a logged-in session cookie, and every table row carrying the
    "已通过这道题目" badge contributes the text of its link(s) to the result.
    """

    # NOTE(review): hard-coded, account-specific session cookie kept only so
    # that ``Spider()`` behaves as before. It is presumably expired or private;
    # pass your own value via ``Spider(cookie=...)`` instead of editing this.
    DEFAULT_COOKIE = (
        'csrftoken=mixU7wxaV35yyyCDhqbXcIoW3z3Ms0NH31jbbqH; sessionid='
        '344bo4nowvp9misa9suynjiwz2i5jcof; file_2922585_readed=""; file_2302034_readed=""'
    )

    def __init__(self, cookie=None, timeout=10):
        """Prepare the URL template, request headers and the solved counter.

        Args:
            cookie: Value for the ``Cookie`` request header. Defaults to
                ``DEFAULT_COOKIE`` when omitted.
            timeout: Seconds passed to ``requests.get`` as its timeout so a
                stalled connection cannot block the crawl forever.
        """
        self.base_url = 'https://www.acwing.com/problem/{}/'
        self.headers = {
            'Cookie': self.DEFAULT_COOKIE if cookie is None else cookie,
            'Referer': 'https://www.acwing.com/about/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                          ' (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36',
        }
        self.timeout = timeout
        self.count = 0

    def get_html(self, url):
        """Fetch ``url`` with the configured headers and return the body text."""
        response = requests.get(
            url=url,
            headers=self.headers,
            timeout=self.timeout,
        )
        return response.text

    def xpath_func(self, html):
        """Return the stripped link texts of all rows marked as passed.

        Args:
            html: Markup of one problem-list page.

        Returns:
            A list of non-empty strings, one per matching text node.
        """
        # normalize-space() makes the match insensitive to padding around the
        # title attribute, so both "已通过这道题目" and " 已通过这道题目 " hit.
        name_bds = ('//tbody/tr[./td/span[normalize-space(@title)='
                    '"已通过这道题目"]]/td/a/text()')
        base_obj = etree.HTML(html)
        stripped = (text.strip() for text in base_obj.xpath(name_bds))
        # Whitespace-only text nodes are not problem names; do not count them.
        return [text for text in stripped if text]

    def re_func(self, html, re_bds):
        """Return every match of the regex ``re_bds`` in ``html``.

        The pattern is compiled with ``re.S`` so ``.`` also matches newlines.
        """
        pattern = re.compile(re_bds, re.S)
        return pattern.findall(html)

    def parse_html(self, url):
        """Download ``url`` and return the passed-problem names found on it."""
        return self.xpath_func(self.get_html(url))

    def run(self, last_page=79):
        """Ask for confirmation, then crawl pages 1..``last_page`` and report.

        Prints each passed problem's name inside an ASCII frame followed by
        the total. ``self.count`` is reset first so that calling ``run`` more
        than once does not accumulate totals across runs.

        Args:
            last_page: Number of the final list page to visit (inclusive).
        """
        answer = input('您马上就要爬取 acwing 了,看一下你的做题数,您的劳动成果将会在上面展现进去,确定要看吗?(Y/N)')
        if answer.strip().upper() != 'Y':
            print('曾经退出,你这个弱者')
            return

        self.count = 0
        print('爬虫零碎曾经启动... 正在致力抓取,请稍等....')
        print('+---------------------------------+')
        print('|            name                 |')
        print('+---------------------------------+')
        for page in range(1, last_page + 1):
            for name in self.parse_html(self.base_url.format(page)):
                self.count += 1
                print('|' + name)
        print('+---------------------------------+')
        print('通过您的不懈努力,您一共做了{}道题,持续致力!!'.format(self.count))

# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    spider = Spider()
    spider.run()

正文完
 0