import requests
from lxml import etree
from crawlers.userAgent import useragent
import re
import json
import time
class video(object):
    """Scrape a Huya user's video listing and print download URLs.

    Despite the name, ``downloadVideo`` does not save any files — it only
    resolves and prints each video's stream URLs via Huya's player API.
    """

    def __init__(self, url):
        # url: listing-page URL supplied by the user.
        self.url = url
        # Total number of listing pages the user asks to process
        # (filled in by getPages()).
        self.page = 0
        # Project helper that supplies random User-Agent strings.
        self.u = useragent()

    def getPages(self):
        """Fetch the listing page, print the total page count, and ask the
        user how many pages to process (stored in ``self.page``)."""
        headers = {'user-agent': self.u.getUserAgent()}
        rsp = requests.get(url=self.url, headers=headers)
        html = etree.HTML(rsp.text)
        # Pagination links: the second-to-last <a> holds the last page number.
        aList = html.xpath('//div[@class="user-paginator"]/ul/li/a')
        print('视频总页数为:', aList[-2].text)
        self.page = int(input("请输出想下载的视频页数:"))

    def downloadVideo(self):
        """Walk the first ``self.page`` listing pages, extract each video's
        ``vid`` and print its stream URLs via :meth:`getDownloadHref`."""
        for i in range(1, self.page + 1):
            # Page 1 has no "p" query parameter; later pages do.
            if i == 1:
                url2 = '{}?sort=news'.format(self.url)
            else:
                url2 = '{}?sort=news&p={}'.format(self.url, i)
            headers = {'user-agent': self.u.getUserAgent()}
            rsp = requests.get(url=url2, headers=headers)
            html2 = etree.HTML(rsp.text)
            hrefs = html2.xpath('//div[@class="content-list"]/ul/li/a')
            for anchor in hrefs:
                href = anchor.xpath('./@href')[0]
                title = anchor.xpath('./@title')[0]
                print('视频名称为[金属期货](https://www.gendan5.com/cf/mf.html):', title)
                # Fix: raw string with an escaped dot. The original
                # "/play/(\d*).html" let '.' match any character and the
                # non-raw "\d" is a DeprecationWarning in modern Python.
                vid = re.findall(r"/play/(\d*)\.html", href)[0]
                self.getDownloadHref(vid=vid)
                print('#' * 50)
            # Be polite to the server between listing pages.
            time.sleep(2)

    def getDownloadHref(self, vid):
        """Query Huya's player API for ``vid`` and print the stream URLs.

        The endpoint returns JSONP, i.e. ``callback({...})``; the JSON
        payload is carved out from between the outermost parentheses.
        """
        url3 = 'https://v-api-player-ssl.huya.com'
        params = {'callback': 'jQuery1124017458848743440036_1632126349635',
                  'r': 'vhuyaplay/video',
                  'vid': vid,
                  'format': 'mp4,m3u8',
                  '_': '1632126349643'}
        rsp = requests.get(url=url3,
                           headers={'user-agent': self.u.getUserAgent()},
                           params=params)
        infos = rsp.text
        # Fix: use rfind for the closing paren. The original find(')') took
        # the FIRST ')', which truncates the JSON if the payload itself
        # contains a ')' (e.g. inside a URL or title).
        lindex = infos.find('(')
        rindex = infos.rfind(')')
        dict2 = json.loads(infos[lindex + 1:rindex])
        list2 = dict2['result']['items']
        # Quality labels for the returned stream variants.
        # NOTE(review): assumes the API returns at most 3 items — an extra
        # item would raise IndexError on v_list2[i]; confirm against the API.
        v_list2 = ['高清', '原画', '晦涩']
        for i in range(len(list2)):
            print(v_list2[i], list2[i]['transcode']['urls'][0])
# Script entry point: ask for a listing URL, then resolve and print the
# stream URLs for the requested pages.
if __name__ == '__main__':  # fix: original "name" is undefined -> NameError
    url = input("请输出视频链接:")
    v = video(url)
    v.getPages()
    v.downloadVideo()