About Python: a Python crawler that downloads Huya videos from any given link

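The script below takes the link to a Huya user's video-list page and asks how many pages to process. getPages reads the total page count from the pagination bar, downloadVideo walks through the chosen pages and extracts the vid of every video it finds, and getDownloadHref queries Huya's player API and prints a direct stream URL for each available quality.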

import requests
from lxml import etree
from crawlers.userAgent import useragent   # local helper module that supplies random User-Agent strings
import re
import json
import time
class video(object):

    def __init__(self, url):
        # url is the Huya video-list page link supplied by the user
        self.url = url
        self.page = 0
        self.u = useragent()

    def getPages(self):
        # Read the total number of video pages behind the given link
        headers = {'user-agent': self.u.getUserAgent()}
        rsp = requests.get(url=self.url, headers=headers)
        html = etree.HTML(rsp.text)
        aList = html.xpath('//div[@class="user-paginator"]/ul/li/a')
        print('Total number of video pages:', aList[-2].text)
        self.page = int(input("Enter how many pages of videos to download: "))
    def downloadVideo(self):
        # Despite its name, this method does not download the videos;
        # it only collects and prints the download link for each one
        for i in range(1, self.page + 1):
            if i == 1:
                url2 = '{}?sort=news'.format(self.url)
            else:
                url2 = '{}?sort=news&p={}'.format(self.url, i)
            headers = {'user-agent': self.u.getUserAgent()}
            rsp = requests.get(url=url2, headers=headers)
            html2 = etree.HTML(rsp.text)
            hrefs = html2.xpath('//div[@class="content-list"]/ul/li/a')
            for j in range(len(hrefs)):
                href = hrefs[j].xpath('./@href')[0]
                title = hrefs[j].xpath('./@title')[0]
                print('Video title:', title)
                vid = re.findall(r"/play/(\d*)\.html", href)[0]   # extract the vid from the href
                self.getDownloadHref(vid=vid)
                print('#' * 50)
            time.sleep(2)
    def getDownloadHref(self, vid):
        # Query Huya's player API for the stream URLs of one video
        url3 = 'https://v-api-player-ssl.huya.com'
        params = {'callback': 'jQuery1124017458848743440036_1632126349635',
                  'r': 'vhuyaplay/video',
                  'vid': vid,
                  'format': 'mp4,m3u8',
                  '_': '1632126349643'}
        rsp = requests.get(url=url3, headers={'user-agent': self.u.getUserAgent()}, params=params)
        infos = rsp.text
        # The response is JSONP (callback({...})); strip the callback wrapper before parsing
        lindex = infos.find('(')
        rindex = infos.rfind(')')   # use the last ')' so parentheses inside the payload are kept
        dict2 = json.loads(infos[lindex + 1:rindex])
        list2 = dict2['result']['items']
        v_list2 = ['HD', 'Original', 'Smooth']   # quality labels for the returned streams
        for i in range(len(list2)):
            print(v_list2[i], list2[i]['transcode']['urls'][0])

if __name__ == '__main__':

    url = input("Enter the video page link: ")
    v = video(url)
    v.getPages()
    v.downloadVideo()
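As the comment in downloadVideo notes, the script only prints the direct stream URLs and does not save any files. A minimal sketch of saving one of those printed mp4 links to disk with a streamed requests download might look like the following (the save_video name, the hard-coded User-Agent, and the chunk size are illustrative assumptions, not part of the original script):

import requests

def save_video(video_url, filename):
    # Illustrative helper, not from the original script: stream the file
    # in chunks so large videos are not held in memory all at once.
    headers = {'user-agent': 'Mozilla/5.0'}
    with requests.get(video_url, headers=headers, stream=True) as rsp:
        rsp.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in rsp.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    f.write(chunk)

# Example usage with one of the URLs printed by getDownloadHref:
# save_video('https://.../some_video.mp4', 'some_video.mp4')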