关于python:python爬虫实战爬虫之路永无止境

1次阅读

共计 3463 个字符,预计需要花费 9 分钟才能阅读完成。

#!/usr/bin/env python

# -*- coding: utf-8 -*-

from threading import Thread
import tkinter as tk
import os, requests, jsonpath
from requests_html import HTMLSession
# requests_html session created at import time; nothing in the code shown
# here references it (all requests below go through `requests.get`).
session = HTMLSession()
class kuwoSpider(object):
    """Small Tkinter GUI that searches kuwo.cn for a singer and saves the hits as MP3 files.

    The window asks for a singer name and a number of result pages. Pressing
    the crawl button runs the download in a background thread and reports
    every song in the text box; files are written to ``./<singer name>/``.

    The kuwo.cn API calls are sent with the ``Referer``, ``Cookie`` and
    ``csrf`` headers taken from the class attributes below. They are empty by
    default and are meant to be filled in with values copied from your own
    browser session (the original listing left them as "use your own"
    placeholders; presumably the API rejects requests without them).
    """

    # Per-user request credentials; empty values are simply not sent.
    REFERER = ''
    COOKIE = ''
    CSRF = ''

    USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                  '87.0.4280.88 Safari/537.36')
    SEARCH_URL = 'http://www.kuwo.cn/api/www/search/searchMusicBykeyWord'
    PLAY_URL = 'http://www.kuwo.cn/api/v1/www/music/playUrl'
    # Seconds to wait for any single HTTP request before giving up, so a
    # stalled connection cannot hang the worker thread forever.
    REQUEST_TIMEOUT = 15

    # Fixed window size, shared by the constructor and center().
    WIDTH = 800
    HEIGHT = 600

    # Characters that are not allowed in file names on common file systems.
    _FILENAME_TABLE = str.maketrans(dict.fromkeys('\\/:*?"<>|', '_'))

    def __init__(self):
        """Create the main window and lay out all widgets."""
        self.window = tk.Tk()
        self.window.title('音乐下载器')
        self.window.geometry('{}x{}'.format(self.WIDTH, self.HEIGHT))

        # Prompt and input field for the singer name.
        # NOTE(review): the prompts say '请输出' ("output"); '请输入' ("input")
        # was probably intended -- left untouched, confirm before changing.
        self.label_user = tk.Label(self.window, text='请输出要下载的歌手名字:',
                                   font=('Arial', 12), width=130, height=2)
        self.label_user.pack()
        self.entry_user = tk.Entry(self.window, show=None, font=('Arial', 14))
        self.entry_user.pack(after=self.label_user)

        # Prompt and input field for the number of result pages to crawl.
        self.label_passwd = tk.Label(self.window, text="请输出爬取页数:",
                                     font=('Arial', 12), width=30, height=2)
        self.label_passwd.pack()
        self.entry_passwd = tk.Entry(self.window, show=None, font=('Arial', 14))
        self.entry_passwd.pack(after=self.label_passwd)

        # Text box that shows the outcome of every download.
        self.text1 = tk.Text(self.window, font=('Arial', 12), width=85, height=22)
        self.text1.pack()

        # Button 1 starts the crawl; it is packed above the text box.
        self.button_1 = tk.Button(self.window, text='爬取', font=('Arial', 12), width=10, height=1,
                                  command=self.run)
        self.button_1.pack(before=self.text1)

        # Button 2 clears both input fields and the text box.
        self.button_2 = tk.Button(self.window, text='革除', font=('Arial', 12), width=10, height=1,
                                  command=self.parse_hit_click_2)
        self.button_2.pack(anchor="e")

    def run(self):
        """Start the crawl in a background thread so the window stays responsive."""
        Thread(target=self.parse_hit_click_1).start()

    def parse_hit_click_1(self):
        """Read the two input fields, validate them and start the download.

        Invalid input (empty singer name, non-numeric or non-positive page
        count) is reported in the text box instead of raising inside the
        worker thread.
        """
        singer_name = self.entry_user.get().strip()
        try:
            page = int(self.entry_passwd.get().strip())
        except ValueError:
            page = 0
        if not singer_name or page < 1:
            self._log('请先填写歌手名字,并把页数填为大于 0 的整数')
            return
        self.main(singer_name, page)

    def main(self, singer_name, page):
        """Download every song found on the first *page* result pages for *singer_name*.

        Args:
            singer_name: Search keyword; also used as the output folder name.
            page: Number of result pages (30 songs each) to walk through.

        Failures of a single page or a single song are written to the text
        box and do not stop the remaining downloads.
        """
        headers = self._build_headers()
        for page_number in range(1, page + 1):
            try:
                songs = self._search(singer_name, page_number, headers)
            except (requests.RequestException, ValueError) as exc:
                self._log('第 {} 页搜索失败: {}'.format(page_number, exc))
                continue
            if not songs:
                self._log('第 {} 页没有搜索到歌曲'.format(page_number))
                continue
            for song_name, song_rid in songs:
                try:
                    self._download_song(singer_name, song_name, song_rid, headers)
                except (requests.RequestException, ValueError, OSError) as exc:
                    self._log('****{}**** 下载失败: {}'.format(song_name, exc))
                else:
                    self._log('****{}**** 下载胜利'.format(song_name))

    def _build_headers(self):
        """Return the HTTP headers for kuwo.cn API calls, skipping unset credentials."""
        headers = {'User-Agent': self.USER_AGENT, 'Host': 'www.kuwo.cn'}
        credentials = {'Referer': self.REFERER, 'Cookie': self.COOKIE, 'csrf': self.CSRF}
        headers.update({key: value for key, value in credentials.items() if value})
        return headers

    def _get_json(self, url, params, headers):
        """GET *url* with the given query parameters and return the decoded JSON body."""
        # Passing the query as `params` lets requests do the URL encoding.
        response = requests.get(url, params=params, headers=headers,
                                timeout=self.REQUEST_TIMEOUT)
        return response.json()

    def _search(self, singer_name, page_number, headers):
        """Return a list of ``(song_name, song_rid)`` pairs for one result page."""
        payload = self._get_json(self.SEARCH_URL, {
            'key': singer_name,
            'pn': page_number,
            'rn': 30,
            'httpsStatus': 1,
            'reqId': 'd301af60-6e1e-11ec-840f-dfca3a2ceb68',
        }, headers)
        # NOTE(review): both expressions match at any depth, so the pairing
        # assumes every object carrying 'rid' also carries 'name' -- confirm
        # against a real API response.
        names = jsonpath.jsonpath(payload, '$..name')
        rids = jsonpath.jsonpath(payload, '$..rid')
        # jsonpath() returns False (not an empty list) when nothing matches.
        if not names or not rids:
            return []
        return list(zip(names, rids))

    def _download_song(self, singer_name, song_name, song_rid, headers):
        """Resolve the play URL of *song_rid* and save the audio under ``./<singer_name>/``."""
        song_info = self._get_json(self.PLAY_URL, {
            'mid': song_rid,
            'type': 'music',
            'httpsStatus': 1,
            'reqId': 'c0ac92a0-6e35-11ec-b428-05be0a87bc11',
        }, headers)
        urls = jsonpath.jsonpath(song_info, '$..url')
        if not urls:
            raise ValueError('no play url in response')
        # The audio is fetched without the API headers, as in the original
        # listing (they pin 'Host' to www.kuwo.cn).
        response = requests.get(urls[0], timeout=self.REQUEST_TIMEOUT)
        # Do not store an HTTP error page as if it were an MP3 file.
        response.raise_for_status()
        folder = os.path.join('.', self._safe_filename(singer_name))
        os.makedirs(folder, exist_ok=True)
        target = os.path.join(folder, '{}.mp3'.format(self._safe_filename(song_name)))
        with open(target, 'wb') as f:
            f.write(response.content)

    @classmethod
    def _safe_filename(cls, name):
        """Return *name* with path separators and other reserved characters replaced by ``_``."""
        return str(name).translate(cls._FILENAME_TABLE).strip()

    def _log(self, message):
        """Append *message* followed by an empty line to the result text box."""
        # NOTE(review): this is called from the worker thread; whether Tk
        # tolerates that depends on the Tcl build -- confirm on the target
        # platform.
        try:
            self.text1.insert("insert", message + '\n\n')
        except tk.TclError:
            # The window was closed while downloads were still running;
            # there is nowhere left to show the message.
            pass

    def parse_hit_click_2(self):
        """Clear both input fields and the result text box."""
        self.entry_user.delete(0, "end")
        self.entry_passwd.delete(0, "end")
        self.text1.delete("1.0", "end")

    def center(self):
        """Move the window to the centre of the screen."""
        ws = self.window.winfo_screenwidth()
        hs = self.window.winfo_screenheight()
        x = int((ws / 2) - (self.WIDTH / 2))
        y = int((hs / 2) - (self.HEIGHT / 2))
        self.window.geometry('{}x{}+{}+{}'.format(self.WIDTH, self.HEIGHT, x, y))

    def run_loop(self):
        """Lock the window size, centre the window and enter the Tk main loop."""
        self.window.resizable(False, False)
        self.center()
        self.window.mainloop()

# Script entry point: build the GUI and hand control to the Tk main loop.
if __name__ == '__main__':
    b = kuwoSpider()
    b.run_loop()
正文完
 0