共计 2054 个字符,预计需要花费 6 分钟才能阅读完成。
爬取 pixiv 需要科学上网
导包
import requests,os,time,winsound
设置蜂鸣,当下载完成之后系统蜂鸣
# Parameters for the beep played once the download run has finished.
freq = 440  # tone frequency, in Hz
duration = 3000  # beep length, in milliseconds
设置 headers
# HTTP headers sent with every request made by this script.
headers = {
    # Placeholder: replace with the cookie string copied from your own
    # logged-in pixiv session. It must be a quoted string; the ASCII
    # placeholder keeps the script parseable until you fill it in.
    "cookie": "YOUR_COOKIE",
    "Referer": "https://www.pixiv.net/",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
}
提取你自己的 cookie:先登录,右键网页,点击检查,点击 Network
在左侧随便点一个请求,找到 Headers,复制 cookie
# Download every original-size image of the works listed on the ranking pages.
# Relies on the module-level `headers`, `freq` and `duration` defined earlier.
work_no = 1  # counter: which work is currently being downloaded
os.makedirs("pic", exist_ok=True)  # create the image folder once, up front
for page in range(1, 3):  # ranking pages 1 and 2 (the original note says: top 100 works only)
    # Request one page of the ranking as JSON.
    ranking_resp = requests.get(
        "https://www.pixiv.net/ranking.php?p=" + str(page) + "&format=json",
        headers=headers,
        timeout=30,  # do not hang forever on a stalled connection
    )
    ranking_resp.raise_for_status()  # fail with a clear HTTP error instead of a KeyError below
    ranking_resp.encoding = "utf-8"
    print(ranking_resp.text)
    ranking = ranking_resp.json()
    for entry in ranking["contents"]:
        illust_id = entry["illust_id"]  # extract the work id
        # Ask for the list of pages (images) that make up this work.
        pages_resp = requests.get(
            "https://www.pixiv.net/ajax/illust/" + str(illust_id) + "/pages?lang=zh",
            headers=headers,
            timeout=30,
        )
        pages = pages_resp.json()
        # image_no counts which image of the current work is being downloaded.
        for image_no, image in enumerate(pages["body"], start=1):
            image_url = image["urls"]["original"].replace("\\", "")  # strip stray backslashes
            print(image_url)
            image_resp = requests.get(image_url, headers=headers, timeout=30)
            print(str(work_no) + " " + str(image_no))
            print(image_resp)
            if image_resp.ok:
                # Name the file <work id>_<image number>.<extension taken from the URL>.
                file_name = f"{illust_id}_{image_no}.{image_url.split('.')[-1]}"
                with open(os.path.join("pic", file_name), "wb") as f:
                    f.write(image_resp.content)  # save the image
            else:
                # Do not store an error page under an image file name.
                print(f"skipped {image_url}: HTTP {image_resp.status_code}")
            time.sleep(2)  # pause 2 s between images to avoid anti-scraping measures
        work_no += 1
winsound.Beep(freq, duration)  # system beep once everything has been downloaded
大功告成
完整代码
import requests,os,time,winsound
# Parameters for the beep played once the download run has finished.
duration = 3000  # beep length, in milliseconds
freq = 440  # tone frequency, in Hz

# HTTP headers sent with every request made by this script.
headers = {
    # Placeholder: replace with the cookie string copied from your own
    # logged-in pixiv session. It must be a quoted string; the ASCII
    # placeholder keeps the script parseable until you fill it in.
    "cookie": "YOUR_COOKIE",
    "Referer": "https://www.pixiv.net/",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
}
# Download every original-size image of the works listed on the ranking pages.
# Relies on the module-level `headers`, `freq` and `duration` defined above.
work_no = 1  # counter: which work is currently being downloaded
os.makedirs("pic", exist_ok=True)  # create the image folder once, up front
for page in range(1, 3):  # ranking pages 1 and 2
    # Request one page of the ranking as JSON.
    ranking_resp = requests.get(
        "https://www.pixiv.net/ranking.php?p=" + str(page) + "&format=json",
        headers=headers,
        timeout=30,  # do not hang forever on a stalled connection
    )
    ranking_resp.raise_for_status()  # fail with a clear HTTP error instead of a KeyError below
    ranking_resp.encoding = "utf-8"
    print(ranking_resp.text)
    ranking = ranking_resp.json()
    for entry in ranking["contents"]:
        illust_id = entry["illust_id"]
        # Ask for the list of pages (images) that make up this work.
        pages_resp = requests.get(
            "https://www.pixiv.net/ajax/illust/" + str(illust_id) + "/pages?lang=zh",
            headers=headers,
            timeout=30,
        )
        pages = pages_resp.json()
        # image_no counts which image of the current work is being downloaded.
        for image_no, image in enumerate(pages["body"], start=1):
            image_url = image["urls"]["original"].replace("\\", "")  # strip stray backslashes
            print(image_url)
            image_resp = requests.get(image_url, headers=headers, timeout=30)
            print(str(work_no) + " " + str(image_no))
            print(image_resp)
            if image_resp.ok:
                # Name the file <work id>_<image number>.<extension taken from the URL>.
                file_name = f"{illust_id}_{image_no}.{image_url.split('.')[-1]}"
                with open(os.path.join("pic", file_name), "wb") as f:
                    f.write(image_resp.content)
            else:
                # Do not store an error page under an image file name.
                print(f"skipped {image_url}: HTTP {image_resp.status_code}")
            time.sleep(2)  # pause between images
        work_no += 1
winsound.Beep(freq, duration)  # system beep once everything has been downloaded
正文完