简介
与 Selenium 类似,pyppeteer 也能渲染网页;不同之处在于它基于 asyncio,是异步的。
使用方法
安装方法
pip install pyppeteer
# python 3.7.5
import asyncio
from pyppeteer import launch
from pyquery import PyQuery as pq
async def main():
    """Load the JavaScript-driven quotes demo page and print its quote count.

    The browser is closed in a ``finally`` clause so that a failed
    navigation or parse does not leave the launched browser running.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto("http://quotes.toscrape.com/js/")
        # Feed the page's current HTML to PyQuery for CSS-selector queries.
        doc = pq(await page.content())
        print("Quotes:", doc(".quote").length)
    finally:
        # Always release the browser, even when a step above raised.
        await browser.close()
if __name__ == "__main__":
    # Run the example only when executed as a script, not when imported.
    asyncio.run(main())
复杂案例:屏蔽 CSS、图片、字体等资源
import asyncio
from pyppeteer import launch
from pyquery import PyQuery as pq
class Global:
    """Namespace for state shared by the coroutines in this example."""

    # Shared browser handle; None until main() assigns the launched browser.
    browser = None
async def intercept_request(req):
    """Block several kinds of resources.

    Requests whose ``resourceType`` is one of the blocked kinds are
    aborted; every other request is allowed to continue.
    """
    blocked_kinds = ("image", "media", "eventsource", "websocket", "stylesheet", "font")
    if req.resourceType not in blocked_kinds:
        # Not a blocked kind: let the request go through untouched.
        await req.continue_()
        return
    await req.abort()
async def fetch():
    """Open a page in the shared browser, load the timeline and print its link count.

    The page gets a custom user agent and viewport, and request
    interception is enabled so ``intercept_request`` decides the fate of
    each request. The page is closed in a ``finally`` clause so that a
    failed navigation does not leave it open in the shared browser.
    """
    page = await Global.browser.newPage()
    try:
        # Custom user agent. The trailing space on the first literal matters:
        # adjacent string literals are joined with no separator.
        await page.setUserAgent(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299"
        )
        await page.setViewport({"width": 1080, "height": 960})
        # Turn interception on, then register the handler that aborts or
        # continues each request.
        await page.setRequestInterception(True)
        page.on("request", intercept_request)
        await page.goto("https://juejin.im/timeline")
        # Fixed pause before reading the DOM; presumably this gives
        # client-side rendering time to finish -- confirm it is long enough.
        await asyncio.sleep(3)
        doc = pq(await page.content())
        print("Quotes:", doc("a").length)
    finally:
        # Always close the page, even when a step above raised.
        await page.close()
async def main():
    """Launch one shared browser, run ten ``fetch()`` coroutines concurrently, then close it.

    The browser is closed in a ``finally`` clause so that an exception
    raised by any ``fetch()`` does not leave the launched browser running.
    """
    Global.browser = await launch()
    try:
        # Run the fetches concurrently; they all share the single browser.
        await asyncio.gather(*(fetch() for _ in range(10)))
    finally:
        await Global.browser.close()
if __name__ == "__main__":
    # asyncio.run() creates a fresh event loop, runs main() and closes the
    # loop afterwards. It replaces get_event_loop().run_until_complete(),
    # which is deprecated when no loop is running.
    asyncio.run(main())