简介
与 Selenium 类似,pyppeteer 也能渲染网页;不同的是,它的 API 是异步的(基于 asyncio)。
使用方法
安装方法
pip install pyppeteer pyquery
# Written for Python 3.7.5 (asyncio.run requires Python 3.7+).
import asyncio

from pyppeteer import launch
from pyquery import PyQuery as pq


async def main():
    """Open a JavaScript-rendered page with pyppeteer and count its quotes.

    Launches a browser, navigates to http://quotes.toscrape.com/js/, parses
    the page's HTML with PyQuery and prints the number of ``.quote``
    elements found.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto("http://quotes.toscrape.com/js/")
        # Parse the HTML as the browser currently holds it.
        doc = pq(await page.content())
        print("Quotes:", doc(".quote").length)
    finally:
        # Close the browser even when navigation or parsing fails, so the
        # browser process is not left running.
        await browser.close()


asyncio.run(main())
复杂案例:屏蔽 CSS、图片、字体等资源
import asyncio

from pyppeteer import launch
from pyquery import PyQuery as pq

# Resource types that intercept_request() aborts instead of loading.
_BLOCKED_RESOURCE_TYPES = frozenset(
    ["image", "media", "eventsource", "websocket", "stylesheet", "font"]
)


class Global:
    """Holder for the single browser instance shared by every fetch() task."""

    # Assigned in main() before any fetch() coroutine runs; None until then.
    browser = None


async def intercept_request(req):
    """Block several kinds of resources.

    Aborts the request when its ``resourceType`` is one of the blocked types
    (images, media, event sources, websockets, stylesheets, fonts) and lets
    every other request continue.
    """
    if req.resourceType in _BLOCKED_RESOURCE_TYPES:
        await req.abort()
    else:
        await req.continue_()


async def fetch():
    """Load one page in a new tab of the shared browser and count its links.

    Opens a page on ``Global.browser``, sets a custom user agent and
    viewport, enables request interception so ``intercept_request`` can drop
    unwanted resources, navigates to https://juejin.im/timeline, and prints
    the number of ``<a>`` elements in the resulting HTML.
    """
    page = await Global.browser.newPage()
    try:
        # Custom user agent.
        await page.setUserAgent(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299"
        )
        await page.setViewport({"width": 1080, "height": 960})
        # Interception must be enabled for the "request" handler to be able
        # to abort or continue requests.
        await page.setRequestInterception(True)
        page.on("request", intercept_request)
        await page.goto("https://juejin.im/timeline")
        # Fixed 3-second pause; presumably gives client-side rendering time
        # to finish before the HTML is read.
        await asyncio.sleep(3)
        doc = pq(await page.content())
        # The value printed is the number of <a> elements on the page.
        print("Quotes:", doc("a").length)
    finally:
        # Close the tab even if navigation or parsing raised, so failed
        # fetches do not leave pages open in the shared browser.
        await page.close()


async def main():
    """Launch the shared browser, run ten fetch() tasks concurrently, close it."""
    Global.browser = await launch()
    try:
        # Run the fetches concurrently.
        await asyncio.gather(*(fetch() for _ in range(10)))
    finally:
        # Always shut the browser down, including when a fetch() failed.
        await Global.browser.close()


# asyncio.run() creates and closes its own event loop; calling
# asyncio.get_event_loop() with no running loop is deprecated in newer
# Python versions.
asyncio.run(main())