简介

与 Selenium 类似,pyppeteer 也能渲染网页,但它的接口是异步的(基于 asyncio)。

使用方法

安装方法

pip install pyppeteer

# Requires Python 3.7+ for asyncio.run (originally written against Python 3.7.5).
import asyncio

from pyppeteer import launch
from pyquery import PyQuery as pq


async def main():
    """Load the demo page in a pyppeteer-controlled browser and count its quotes.

    Launches a browser, opens a new page, navigates to the demo site,
    parses the page's HTML with PyQuery and prints how many elements
    match the ``.quote`` selector.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto("http://quotes.toscrape.com/js/")
        # Parse the HTML as currently held by the browser page.
        doc = pq(await page.content())
        print("Quotes:", doc(".quote").length)
    finally:
        # Close the browser even if navigation or parsing raised, so the
        # cleanup step is never skipped.
        await browser.close()


asyncio.run(main())

复杂案例:屏蔽 CSS、图片、字体等资源

import asyncio

from pyppeteer import launch
from pyquery import PyQuery as pq

# Resource types whose requests are aborted instead of being fetched.
# Built once at import time so the handler does not rebuild it per request.
_BLOCKED_RESOURCE_TYPES = frozenset(
    {"image", "media", "eventsource", "websocket", "stylesheet", "font"}
)


class Global:
    """Holder for the single browser instance shared by every fetch() call."""

    # Assigned by main() before any fetch() coroutine runs.
    browser = None


async def intercept_request(req):
    """Abort requests for blocked resource types and let all others continue.

    Args:
        req: The intercepted request object passed to the page's
            "request" event handler.
    """
    if req.resourceType in _BLOCKED_RESOURCE_TYPES:
        await req.abort()
    else:
        await req.continue_()


async def fetch():
    """Open a page in the shared browser, load the timeline and print its link count.

    The page is always closed before returning, including when any step
    raises.
    """
    page = await Global.browser.newPage()
    try:
        await page.setUserAgent(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299"
        )  # custom user agent
        await page.setViewport({"width": 1080, "height": 960})
        # Enable interception so the handler can abort or continue each request.
        await page.setRequestInterception(True)
        page.on("request", intercept_request)
        await page.goto("https://juejin.im/timeline")
        # Fixed pause after navigation; presumably gives the page's scripts
        # time to render content before the HTML is read.
        await asyncio.sleep(3)
        doc = pq(await page.content())
        print("Quotes:", doc("a").length)
    finally:
        await page.close()


async def main():
    """Launch the shared browser, run ten fetch() coroutines concurrently, then close it."""
    Global.browser = await launch()
    try:
        await asyncio.gather(*(fetch() for _ in range(10)))  # run concurrently
    finally:
        # Close the browser even if one of the fetches raised.
        await Global.browser.close()


# asyncio.run replaces the deprecated get_event_loop().run_until_complete()
# pattern and matches the entry point used by the first example.
asyncio.run(main())