# download:高级爬虫实战-零碎把握破解反爬技能 挑战高薪
import re
import urllib
import os
def rename(name):
    """Return *name* with the ``.jpg`` extension appended.

    Args:
        name: Base file name (a string) without an extension.

    Returns:
        The string ``name + '.jpg'``.
    """
    return name + '.jpg'
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; anything ``urlopen`` accepts.

    Returns:
        Whatever ``read()`` on the opened response yields (a byte string).

    Raises:
        Any error raised by ``urlopen`` or by reading the response is
        propagated unchanged.
    """
    # ``urlopen`` lives in ``urllib.request`` on Python 3 and directly in
    # ``urllib`` on Python 2; resolve it locally so either interpreter works.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Release the connection even when read() raises.
        page.close()
def getImg(html, save_dir="E:\\pic"):
    """Download every matching ``.jpg`` found in *html* into *save_dir*.

    Image URLs are taken from ``src="....jpg"`` attributes that are
    immediately followed by `` pic_ext``. Files are named ``1.jpg``,
    ``2.jpg``, ... in the order the URLs appear, and each file name is
    printed as it is processed.

    Args:
        html: Page source, as text or as the byte string returned by
            ``getHtml``.
        save_dir: Directory that receives the images. Defaults to the
            location that used to be hard-coded. The directory must already
            exist. The process working directory is no longer changed;
            files are written via an explicit path instead.

    Returns:
        None.

    Raises:
        Errors from ``urlopen`` or from opening/writing the output file are
        propagated unchanged.
    """
    # ``urlopen`` lives in ``urllib.request`` on Python 3 and directly in
    # ``urllib`` on Python 2; resolve it locally so either interpreter works.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # On Python 3 a downloaded page is ``bytes`` while the pattern below is
    # text; decode so the two can be matched. On Python 2 ``bytes is str``,
    # so this branch is skipped and the input is used as-is.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    img_urls = re.findall(r'src="(.+?\.jpg)" pic_ext', html)

    for index, img_url in enumerate(img_urls, 1):
        response = urlopen(img_url)
        try:
            name = rename(str(index))
            print(name)
            data = response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
        # ``with`` guarantees the file is closed if the write raises.
        with open(os.path.join(save_dir, name), 'wb') as out:
            out.write(data)
# Script entry: fetch the thread page, save its images, then report completion.
html = getHtml("http://tieba.baidu.com/p/3553148164")
getImg(html)
# Parenthesised form prints the same text under both Python 2 and Python 3.
print('pic save!')