好友
阅读权限10
听众
最后登录1970-1-1
|
本帖最后由 ping1ggg 于 2017-3-12 13:27 编辑
# -*- coding: utf-8 -*-
import re
import urllib
import urllib2
def get_content(url):
    """Download ``url`` and return the raw response body.

    The request is sent with a desktop Firefox User-Agent and with Host and
    Referer headers that are hard-coded for tieba.baidu.com.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body exactly as returned by the response's ``read()``.

    Raises:
        Whatever ``urllib2.urlopen`` or ``read()`` raises (for example
        ``urllib2.URLError``); nothing is caught here.
    """
    req = urllib2.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:46.0) Gecko/20100101 Firefox/46.0',
    )
    # NOTE: the original also sent a header literally named "GET" whose value
    # was the URL. The request method is not a header, so it has been dropped.
    # Host header, fixed to the Tieba domain.
    req.add_header('Host', 'tieba.baidu.com')
    req.add_header(
        'Referer',
        'http://tieba.baidu.com/f?kw=%E6%A1%8C%E9%9D%A2&tab=album&red_tag=a0654939890',
    )
    html = urllib2.urlopen(req)
    try:
        return html.read()
    finally:
        # Release the connection even when read() fails part-way through.
        html.close()
def get_images(info):
    """Download every Tieba album thumbnail whose URL appears in ``info``.

    Each URL of the form
    ``https://imgsa.baidu.com/forum/abpic/item/<name>.jpg`` is printed and
    then saved as ``<name>.jpg`` via ``urllib.urlretrieve`` (relative path).

    Args:
        info: Page source to scan for image URLs.

    Returns:
        None. The function works purely through its side effects.
    """
    # Dots are escaped so they only match a literal '.', and the name part is
    # lazy and excludes quotes, whitespace and angle brackets. With the former
    # greedy ``.+.jpg`` several images on one HTML line were merged into a
    # single bogus URL running from the first prefix to the last ".jpg".
    pat = re.compile(
        r'https://imgsa\.baidu\.com/forum/abpic/item/([^\s"\'<>]+?)\.jpg'
    )
    for match in pat.finditer(info):
        image_url = match.group(0)
        # Single-argument parenthesised print behaves the same on Python 2.
        print(image_url)
        # group(1) is the part between the fixed prefix and ".jpg", i.e. what
        # the original obtained with the magic slice image_url[41:-4].
        urllib.urlretrieve(image_url, '%s.jpg' % match.group(1))
# Script entry: fetch one fixed Tieba page and download the album thumbnails
# found in it. (The stray trailing "|" from the forum paste, which made the
# last line a syntax error, has been removed.)
url = "https://tieba.baidu.com/p/1457328460#!/l/p1"
content = get_content(url)
get_images(content)
免费评分
-
查看全部评分
|
发帖前要善用【论坛搜索】功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。 |
|
|
|
|