本帖最后由 18382747915 于 2019-4-17 16:04 编辑
你还在为斗图输了而懊恼吗,你还在为找图到处乱跑吗,Python爬虫来啦!
今天为了跟别人斗图随便写的,哈哈,勿喷
[Python] 纯文本查看 复制代码 from os import mkdir
import requests,re
session=requests.session()
def _bqb_safe_name(text, max_len=80):
    """Return *text* made safe for use as a single file or folder name.

    Characters that Windows forbids in file names, plus line breaks and
    tabs, are replaced with ``_``; the result is stripped and truncated to
    *max_len* characters.
    """
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]', "_", text).strip()
    return cleaned[:max_len]


def _bqb_extension(imgurl, default=".jpg"):
    """Return the image extension found in *imgurl*, or *default* if none is recognised."""
    import os
    from urllib.parse import urlparse

    ext = os.path.splitext(urlparse(imgurl).path)[1].lower()
    known = (".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp")
    return ext if ext in known else default


def bqb(title, save_root="F:/"):
    """Search www.fabiaoqing.com for *title* and download every matching image.

    Images are written to ``<save_root>/<title>/`` and named after the
    image's ``title`` attribute followed by a running counter.

    Args:
        title: Search keyword typed by the user; also used as the folder name.
        save_root: Directory under which the per-keyword folder is created.
            Defaults to ``"F:/"``, the location the script has always used.

    Returns:
        None. Progress and failures are reported with ``print``.

    Raises:
        requests.RequestException: if the home page, the search page or a
            result page cannot be fetched. Failures on individual images are
            reported and skipped instead.
    """
    import os
    from urllib.parse import quote

    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0',
        'Host': 'www.fabiaoqing.com'
    }
    timeout = 15  # seconds; without it a stalled connection blocks forever

    # Hit the home page first with the shared session (presumably so that any
    # cookies the site sets are sent along with the search request).
    session.get(url="https://www.fabiaoqing.com/", headers=header, timeout=timeout)

    # Escape the keyword so characters such as "/" or "?" cannot change the URL.
    search_url = "https://www.fabiaoqing.com/search/search/keyword/%s" % quote(title, safe="")
    search = session.get(url=search_url, headers=header, timeout=timeout)

    # Pagination links carry the keyword as the site spells it plus a page id.
    links = re.findall(
        '<a class="item" href="/search/search/keyword/(.*?)/type/bq/page/(.*?).html">',
        search.text,
        re.S,
    )
    if links:
        key = links[0][0]
        # De-duplicate while keeping order; page "1" is added only if missing
        # so that it is never downloaded twice.
        pages = list(dict.fromkeys([page for k, page in links if k == key] + ["1"]))
        page_urls = [
            "https://www.fabiaoqing.com/search/search/keyword/%s/type/bq/page/%s.html" % (key, page)
            for page in pages
        ]
        page_texts = (
            session.get(url=page_url, headers=header, timeout=timeout).text
            for page_url in page_urls
        )
    else:
        # No pagination links at all (previously an IndexError).
        # NOTE(review): assumes the search response itself lists the results
        # when there is only one page - confirm against the site.
        page_texts = [search.text]

    mkpath = os.path.join(save_root, _bqb_safe_name(title))
    try:
        os.makedirs(mkpath)
    except FileExistsError:
        print("文件夹已存在")
    except OSError as err:
        # Any other failure (missing drive, no permission) would make every
        # later write fail, so stop here instead of pretending it worked.
        print("无法创建文件夹 %s: %s" % (mkpath, err))
        return

    count = 0
    for page_html in page_texts:
        images = re.findall(
            '<img class="ui image bqppsearch lazy" data-original="(.*?)" title="(.*?)"',
            page_html,
            re.S,
        )
        for imgurl, img_title in images:
            count += 1
            print("正在下载第%s张" % count)
            # The counter keeps names unique even when titles repeat.
            filename = "%s%s%s" % (_bqb_safe_name(img_title), count, _bqb_extension(imgurl))

            # No custom headers here: the Host header above is only valid for
            # www.fabiaoqing.com, and images may live on another host.
            try:
                response = session.get(imgurl, timeout=timeout)
                response.raise_for_status()
            except requests.RequestException as err:
                print("第%s张下载失败: %s" % (count, err))
                continue

            try:
                with open(os.path.join(mkpath, filename), 'wb') as wj:
                    wj.write(response.content)
            except OSError as err:
                print("第%s张保存失败: %s" % (count, err))
    print("下载成功")
if __name__ == "__main__":
    # Script entry point: ask for the sticker keyword, then download the matches.
    keyword = input("请输入表情包名称:")
    bqb(keyword)
|