# Python crawler script (forum paste header removed)
"""爬取彼岸桌面美女系列指定页图片 — download wallpapers from one listing page of netbian.com.

The user is prompted for a page number ``k``; every detail page linked from
``/meinv/index_{k}.htm`` is visited and its preview image saved to
``4k壁纸{k}/pic_{n}.jpg``.
"""
import os
import re
import time

import requests
from bs4 import BeautifulSoup

# Spoofed desktop UA; the site serves GBK-encoded HTML.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36 Edg/92.0.902.78",
    "Connection": "close",
}
BASE_URL = 'http://www.netbian.com'


def _fetch_html(url):
    """GET *url* and return its body decoded as GBK (the site's encoding)."""
    resp = requests.get(url, headers=HEADERS, timeout=10)
    resp.encoding = 'gbk'
    try:
        return resp.text
    finally:
        resp.close()


def main():
    """Prompt for a page number, then download every wallpaper on that page."""
    k = input("你要爬取第几页")
    html = _fetch_html(f'{BASE_URL}/meinv/index_{k}.htm')

    # 从首页中获取子页面的路径 (extract detail-page paths from the listing page).
    # NOTE(review): a regex is fragile for HTML — BeautifulSoup is already
    # imported and could do this; the regex is kept as it matches the site today.
    paths = re.findall(r'<a href="(.*?)" title=".*?" target="_blank">', html)
    # The first match is a banner/ad link and the last is pagination — drop both.
    # Iterating the trimmed list (instead of a hard-coded range(19)) copes with
    # pages that list more or fewer thumbnails.
    paths = paths[1:-1]

    out_dir = '4k壁纸%s' % k
    os.makedirs(out_dir, exist_ok=True)  # create once, up front

    n = 1  # running picture counter used in the output file names
    for path in paths:
        child_page = BeautifulSoup(_fetch_html(BASE_URL + path), "html.parser")
        pic_div = child_page.find("div", attrs={"class": "pic"})
        if pic_div is None:  # detail page without the expected container
            continue
        for img in pic_div.find_all("img"):
            src = img.get("src")  # 获取下载链接 (image download URL)
            tu = requests.get(src, headers=HEADERS, timeout=10)
            try:
                # `with` guarantees the file handle is closed (the original
                # leaked it); read .content BEFORE closing the response.
                with open(os.path.join(out_dir, "pic_%s.jpg" % n), mode="wb") as f:
                    f.write(tu.content)
            finally:
                tu.close()
            time.sleep(1)  # be polite to the server
            print("下载了%s张壁纸" % n)
            n += 1


if __name__ == "__main__":
    main()
# Author's note: my first crawler as a beginner — feedback on shortcomings is welcome, thanks.