本帖最后由 huguo002 于 2019-5-9 20:37 编辑
Python 采集图片的例子,附带访问超时与异常处理!
访问异常的链接会写入文本文件(spider1.txt、spider2.txt、spider3.txt,共 3 个文件)!
# 爬取cssdesignawards.com图片
import requests
from lxml import etree
import os
# Module-level state shared by the crawler functions.
# (`global` statements are only meaningful inside a function body, so none
# are needed here; plain assignments already create module globals.)

# Running counter used by tp() to build the file name of each saved image.
i = 1

# Scratch strings for failed-URL log lines. hqt() rebinds cs_url3 through a
# `global` declaration; cs_url1 and cs_url2 are only initialised here and are
# never rebound at module level by the code in this file.
cs_url1 = ''
cs_url2 = ''
cs_url3 = ''
_SAVE_DIR = "./img/cssd/"


def _record_failed_url(log_name, failed_url):
    """Append *failed_url* as one line to the log file *log_name* in the save directory."""
    # Create only the directory: calling makedirs() on the log path itself
    # would create a *directory* named like the log file and make open() fail.
    os.makedirs(_SAVE_DIR, exist_ok=True)
    with open(_SAVE_DIR + log_name, 'a') as f:
        f.write(failed_url + '\r\n')


def tp(url):
    """Download the showcase image of one site-detail page into ``./img/cssd/``.

    The page at *url* is fetched, the first ``src`` matched by the XPath below
    is turned into an absolute image URL, and the image is saved as
    ``str(i)`` followed by the last four characters of that URL, where ``i``
    is the module-level counter. ``i`` is incremented only after a successful
    save.

    Fetch/parse/download errors are not raised to the caller:

    * a detail page that cannot be fetched or parsed (including a page where
      the XPath matches nothing) is appended to ``./img/cssd/spider1.txt``;
    * an image that cannot be downloaded or written is appended to
      ``./img/cssd/spider2.txt``.

    Errors raised while writing those log files themselves are not handled.

    Args:
        url: Absolute URL of a site-detail page.
    """
    global i
    # url="https://www.cssdesignawards.com/sites/artbox/35171/"
    try:
        html = requests.get(url, timeout=10).text
        nr = etree.HTML(html)
        tpurl = nr.xpath('//*[@id="page"]/section[1]/div/div/figure/a/img/@src')
        # IndexError on an empty match is handled below like any other failure.
        tpurl = 'https://www.cssdesignawards.com' + tpurl[0]
        print(tpurl)
        tpm = str(i) + tpurl[-4:]
    except Exception:
        # `Exception` rather than a bare `except:` so Ctrl+C still stops the crawl.
        print(url + "-----访问超时!")
        _record_failed_url("spider1.txt", url)
        # Without an image URL there is nothing to download; stop here instead
        # of falling through with `tpurl`/`tpm` unbound.
        return

    os.makedirs(_SAVE_DIR, exist_ok=True)
    try:
        r = requests.get(tpurl, timeout=10)
        # Treat HTTP error responses as failures so an error page is not
        # written to disk under an image file name.
        r.raise_for_status()
        with open(_SAVE_DIR + tpm, 'wb') as f:
            f.write(r.content)
        print(tpm + "----图片已保存!")
        i = i + 1
    except Exception:
        print(tpm + "-----访问超时" + tpurl)
        _record_failed_url("spider2.txt", tpurl)
def hqt(fenlei, num):
    """Crawl gallery listing pages ``1..num`` of one industry category.

    For every listing page the detail-page links matched by the XPath below
    are made absolute, printed, and handed to ``tp()``. If anything inside
    the processing of a page raises (fetching, parsing, or a call to
    ``tp()``), the rest of that page is skipped, the listing URL is appended
    to ``./img/cssd/spider3.txt`` and the loop continues with the next page.

    The last failed listing URL (with a trailing ``'\\r\\n'``) is also stored
    in the module-level ``cs_url3``.

    Args:
        fenlei: Value for the ``industry`` query parameter (e.g. ``"app"``).
        num: Number of listing pages to crawl; values below 1 crawl nothing.
    """
    global cs_url3
    for n in range(1, num + 1):
        ur = f'https://www.cssdesignawards.com/website-gallery?industry={fenlei}&page={n}'
        print(ur)
        try:
            htm = requests.get(ur, timeout=10).text
            ljnr = etree.HTML(htm)
            ljjh = ljnr.xpath('//*[@id="page"]/section/div/div[1]/article/div/div[1]/div/a[1]/@href')
            for lj in ljjh:
                lj = 'https://www.cssdesignawards.com' + lj
                print(lj)
                tp(lj)
        except Exception:
            # `Exception` rather than a bare `except:` so Ctrl+C still stops the crawl.
            print(ur + "-----访问超时!")
            cs_url3 = ur + '\r\n'
            # Create only the directory: calling makedirs() on the log path
            # itself would create a directory named "spider3.txt" and make
            # the open() below fail.
            os.makedirs("./img/cssd/", exist_ok=True)
            with open("./img/cssd/spider3.txt", 'a') as f:
                f.write(cs_url3)
if __name__ == "__main__":
    # Script entry point: crawl gallery pages 1 through 9 of the "app" industry.
    hqt(fenlei="app", num=9)
|