爬取养眼图片
import webbrowser


def click():
    """Open the address stored in the module-level ``path`` in a web browser."""
    webbrowser.open(path)


# ``path`` is read by click() at call time, so it only has to exist before
# the call below, not before the definition above.
path = "https://www.tujigu.com/a/28708"
click()
import re
import urllib.request as request
import random
import os
import time
import PIL#Pycharm 下面没安装成功,安装Pillow后可以调用PIL模块了
from PIL import Image
import glob
def open_url(url):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    A User-Agent picked at random from a fixed list is attached to the
    request, and an opener using a randomly picked proxy is installed as the
    process-wide default urllib opener.

    Args:
        url: Address of the resource to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the resource cannot be retrieved.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    user_agents = [
        # Chrome
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
        # Sogou
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
        # 360
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0",
    ]
    proxies = [
        '211.137.52.158:8080',
        '222.240.184.126:8086',
        "211.137.52.158:8080",
        "60.13.42.128:9999",
        "124.117.101.56:8118",
    ]

    req = request.Request(url)
    req.add_header("User-Agent", random.choice(user_agents))

    # The mapping only names the "http" scheme, so requests using other
    # schemes are not routed through the chosen proxy.
    proxy_support = request.ProxyHandler({'http': random.choice(proxies)})
    opener = request.build_opener(proxy_support)
    # Installed globally: later request.urlopen() calls use this opener too.
    request.install_opener(opener)

    # Pass the Request object rather than the bare URL; otherwise the
    # User-Agent header set above is never sent. The ``with`` closes the
    # response once the body has been read.
    with request.urlopen(req) as response:
        return response.read().decode('utf-8')
def get_img(html):
    """Download the ``.jpg`` images from every page of a gallery.

    ``html`` is searched for the pagination links to work out how many pages
    the gallery has. The page addresses are then built from the module-level
    ``url``, each page is fetched with ``open_url`` and the image links found
    in it are handed to ``img_down``.

    Args:
        html: HTML text of the gallery page used to read the page count.

    Returns:
        None.
    """
    print("连接网络成功!")
    # Example of a full-size image link seen on the site:
    # https://www.tujigu.com/bigimg.html?img=https://lns.hywly.com/a/1/33741/66.jpg

    # Text between the first 'N.html">' link and the following
    # '</a> <a class'; the last number inside it is used as the page count.
    pagination = re.findall(r'\d.html">(.*?)</a> <a class', html)
    numbers = re.findall(r'(\d+)', pagination[0]) if pagination else []
    # Without pagination links (or without a number in them) only the first
    # page is processed instead of failing with an IndexError.
    last_page = int(numbers[-1]) if numbers else 1

    page_urls = [url]
    page_urls.extend(
        url + "/" + str(number) + ".html" for number in range(2, last_page + 1)
    )
    print(page_urls)

    img_pattern = re.compile(r'<img src="([^"]+\.jpg)" alt=')
    for page_url in page_urls:
        page_html = open_url(page_url)
        img_down(img_pattern.findall(page_html))
        # Pause between pages.
        time.sleep(1)
def img_down(imglist):
    """Download every image address in ``imglist`` into the ``downs`` folder.

    ``downs`` is a module-level path string that must end with a path
    separator, because the file name is appended to it directly. The file
    name is the part of the address after its last ``/``.

    Args:
        imglist: Iterable of image addresses.

    Returns:
        None.
    """
    if not imglist:
        # Nothing to download, so the folder is not created either.
        return

    # Create the folder once; exist_ok avoids the check-then-create race and
    # the repeated existence test for every image.
    os.makedirs(downs, exist_ok=True)

    for each in imglist:
        name = each.split("/")[-1]
        request.urlretrieve(each, downs + name)
        print(name + "图片正在下载")
def Resize_Image(downs):
    """Resize every ``*.jpg`` file in ``downs`` in place.

    Width and height are divided by the module-level ``fix`` and the result
    is saved over the original file.

    Args:
        downs: Folder path ending with a path separator; the glob pattern
            and the output name are appended to it directly.

    Returns:
        None.
    """
    for src in glob.glob(downs + "*.jpg"):
        # os.path.basename works with the platform's separator, unlike
        # splitting on a backslash.
        out_path = downs + os.path.basename(src)
        with Image.open(src) as img:
            width, height = img.size
            # Keep both sides at least 1 pixel so a large ``fix`` cannot
            # request an empty image.
            new_size = (max(1, int(width / fix)), max(1, int(height / fix)))
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            resized = img.resize(new_size, Image.LANCZOS)
        # Save after the source file has been closed, since the output path
        # is the same file.
        resized.save(out_path)
if __name__ == '__main__':
    # url, fix and downs are assigned at module level on purpose: get_img,
    # Resize_Image and img_down read them as globals.
    url = 'https://www.tujigu.com/a/34706'  # e.g. https://www.tujigu.com/a/28708
    print("正在连接网络。。。。。。")
    files = "009"  # name of the sub-folder the images are stored in
    fix = 1  # divisor applied to the image size (4 shrinks by a factor of 4)
    # Absolute folder the images are saved to. Every backslash is escaped;
    # the original "\m" was an invalid escape sequence.
    downs = "C:\\Users\\86184\\Documents\\Rainmeter\\Skins\\mmimg\\" + files + "\\"
    get_img(open_url(url))
    Resize_Image(downs)
    print("图片下载完成!")
# Forum thread text that followed the script on the same line:
# "Thanks for sharing, https://www.tujigu.com/"
# Quote header: aoenian posted on 2020-9-13 09:57
只是知道很厉害,虽然要求有点过分了,但是如果可以在方法里面简单写一下干嘛的可能对我们这些门外汉就更好 ...
files = "009"是存的文件夹
fix = 1 是修改图片的大小,4是缩小4倍
downs 是保存的绝对路径
然后直接修改这三个就OK了 完全看不懂啊 这个代码复制到python的界面就可以了吗 看不明白啊{:1_908:} zch1504139 发表于 2020-9-12 21:43
完全看不懂啊 这个代码复制到python的界面就可以了吗
是的呢{:1_926:}安装好其他包 Aska 发表于 2020-9-12 21:54
看不明白啊
按照步骤写就对了,1伪装2找到图片地址3保存 跟各位兄弟的想法一样,公式是真不错但是咋用呀。。 看这个干嘛?看真的不行? 我有一百多G的图片, 反正看不懂{:1_893:}