python脚本获取桌酷壁纸

zyjsuper 发表于 2022-3-12 18:05

"""
************************************
@version: 0.1
@author: zayki
@contact: zyj_super@outlook.com
@file: zhuoku_spider.py
@time: 2022/2/27 15:24
************************************
"""

import os
import requests
from lxml import etree
from os.path import splitext,dirname,basename
import re

# Site root; hrefs scraped from the home page are resolved against it.
target_url = "http://www.zhuoku.com/"
# Browser-like headers sent with every HTML page request.
headers = {
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Connection": "close",
}

# Fetch the home page once at start-up and parse it.
response = requests.get(target_url, headers=headers, verify=False, timeout=5)
# The page is decoded as GBK; errors="replace" keeps a single stray byte
# from aborting the whole run with UnicodeDecodeError.
result = etree.HTML(response.content.decode('GBK', errors='replace'))
# hrefs and <a> elements listed under the "bizhititle" block of the page.
last_bizhi_urls = result.xpath('//div[@id="bizhititle"]//li//@href')
last_bizhi_titles = result.xpath('//div[@id="bizhititle"]//li//a')
# Album page URLs visited so far (appended to by get_pic_urls).
pic_urls = []
# USERPROFILE is normally only set on Windows; fall back to the user's home
# directory so the script does not crash with "NoneType + str" elsewhere.
# The trailing "" keeps the trailing separator the rest of the script expects.
savepath = os.path.join(os.getenv("USERPROFILE") or os.path.expanduser("~"), "desktop", "")

def get_proxy(pool_url="http://118.24.52.95:5010/get/", timeout=5):
    """Fetch one proxy record from a proxy-pool HTTP service.

    The default address is the original author's server; set up your own pool
    as described at https://github.com/jhao104/proxy_pool and pass its
    ``/get/`` endpoint as ``pool_url`` when that server is unavailable.

    Args:
        pool_url: URL of the pool endpoint that returns a JSON proxy record.
        timeout: Seconds to wait for the pool before giving up, so an
            unreachable pool cannot hang the script indefinitely.

    Returns:
        The decoded JSON body of the pool's response. Callers in this script
        read its ``'proxy'`` key.

    Raises:
        requests.RequestException: If the pool cannot be reached in time.
        ValueError: If the response body is not valid JSON.
    """
    return requests.get(pool_url, timeout=timeout).json()

def get_pic_urls(url):
    """Download every image of one wallpaper album into ``savepath``.

    The function loads the album page, follows the last entry of its page
    selector, reads the picture count from the last picture link found there
    (assumed to look like ``name(N).htm`` -- TODO confirm against the live
    site), then visits ``name(1).htm`` .. ``name(N).htm`` and saves the image
    shown on each page. Images are written to a folder named after the album
    inside ``savepath``.

    Args:
        url: Album href relative to ``target_url`` (absolute URLs also work).
            For backward compatibility a list/tuple of such hrefs is accepted
            too, in which case each one is processed in turn.

    Returns:
        None. Progress and per-image errors are printed; a failure on one
        image does not stop the remaining downloads.
    """
    from urllib.parse import urljoin, urlsplit

    # The script's original call site hands over the whole xpath() result
    # list; handle that instead of failing on "str + list".
    if not isinstance(url, str):
        for single_url in url:
            get_pic_urls(single_url)
        return

    sub_url = urljoin(target_url, url)
    pic_urls.append(sub_url)
    proxies = _fresh_proxies()
    print(sub_url)
    subresponse = requests.get(sub_url, headers=headers, verify=False, proxies=proxies, timeout=5)
    subresult = etree.HTML(subresponse.content.decode('GBK', errors='replace'))
    page_names = subresult.xpath('//div[@class="turn"]//select//@value')
    if not page_names:
        print("No page selector found on " + sub_url)
        return
    last_link_name = page_names[-1]
    last_link = urljoin(sub_url, last_link_name)
    # "<album>_<page>.htm" -> "<album>"; splitext() returns a (root, ext)
    # tuple, so take the root before splitting off the page suffix.
    link_name = splitext(basename(last_link_name))[0].rsplit('_', 1)[0]

    last_link_response = requests.get(last_link, headers=headers, verify=False, proxies=proxies, timeout=5)
    last_link_result = etree.HTML(last_link_response.content.decode('GBK', errors='replace'))
    pic_hrefs = last_link_result.xpath('//div[@class="bizhiin"]//a/@href')
    if not pic_hrefs:
        print("No picture links found on " + last_link)
        return
    last_pic_link = pic_hrefs[-1]

    # The number in parentheses of the last picture link is the image count.
    counts = re.findall(r'\((\d+)\)', last_pic_link)
    if not counts:
        print("Cannot read the picture count from " + last_pic_link)
        return
    pic_links_count = int(counts[-1])

    album_dir = os.path.join(savepath, link_name)
    try:
        os.makedirs(album_dir, exist_ok=True)
    except OSError as e:
        print("Cannot create " + album_dir + ": " + str(e))
        return

    for index in range(1, pic_links_count + 1):
        # Best effort: any failure is reported for this image only and the
        # loop moves on to the next one.
        try:
            proxies = _fresh_proxies()
            pic_url_link = urljoin(sub_url, link_name + "(" + str(index) + ")" + ".htm")
            pic_url_link_resq = requests.get(pic_url_link, headers=headers, verify=False, proxies=proxies, timeout=5)
            pic_url_link_html = etree.HTML(pic_url_link_resq.content.decode('GBK', errors='replace'))
            image_sources = pic_url_link_html.xpath('//img[@id="imageview"]//@src')
            if not image_sources:
                print("index:" + str(index) + " no image found on " + pic_url_link)
                continue
            # xpath() returns a list; resolve its first entry against the page.
            target_pic_link = urljoin(pic_url_link, image_sources[0])

            # The image request carries the picture page as Referer.
            headers_pic = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
                "Accept": "*/*",
                "Referer": pic_url_link,
                "Accept-Encoding": "gzip, deflate",
                "Accept-Language": "zh-CN,zh;q=0.9",
                "Connection": "close",
            }

            target_pic_link_resq = requests.get(target_pic_link, headers=headers_pic, verify=False, proxies=proxies, timeout=5)
            # Do not save an HTTP error page under an image file name.
            target_pic_link_resq.raise_for_status()
            file_name = basename(urlsplit(target_pic_link).path)
            with open(os.path.join(album_dir, file_name), 'wb') as file:
                file.write(target_pic_link_resq.content)
            print(target_pic_link)
            print("The image {0} has been saved in local path {1}".format(file_name, album_dir))
        except Exception as e:
            print("index:" + str(index) + " " + str(e))


def _fresh_proxies():
    """Ask the proxy pool for an address and build a requests ``proxies`` dict."""
    proxy_ip = get_proxy()['proxy']
    print(proxy_ip)
    # requests selects a proxy by URL scheme ("http"/"https"), and the proxy
    # URL itself needs a scheme; the pool is assumed to return "host:port".
    proxy_url = proxy_ip if "://" in proxy_ip else "http://" + proxy_ip
    return {"http": proxy_url, "https": proxy_url}

# xpath() returned a list of album hrefs; get_pic_urls() builds a URL from a
# single href, so feed them to it one at a time.
if not last_bizhi_urls:
    print("No wallpaper links found on " + target_url)
for url in last_bizhi_urls:
    get_pic_urls(url)

zyjsuper 发表于 2022-3-16 13:33

Appled 发表于 2022-3-14 10:13
由于连接方在一段时间后没有正确答复或连接的主机没有反应，连接尝试失败。',))楼主这是怎么回事呀

代理换成自己有效的代理地址，我自己的代理也失效了。

hiviki 发表于 2022-5-14 21:07

https://www.52pojie.cn/thread-1094316-1-1.html
大佬您好，你的这篇帖子，可以给出最新版本的关键修改地址吗？小白不会，帖子中有人给出的补丁是DLL文件，太容易报毒了

jak123 发表于 2022-3-12 18:18

slbcmgn 发表于 2022-3-12 18:34

楼主辛苦了

janth 发表于 2022-3-12 18:42

辛苦辛苦了

a354058 发表于 2022-3-12 20:44

学习了，谢谢分享

chen75054138 发表于 2022-3-13 11:54

学习了，谢谢分享

Appled 发表于 2022-3-14 10:13

由于连接方在一段时间后没有正确答复或连接的主机没有反应，连接尝试失败。',))楼主这是怎么回事呀

Appled 发表于 2022-3-16 22:30

感谢🙏

Appled 发表于 2022-3-16 22:31

非常感谢楼主，很不错

页: [1] 2

吾爱破解 - 52pojie.cn's Archiver

python脚本获取桌酷壁纸