萌新练手,大佬略过
https://www.aliyundrive.com/s/YhdcXxYwDLc 提取码: l0z3点击链接保存,或者复制本段内容,打开「阿里云盘」APP ,无需下载极速在线查看,视频原画倍速播放。
# -*- coding: utf-8 -*-
"""Download Honor of Kings (王者荣耀) hero skin images.

Fetches the hero list from pvp.qq.com with requests, then renders each
hero's detail page with headless Chrome (the skin list is filled in by
JavaScript, so plain requests cannot see it) and saves every skin image
under D:\\__webCrawler\\王者荣耀\\<hero name>\\.
"""
import os
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ChromeOptions

HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/100.0.4896.75 Safari/537.36 Edg/100.0.1185.36 '
}
BASE_URL = 'https://pvp.qq.com/web201605/'
SAVE_ROOT = r'D:\__webCrawler\王者荣耀'


def _build_driver():
    """Create one headless Chrome driver, reused for every hero page."""
    options = ChromeOptions()  # headless, no visible browser window
    options.add_argument('--headless')
    # BUG FIX: was '--dissable-gpu' — a misspelling Chrome silently ignores.
    options.add_argument('--disable-gpu')
    # BUG FIX: was 'enable-outomation' — the real switch is 'enable-automation'
    # (removes the "controlled by automated software" banner / basic detection).
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    return webdriver.Chrome(options=options)


def _hero_links():
    """Return the relative hrefs of every hero on the hero-list page."""
    response = requests.get(BASE_URL + 'herolist.shtml', headers=HEADERS)
    response.encoding = 'gb18030'  # the page is GBK-family encoded
    soup = BeautifulSoup(response.text, 'lxml')
    hero_list = soup.find('ul', {'class': 'herolist clearfix'})
    return [anchor.attrs['href'] for anchor in hero_list.find_all('a')]


def _download_hero(driver, href):
    """Render one hero's detail page and save all of its skin images."""
    driver.get(BASE_URL + href)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    # BUG FIX: attrs was passed as the set {'class', 'cover-name'};
    # BeautifulSoup expects a dict mapping attribute name to value.
    hero_name = soup.find('h2', {'class': 'cover-name'}).get_text()
    hero_dir = os.path.join(SAVE_ROOT, hero_name)
    os.makedirs(hero_dir, exist_ok=True)  # race-free replacement for exists()+makedirs()

    # BUG FIX: same set-vs-dict attrs mistake as above.
    skin_list = soup.find('ul', {'class': 'pic-pf-list pic-pf-list3'})
    for img in skin_list.find_all('img'):
        # NOTE(review): 'data-imgname' is assumed to hold a protocol-less
        # image URL ('//game.gtimg.cn/...') — confirm against the live markup;
        # on some versions of this page it holds skin *names* instead.
        data_img = img.attrs['data-imgname']
        data_title = img.attrs['data-title']
        print(f'--->>>开始下载 {hero_name}_{data_title}.jpg')
        content = requests.get('http:' + data_img, headers=HEADERS).content
        with open(os.path.join(hero_dir, f'{data_title}.jpg'), 'wb') as fp:
            fp.write(content)
        print(f'--->>>下载成功 {hero_name}_{data_title}.jpg')
        time.sleep(1)  # be polite: throttle image requests
    time.sleep(1)


def main():
    """Crawl every hero; one shared Chrome instance for the whole run."""
    # Hoisted out of the loop: the original launched and quit a fresh
    # headless Chrome for every single hero, which is very slow.
    driver = _build_driver()
    try:
        for href in _hero_links():
            _download_hero(driver, href)
    finally:
        driver.quit()  # always release the browser, even on error
    print('--->>>全部下载成功<<<---')


if __name__ == '__main__':
    main()