from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import selenium.webdriver.support.expected_conditions as EC
import time,requests,os
from multiprocessing.dummy import Pool
url = 'https://bz.zzzmh.cn/index'
# Create the browser object. chromedriver is kept in Chrome's install
# directory and is referenced here by its absolute path.
# NOTE(review): ``executable_path`` is deprecated in Selenium 4 in favour of a
# Service object; it is left unchanged so the script still runs on Selenium 3.
browser = webdriver.Chrome(executable_path=r'C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver.exe')
wait = WebDriverWait(browser, 10)
imgs = []  # download links collected below; consumed by the download pool later in the file
try:
    browser.get(url)
    # Block (up to the 10 s configured on ``wait``) until at least one
    # 'img-box' element is present in the DOM.
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'img-box')))
    # find_elements(By.…) works on Selenium 3 and 4; the find_elements_by_*
    # helpers used previously were removed in Selenium 4.
    keywords = browser.find_elements(By.CLASS_NAME, 'img-box')
    print(len(keywords))
    for keyword in keywords:
        try:
            # Locate the download anchor relative to the image box: up to the
            # parent, then div/span[2]/a. Alternative locators that were tried
            # earlier: partial link text '保存', or CSS '.down-span' followed
            # by its <a> tag.
            img_url = keyword.find_element(By.XPATH, '../div/span[2]/a').get_attribute('href')
        except Exception as exc:
            # Best effort: skip a box whose link cannot be read, but say why,
            # and (unlike a bare ``except``) let Ctrl-C / SystemExit through.
            print('skip element: %s' % exc)
            continue
        if not img_url:
            # get_attribute returns None when the anchor has no href; passing
            # that on would crash the downloader on ``url.split``.
            continue
        print(img_url)
        imgs.append(img_url)
        print(len(imgs))
finally:
    # quit() ends the WebDriver session and stops chromedriver; close() would
    # only close the current window and leave the driver process running.
    browser.quit()
# Request headers passed to requests.get() in res(): the user-agent string
# identifies the client as desktop Chrome 91 on 64-bit Windows 10.
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36'}
def res(url):
    """Download one image and save it inside the ``imgs`` directory.

    The file name is the text after the last ``-`` in *url*, reduced to its
    final path component so the result always lands directly in ``imgs``.
    The ``imgs`` directory must already exist.

    Network and HTTP errors are reported on stdout and the URL is skipped, so
    a single bad link does not abort the other downloads running in the pool.

    Args:
        url: Absolute URL of the image to fetch.

    Returns:
        None.
    """
    # basename() strips any directory part left over when the URL has no '-'
    # after its last '/', which would otherwise yield an unusable path.
    name = os.path.basename(url.split('-')[-1])
    if not name:
        print('skip (no file name in url): %s' % url)
        return
    try:
        # The timeout keeps a stalled connection from blocking a worker forever.
        resp = requests.get(url, headers=headers, timeout=30)
        # Do not save an HTML error page as if it were an image.
        resp.raise_for_status()
    except requests.RequestException as exc:
        print('download failed: %s (%s)' % (url, exc))
        return
    path = os.path.join('imgs', name)
    with open(path, 'wb') as f:
        f.write(resp.content)
    print('下载完毕')
# Make sure the target directory exists before any worker writes into it;
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs('imgs', exist_ok=True)
# Pool comes from multiprocessing.dummy, i.e. it is backed by threads;
# four workers download concurrently.
pool = Pool(4)
try:
    pool.map(res, imgs)
finally:
    # Always release the workers, even if a download raised.
    pool.close()
    pool.join()
print('over')