from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
import selenium.webdriver.support.expected_conditions as EC
import time,requests,os
from multiprocessing.dummy import Pool
# Document page whose rendered page images are collected.
url = 'https://www.renrendoc.com/paper/137639109.html'

# Create the browser object. chromedriver is expected to sit in the Chrome
# installation directory and is referenced by its absolute path.
# NOTE(review): `executable_path` is deprecated in Selenium 4 and rejected by
# newer 4.x releases; switch to a `Service` object once the installed
# Selenium version is confirmed.
browser = webdriver.Chrome(executable_path=r'C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver.exe')
wait = WebDriverWait(browser, 10)
try:
    browser.get(url)

    # NOTE(review): the pasted source lost its indentation; this loop is
    # assumed to cover only the wait/click/sleep steps - confirm.
    for _ in range(2):
        # Wait for the 'fc2e' element to load; the wait returns the element
        # itself, so no second lookup is needed.
        button = wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'fc2e'))
        )
        # Click through JavaScript: a plain .click() does not work on some
        # elements.
        browser.execute_script('arguments[0].click();', button)
        time.sleep(2)

    # Scroll to the bottom of the page (useful for content that is loaded
    # on scroll).
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(1)

    page = wait.until(EC.presence_of_element_located((By.ID, 'page')))
    # Collect the image URLs; <img> tags without a usable `src` are skipped
    # so the downloader never receives None or an empty string.
    imgs = [
        src
        for src in (
            img.get_attribute('src')
            for img in page.find_elements(By.TAG_NAME, 'img')
        )
        if src
    ]
finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window.
    browser.quit()
# HTTP request headers: a desktop Chrome user-agent string, split over
# several literals for readability (the joined value is unchanged).
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.106 Safari/537.36'
    ),
}
def res(url, timeout=30):
    """Download one image and save it inside the ``imgs`` directory.

    The file name is the part of the URL's last path segment that follows
    the final ``-`` (the whole segment when it contains no ``-``). Any query
    string or fragment is dropped first so the name never contains ``/``,
    ``?`` or ``#``.

    Args:
        url: Image URL. ``None`` or an empty string is skipped.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx HTTP status.
    """
    if not url:
        return

    # Derive the name from the last path segment only; splitting the whole
    # URL on '-' could leave slashes or a query string in the file name.
    segment = url.split('#', 1)[0].split('?', 1)[0].rsplit('/', 1)[-1]
    name = segment.split('-')[-1]

    # A timeout keeps a stalled server from blocking a worker forever, and
    # the status check prevents saving an error page as an image.
    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()

    path = os.path.join('imgs', name)
    with open(path, 'wb') as f:
        f.write(resp.content)
    print('下载完毕')
# Make sure the output directory exists; exist_ok avoids the race between a
# separate existence check and the directory creation.
os.makedirs('imgs', exist_ok=True)

# Download the images with a pool of 4 workers. The pool is closed and joined
# even if a download raises, so the workers are always cleaned up.
pool = Pool(4)
try:
    pool.map(res, imgs)
finally:
    pool.close()
    pool.join()
print('over')
|