Python Selenium 百度云分享链接资源 批量保存
本帖最后由 helian147 于 2019-11-21 15:46 编辑
参考链接:https://blog.csdn.net/u010730731/article/details/83594103 (xls格式)
找了个docx的批量百度云链接,转存到自己的网盘。
修改的地方:
1、百度云修改过了:driver.find_element_by_id("mwxxPOD")
2、从docx读取
3、driver = webdriver.Chrome(path),path填入自己的chromedriver地址,注意是文件地址,不是目录地址。且与你安装的chrome版本匹配的webdriver,并添加到环境变量中。
4、登陆时目前有滑块验证,代码中没有处理,需要自己手动拉一下滑块,还没搞定。
chromedriver下载:
https://chromedriver.storage.googleapis.com/index.html
# -*- coding: UTF-8 -*-
from docx import Document #安装 pip install python-docx
import re
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
import time
# 凡是出现sleep的,都是因为网络等原因加载过慢,需要等一等
# 登录云盘
def login(driver, username, password):
    """Open the Baidu Netdisk home page and submit the account login form.

    Args:
        driver: Selenium WebDriver instance used to drive the browser.
        username: Text typed into the user-name field.
        password: Text typed into the password field.

    Fixed sleeps are used to give the page time to load. If any of the
    expected elements cannot be found, the NoSuchElementException is
    swallowed and the function returns without completing the login.
    """
    driver.get('https://pan.baidu.com/')
    try:
        time.sleep(5)
        # Switch the login panel to account/password mode.
        driver.find_element_by_id("TANGRAM__PSP_4__footerULoginBtn").click()
        time.sleep(0.5)
        # Fill the user name first, then the password.
        credentials = (
            ("TANGRAM__PSP_4__userName", username),
            ("TANGRAM__PSP_4__password", password),
        )
        for field_id, value in credentials:
            field = driver.find_element_by_id(field_id)
            field.clear()
            field.send_keys(value)
        driver.find_element_by_id("TANGRAM__PSP_4__submit").click()
        time.sleep(5)
    except NoSuchElementException:
        pass
# 将加密分享的文件保存到自己云盘的目录下
def extract(driver, srcurl, srcpwd, target_dir="/AA"):
    """Save a password-protected share into a folder of the current netdisk.

    Args:
        driver: Selenium WebDriver instance used to drive the browser.
        srcurl: URL of the share page to open.
        srcpwd: Extraction code typed into the share page.
        target_dir: Value of the ``node-path`` attribute of the destination
            folder in the save dialog. Defaults to ``"/AA"``.

    When an expected element is missing, a screenshot is written to the
    current working directory (``no_zbyDdwb.png`` for the select-all
    checkbox, ``no_such_element.png`` for anything else) and the function
    returns without raising.
    """
    driver.get(srcurl)
    try:
        # Element id of the extraction-code input (an earlier id was "esDEV5").
        code_box = driver.find_element_by_id("mwxxPOD")
        code_box.send_keys(srcpwd)
        driver.find_element_by_link_text("提取文件").click()
        time.sleep(10)
        # Two kinds of share exist: a single archive file, or a directory.
        # Only the directory case has a select-all checkbox, so its absence
        # is recorded with a screenshot and the save continues.
        try:
            driver.find_element_by_class_name("zbyDdwb").click()
        except NoSuchElementException:
            # One call is enough: save_screenshot() and
            # get_screenshot_as_file() write the same file.
            driver.get_screenshot_as_file("no_zbyDdwb.png")
        driver.find_element_by_link_text("保存到网盘").click()
        time.sleep(5)
        # Choose the destination folder inside the save dialog.
        folder = driver.find_element_by_xpath(
            "//span[@node-path='%s']" % target_dir)
        folder.click()
        driver.find_element_by_link_text("确定").click()
        time.sleep(2)
    except NoSuchElementException:
        driver.get_screenshot_as_file("no_such_element.png")
# URL characters; a match stops at whitespace (including U+00A0) or CJK text.
_SHARE_URL_RE = re.compile(r"https://[A-Za-z0-9\-._~/?#=&%]+")
# Four alphanumeric characters following a "提取码" / "密码" label.
_SHARE_CODE_RE = re.compile(r"(?:提取码|密码)\s*[::]?\s*([A-Za-z0-9]{4})")


def _parse_share_links(lines):
    """Pair every share URL found in *lines* with its extraction code."""
    urls = []
    codes = []
    link_rows = [row for row, text in enumerate(lines) if "https:" in text]
    for pos, row in enumerate(link_rows):
        url_match = _SHARE_URL_RE.search(lines[row])
        if url_match is None:
            continue
        # Only look for the code between this link and the next one, so a
        # link without a code never takes the code of the following link.
        if pos + 1 < len(link_rows):
            stop = link_rows[pos + 1]
        else:
            stop = len(lines)
        tail = "\n".join(
            [lines[row][url_match.end():]] + list(lines[row + 1:stop]))
        code_match = _SHARE_CODE_RE.search(tail)
        urls.append(url_match.group(0))
        # Keep both lists the same length even when no code is found.
        codes.append(code_match.group(1) if code_match else "")
    return urls, codes


def read_docx(path):
    """Read share links and their extraction codes from a .docx file.

    Only paragraph text is read; pictures and tables are not included.

    NOTE(review): the indexing and the character class of the original
    parsing code were lost, so the document layout is assumed here: each
    link sits in a paragraph containing ``https:``, and its 4-character
    code follows a "提取码" or "密码" label either later in the same
    paragraph or in a paragraph before the next link. Confirm against a
    real input document.

    Args:
        path: Path of the .docx file.

    Returns:
        A tuple ``(listUrl, listpwd)`` of two equally long lists of
        strings; a link without a recognizable code gets ``""``.
    """
    # Build the Word document object and collect the text of each paragraph.
    doc = Document(path)
    fulltext = [p.text for p in doc.paragraphs]
    return _parse_share_links(fulltext)
# 调用执行
def doWork():
    """Log in once, then save every share listed in the .docx file.

    The document path, the chromedriver path and the account credentials
    are placeholders that must be edited before running.
    """
    # File that holds the share links.
    path = r'文件地址***.docx'
    listUrl, listpwd = read_docx(path)
    # Raw string: the Windows path contains backslashes.
    driver = webdriver.Chrome(
        executable_path=r'D:\Program10\python37\chromedriver\chromedriver')
    try:
        login(driver, "网盘帐户名***", "密码***")
        # Hand extract() one link and its code at a time.
        for srcurl, srcpwd in zip(listUrl, listpwd):
            extract(driver, srcurl, srcpwd)
    finally:
        # Close the browser even when a step above raises.
        driver.quit()
if __name__ == '__main__':
    doWork()
Python 可真是啥都能搞
嗯 办公自动化?
登录部分楼主可以参考我的代码思路,手写轨迹,一个容易过的点,一般登陆都会在60s内完成。
from selenium.webdriver import ActionChains
from selenium import webdriver
import time
# Horizontal offsets, applied one after another while the slider is held.
# NOTE(review): the list that followed `for track in` is missing from the
# post; the values below are placeholders and must be tuned by hand.
SLIDER_TRACK = [20, 40, 60, 60, 40, 20]

start_time = time.time()
options = webdriver.ChromeOptions()
# Presumably reduces Chrome's console logging - confirm.
options.add_argument("log-level=3")
driver = webdriver.Chrome(options=options)
driver.get('https://pan.baidu.com')
time.sleep(2)
# Switch to account/password login, then submit the (blank placeholder)
# user name and password.
driver.find_element_by_xpath('//*[@id="TANGRAM__PSP_4__footerULoginBtn"]').click()
driver.find_element_by_xpath('//*[@id="TANGRAM__PSP_4__userName"]').send_keys('')
driver.find_element_by_xpath('//*[@id="TANGRAM__PSP_4__password"]').send_keys('')
driver.find_element_by_xpath('//*[@id="TANGRAM__PSP_4__submit"]').click()
time.sleep(1.5)
slid_ing = driver.find_element_by_class_name('vcode-spin-button')
# Drag the slider along the track repeatedly; the loop ends when releasing
# on the slider element raises (presumably because the slider is gone).
while True:
    ActionChains(driver).click_and_hold(on_element=slid_ing).perform()
    time.sleep(0.2)
    for track in SLIDER_TRACK:
        ActionChains(driver).move_by_offset(xoffset=track, yoffset=0).perform()
    try:
        ActionChains(driver).release(on_element=slid_ing).perform()
    except Exception:
        # Narrower than a bare except, so Ctrl-C still stops the script.
        break
    # NOTE(review): the original indentation is lost; this pause is assumed
    # to sit inside the loop, between attempts.
    time.sleep(1.5)
cookies = driver.get_cookies()
print(cookies)
end_time = time.time()
print(f'耗时:{int(end_time-start_time)}s')
driver.quit()
学习借鉴一下,谢谢分享
Hatsune_miku 发表于 2019-11-21 16:03
登录部分楼主可以参考我的代码思路,手写轨迹,一个容易过的点,一般登陆都会在60s内完成。
谢谢,我试试
打开保存网盘过一段时间就会出验证码
页:
[1]