吾爱破解 - 52pojie.cn

 找回密码
 注册[Register]

QQ登录

只需一步,快速开始

查看: 4457|回复: 20
收起左侧

[讨论] selenium 实现91视频搜索下载 改进线程池 (聊胜于无)

[复制链接]
lihu5841314 发表于 2021-6-6 20:58
本帖最后由 lihu5841314 于 2021-6-6 21:56 编辑

[Asm] 纯文本查看 复制代码
import re,os
import  requests
from selenium import webdriver
from multiprocessing.dummy import  Pool
import time

# Build the options for a headless Chrome session.
chrome_options = webdriver.ChromeOptions()
# Chrome takes a custom user agent through the --user-agent switch. The
# previous 'User-Agent: "..."' argument is not a Chrome switch, so it did
# not set the user agent.
chrome_options.add_argument(
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/87.0.4280.88 Safari/537.36'
)
# Run without a visible window; on Linux hosts without a display the
# browser fails to start unless this is set.
chrome_options.add_argument('--headless')
# Disable the Chrome sandbox (commonly required when running as root).
chrome_options.add_argument('--no-sandbox')
# Drop the "enable-automation" switch so the session looks less like an
# automated browser to the site.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
# `options=` is the supported keyword; `chrome_options=` is deprecated.
driver = webdriver.Chrome(options=chrome_options)

url = "https://www.91kanju.com/"

try:
    # driver.get() returns None; the name is kept only for compatibility.
    rep1 = driver.get(url)
    time.sleep(1)
    name = input('请输入需要下载的电视剧电影名称:')
    # find_element(by, value) works on Selenium 3 and 4, whereas the
    # find_element_by_xpath shortcut was removed in Selenium 4.
    driver.find_element('xpath', '//*[@id="ff-wd"]').send_keys(name)
    time.sleep(1)
    # Click the search button through JavaScript.
    rep2 = driver.find_element(
        'xpath', '//*[@id="header-top"]/div/div/div/div[2]/div/form/button/i'
    )
    driver.execute_script('arguments[0].click();', rep2)
    time.sleep(1)
    # Open the first entry of the search results.
    rep3 = driver.find_element(
        'xpath', '/html/body/div/div[1]/div[1]/div/div/div[2]/ul/li/div[1]/a'
    )
    driver.execute_script('arguments[0].click();', rep3)
    time.sleep(1)
    # Follow the link on the detail page that leads to the player page.
    rep4 = driver.find_element(
        'xpath',
        '/html/body/div[1]/div/div[1]/div/div/div[1]/div/div[2]/div[2]/a',
    )
    driver.execute_script('arguments[0].click();', rep4)
    time.sleep(1)
    movie_url = driver.current_url
except BaseException:
    # Do not leave a headless Chrome process behind when a step fails or
    # the user interrupts the prompt; the error is re-raised unchanged.
    driver.quit()
    raise


# Request headers handed to requests.get by page_response: a desktop
# Chrome user-agent string.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.77 Safari/537.36"
    ),
}

def page_response(url, timeout=30):
    """Fetch *url* with the module-level ``headers`` and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before requests raises a
            timeout error. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        The ``requests.Response``. Its ``encoding`` is replaced by the
        encoding detected from the body, which is what ``.text`` then
        uses for decoding.
    """
    rep = requests.get(url=url, headers=headers, timeout=timeout)
    rep.encoding = rep.apparent_encoding
    return rep

from urllib.parse import urljoin

# Make sure the output directory for the video segments exists.
os.makedirs('./movie', exist_ok=True)
try:
    rep = page_response(movie_url)
finally:
    # The browser is no longer needed once the player page URL is known;
    # quit() ends the session even when the request above fails.
    driver.quit()
# Regex that pulls the playlist address out of the player page source.
obj = re.compile(r"url: '(?P<url>.*?)',", re.S)
# Extract the m3u8 address, failing with a clear message if it is absent.
found = obj.search(rep.text)
if found is None:
    raise RuntimeError("no m3u8 url found in page: " + movie_url)
m3u8_url = found.group("url")
# Download the m3u8 playlist and keep a local copy.
resp2 = requests.get(m3u8_url, timeout=30)
with open("video.m3u8", mode="wb") as f:
    f.write(resp2.content)
# Parse the playlist into a list of {"name", "url"} download jobs.
urls = []
cnt = 0
with open("video.m3u8", mode="r", encoding="utf-8") as f:
    for n in f:
        # Strip surrounding whitespace and the trailing newline.
        n = n.strip()
        # Skip blank lines and playlist tags/comments; a blank line would
        # otherwise be queued as an empty URL.
        if not n or n.startswith("#"):
            continue
        # Segments are numbered from 1 in playlist order.
        cnt = cnt + 1
        dic = {
            "name": str(cnt),
            # Resolve relative segment URIs against the playlist address;
            # absolute URIs pass through unchanged.
            "url": urljoin(m3u8_url, n),
        }
        urls.append(dic)

def page_down_data(dic):
    """Download one video segment and write it to ``./movie/<name>.ts``.

    ``dic`` must provide a ``"name"`` entry (a string used as the file
    stem) and a ``"url"`` entry (the address fetched via page_response).
    Progress is printed before the download and after the file is written.
    """
    segment_name = dic["name"]
    target = "./movie/" + segment_name + ".ts"
    print(target, "正在下载......")
    response = page_response(dic["url"])
    with open(target, mode="wb") as out_file:
        out_file.write(response.content)
        print(segment_name, "下载完成")
# Download the segments on 8 worker threads (multiprocessing.dummy.Pool
# is a thread pool).
pool = Pool(8)
try:
    data = pool.map(page_down_data, urls)
finally:
    # Close and join the pool even when a download raises, so the worker
    # threads are not left behind.
    pool.close()
    pool.join()

[Asm] 纯文本查看 复制代码
import re,os
import  requests
from selenium import webdriver
from multiprocessing.dummy import  Pool
import time

# Build the options for a headless Chrome session.
chrome_options = webdriver.ChromeOptions()
# Chrome takes a custom user agent through the --user-agent switch. The
# previous 'User-Agent: "..."' argument is not a Chrome switch, so it did
# not set the user agent.
chrome_options.add_argument(
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/87.0.4280.88 Safari/537.36'
)
# Run without a visible window; on Linux hosts without a display the
# browser fails to start unless this is set.
chrome_options.add_argument('--headless')
# Disable the Chrome sandbox (commonly required when running as root).
chrome_options.add_argument('--no-sandbox')
# Drop the "enable-automation" switch so the session looks less like an
# automated browser to the site.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
# `options=` is the supported keyword; `chrome_options=` is deprecated.
driver = webdriver.Chrome(options=chrome_options)

url = "https://www.91kanju.com/"

try:
    # driver.get() returns None; the name is kept only for compatibility.
    rep1 = driver.get(url)
    time.sleep(1)
    name = input('请输入需要下载的电视剧电影名称:')
    # find_element(by, value) works on Selenium 3 and 4, whereas the
    # find_element_by_xpath shortcut was removed in Selenium 4.
    driver.find_element('xpath', '//*[@id="ff-wd"]').send_keys(name)
    time.sleep(1)
    # Click the search button through JavaScript.
    rep2 = driver.find_element(
        'xpath', '//*[@id="header-top"]/div/div/div/div[2]/div/form/button/i'
    )
    driver.execute_script('arguments[0].click();', rep2)
    time.sleep(1)
    # Open the first entry of the search results.
    rep3 = driver.find_element(
        'xpath', '/html/body/div/div[1]/div[1]/div/div/div[2]/ul/li/div[1]/a'
    )
    driver.execute_script('arguments[0].click();', rep3)
    time.sleep(1)
    # Follow the link on the detail page that leads to the player page.
    rep4 = driver.find_element(
        'xpath',
        '/html/body/div[1]/div/div[1]/div/div/div[1]/div/div[2]/div[2]/a',
    )
    driver.execute_script('arguments[0].click();', rep4)
    time.sleep(1)
    movie_url = driver.current_url
except BaseException:
    # Do not leave a headless Chrome process behind when a step fails or
    # the user interrupts the prompt; the error is re-raised unchanged.
    driver.quit()
    raise

# page_response reads a module-level ``headers`` mapping, but this listing
# never defined one, so the first call failed with NameError. Define it
# here with a desktop Chrome user-agent string.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36"
}


def page_response(url, timeout=30):
    """Fetch *url* with the module-level ``headers`` and return the response.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before requests raises a
            timeout error. Without a timeout a stalled connection would
            block the script indefinitely.

    Returns:
        The ``requests.Response``. Its ``encoding`` is replaced by the
        encoding detected from the body, which is what ``.text`` then
        uses for decoding.
    """
    rep = requests.get(url=url, headers=headers, timeout=timeout)
    rep.encoding = rep.apparent_encoding
    return rep

from urllib.parse import urljoin

# Make sure the output directory for the video segments exists.
os.makedirs('./movie', exist_ok=True)
try:
    rep = page_response(movie_url)
finally:
    # The browser is no longer needed once the player page URL is known;
    # quit() ends the session even when the request above fails.
    driver.quit()
# Regex that pulls the playlist address out of the player page source.
obj = re.compile(r"url: '(?P<url>.*?)',", re.S)
# Extract the m3u8 address, failing with a clear message if it is absent.
found = obj.search(rep.text)
if found is None:
    raise RuntimeError("no m3u8 url found in page: " + movie_url)
m3u8_url = found.group("url")
# Download the m3u8 playlist and keep a local copy.
resp2 = requests.get(m3u8_url, timeout=30)
with open("video.m3u8", mode="wb") as f:
    f.write(resp2.content)
# Walk the playlist and download the segments one after another.
cnt = 1
with open("video.m3u8", mode="r", encoding="utf-8") as f:
    for n in f:
        # Strip surrounding whitespace and the trailing newline.
        n = n.strip()
        # Skip blank lines and playlist tags/comments; a blank line would
        # otherwise be requested as an empty URL.
        if not n or n.startswith("#"):
            continue
        # Resolve relative segment URIs against the playlist address;
        # absolute URIs pass through unchanged.
        resp3 = page_response(urljoin(m3u8_url, n))
        # The separator after "movie" was missing before, so files were
        # written next to the directory instead of inside it.
        path = './movie/' + str(cnt) + ".ts"
        with open(path, mode="wb") as pf:
            pf.write(resp3.content)
        cnt = cnt + 1
        print(cnt - 1, "下载完成")

免费评分

参与人数 3吾爱币 +7 热心值 +2 收起 理由
未闻の花名 + 1 看来你我的91不是同一个91.
woyucheng + 1 + 1 谢谢@Thanks!
苏紫方璇 + 5 + 1 欢迎分析讨论交流,吾爱破解论坛有你更精彩!

查看全部评分

发帖前要善用论坛搜索功能,那里可能会有你要找的答案或者已经有人发布过相同内容了,请勿重复发帖。

aipojie_L 发表于 2021-6-6 21:06
91大神?
mosou 发表于 2021-6-6 21:21
罗萨 发表于 2021-6-6 21:28
a4299110 发表于 2021-6-6 21:29
mosou 发表于 2021-6-6 21:21
原来不是我想的那个91

懂得都懂
 楼主| lihu5841314 发表于 2021-6-6 21:49

哪个91？难道是书店找刘备的那个？
dhluser 发表于 2021-6-6 22:02
lihu5841314 发表于 2021-6-6 21:49
哪个91？难道是书店找刘备的那个？

91短视频,你值得拥有!!!!!!!!!!
小夜好坏 发表于 2021-6-6 22:12
我是看91进来的= =
loadream 发表于 2021-6-6 22:17
是我想多了。。。
wanwfy 发表于 2021-6-6 22:44
pyppeteer 异步,你值得拥有
您需要登录后才可以回帖 登录 | 注册[Register]

本版积分规则

返回列表

RSS订阅|小黑屋|处罚记录|联系我们|吾爱破解 - LCG - LSG ( 京ICP备16042023号 | 京公网安备 11010502030087号 )

GMT+8, 2024-11-26 02:40

Powered by Discuz!

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表