好友
阅读权限10
听众
最后登录1970-1-1
|
本帖最后由 banro512 于 2022-3-23 21:40 编辑
原帖 被删了
您的主题基于pyqt5的抖音作者全部视频批量采集软件--原创区不得发布采集软件
附带上源码发在这里,有需要的可自行编译
实现的功能
1. 按作者批量无水印下载
2. 按单个视频下载
3. GUI软件界面
技术栈 python3 + pyqt5 + playwright + requests
源码安装方法
1. 建立个空文件夹,打开 cmd.exe 进入该文件夹,分别执行 如下命令
[PowerShell] 纯文本查看 复制代码
REM Create the virtual environment in .\venv
python -m venv venv
REM Activate the virtual environment (cmd.exe syntax); the working directory stays unchanged
venv\Scripts\activate
REM Upgrade pip through the interpreter so pip.exe is not locked while it replaces itself
python -m pip install --upgrade pip
pip install pyqt5
pip install requests
pip install pyinstaller
pip install playwright
REM Install the browser inside the playwright package directory so PyInstaller can bundle it
set PLAYWRIGHT_BROWSERS_PATH=0
playwright install chromium
后面3条命令,务必按照顺序执行,否则可能出现一些奇怪问题
pyqt5 安装时可能会遇到一些版本对齐问题,请自行百度
2. 先创建下述空文件
创建 douyin.py 空文件,
创建 douyinui.py 空文件
创建 lib文件夹,并在lib文件夹下创建 douyinwr.py 空文件,config.py 空文件,__init__.py 空文件(这个文件始终为空即可)
3. 分别复制下述代码到各个py文件中
1. douyin.py 代码,这是主入口文件
[Python] 纯文本查看 复制代码
# -*- coding:utf-8 -*-
# 抖音短视频采集
import time
import requests
from PyQt5.QtCore import Qt, QThread, pyqtSignal
from lib import douyinwr as douyin
from lib import config
import threading
import os
import sys
import re
from PyQt5.QtWidgets import QApplication, QMainWindow, QMessageBox
import douyinui
# Background thread that periodically tells the GUI to refresh the progress log.
class WorkThread(QThread):
    """Polling thread that notifies the GUI while progress messages are queued.

    The thread never carries the message text itself: it only emits
    ``trigger`` and the connected slot reads ``config.message``.
    """

    # Argument-less signal; emitted whenever ``config.message`` is non-empty.
    trigger = pyqtSignal()

    def __init__(self):
        super().__init__()

    def run(self):
        """Loop forever, checking the message queue once every 3 seconds."""
        while True:
            print("qthread-run")
            if config.message:
                # Something is waiting to be displayed: wake the slot up.
                self.trigger.emit()
            time.sleep(3)
# Slot for the worker's trigger signal: flush queued progress messages to the log view.
def showresult():
    """Append every queued progress message to the log widget, then scroll down.

    The whole ``config.message`` queue is drained on each call. Showing only
    one message per signal made the log fall further and further behind,
    because the signal is emitted at most once per polling interval while
    the worker threads can queue several messages in that time.

    Reads the module-level ``myUi`` created by the entry script.
    """
    appended = False
    while config.message:
        myUi.textel.append(config.message.pop(0))
        appended = True
    if appended:
        # Keep the newest line visible.
        scrollbar = myUi.textel.verticalScrollBar()
        scrollbar.setValue(scrollbar.maximum())
# Modal warning popup.
def toast(title, text):
    """Show a blocking warning dialog with a single acknowledgement button.

    Args:
        title: Window title of the dialog.
        text: Message displayed in the dialog body.
    """
    dialog = QMessageBox(QMessageBox.Warning, title, text)
    dialog.addButton("知道了", QMessageBox.YesRole)
    dialog.exec_()
# Validate the two input fields and extract the URLs they contain.
def getrighturl(ztext, vtext):
    """Extract the author-page URL and the single-video URL from user input.

    Each non-empty input is searched with two patterns in priority order and
    the first captured URL wins. Empty inputs are skipped without an error.

    Args:
        ztext: Text pasted into the author-page field (may be empty).
        vtext: Text pasted into the single-video field (may be empty).

    Returns:
        A dict with keys ``"msg"`` (accumulated error text, empty when the
        input is acceptable), ``"zurl"`` and ``"vurl"`` (extracted URLs or
        empty strings).
    """
    author_patterns = (
        r'.*?(https://v\.douyin\.com/\w+/?).*?',
        r'.*?(https://www\.douyin\.com/user/[\w\-]+[\?/]?).*?',
    )
    video_patterns = (
        r'.*?(https://www\.douyin\.com/video/\d+)\??.*?',
        r'.*?(https://v\.douyin\.com/\w+/?).*?',
    )

    def first_hit(patterns, text):
        # Return the first capture of the first pattern that matches, else None.
        for pattern in patterns:
            hits = re.findall(pattern, text)
            if hits:
                return hits[0]
        return None

    result = {"msg": "", "zurl": "", "vurl": ""}
    checks = (
        ("zurl", ztext, author_patterns,
         "作者主页url不正确,请在抖音app-右上角-分享主页-复制链接\n"),
        ("vurl", vtext, video_patterns,
         "视频地址url不正确,请在抖音app-视频播放页-分享按钮-复制链接\n"),
    )
    for key, text, patterns, error in checks:
        if len(text) == 0:
            continue
        hit = first_hit(patterns, text)
        if hit is None:
            result["msg"] += error
        else:
            result[key] = hit

    # At least one of the two URLs is required.
    if not (result["zurl"] or result["vurl"]):
        result["msg"] += "作者主页和视频地址,至少填写一个"
    return result
# Click handler of the start/stop button.
def startfun(myUi):
    """Start the crawl, or stop it when it is already running.

    Args:
        myUi: The UI object holding ``startbtn``, ``zuozheinput`` and
            ``videoinput``.

    Returns:
        ``False`` when the click stopped a running crawl or the input was
        rejected; ``None`` when the environment check failed or the worker
        threads were started.
    """
    if not config.Yescan:
        toast("不符合执行条件","请按照下方提示配置环境")
        return

    # A click while running is a stop request; the workers poll config.runing.
    if config.runing:
        config.runing = False
        myUi.startbtn.setText("已停止,点击重新开始")
        return False

    author_text = myUi.zuozheinput.text().strip()
    video_text = myUi.videoinput.text().strip()
    reslist = getrighturl(author_text, video_text)
    if reslist["msg"]:
        toast("出错了", reslist["msg"])
        return False

    config.runing = True
    myUi.startbtn.setText("运行中,点击停止")
    if len(reslist.get("vurl", "")) > 5:
        config.videourls.append(reslist["vurl"])
    print(reslist)

    workers = []
    if len(reslist.get("zurl", "")) > 5:
        # Collect video links from the author's home page.
        workers.append(threading.Thread(
            target=douyin.geturlbyzuozhe, args=(reslist["zurl"],), daemon=True))
    # Resolve queued video page links into downloadable entries.
    workers.append(threading.Thread(target=douyin.runfun, daemon=True))
    # Three download threads.
    workers.extend(
        threading.Thread(target=douyin.download, daemon=True) for _ in range(3))
    for worker in workers:
        worker.start()

    config.message.append("已开始执行,请等待...")
if __name__=="__main__":
    # Script entry point: build the window, wire up the signals, run the event loop.
    app = QApplication(sys.argv)
    myMainWindow = QMainWindow()
    # myUi stays a module-level name because showresult() reads it.
    myUi = douyinui.Ui_MainWindow(WorkThread())
    myUi.setupUi(myMainWindow)
    myUi.startbtn.clicked.connect(lambda: startfun(myUi))
    # os.startfile opens the folder in Explorer without going through a shell
    # command line, so a project path containing spaces no longer breaks it
    # (Windows only, like the "start explorer" command it replaces).
    myUi.opendir.clicked.connect(lambda: os.startfile(os.path.join(config.rootDir, "video")))
    try:
        myMainWindow.show()
        # Connect the slot before the polling thread starts, so that no
        # emission can happen while nothing is listening.
        myUi.work.trigger.connect(showresult)
        myUi.work.start()
        sys.exit(app.exec_())
    except Exception as e:
        print(e)
2. douyinui.py 界面布局文件
[Python] 纯文本查看 复制代码 # -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'douyinui.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
    """Widget tree of the main window (derived from pyuic5 output for douyinui.ui).

    Layout, top to bottom: author-URL row, single-video-URL row, a row with
    the start button and the "open video folder" button, and the log view.
    """

    def __init__(self, worker):
        # Keep the polling thread on the UI object so the entry script can reach it.
        self.work = worker

    @staticmethod
    def _apply_size_policy(widget, horizontal, vertical):
        """Install a zero-stretch size policy with the given modes on *widget*."""
        policy = QtWidgets.QSizePolicy(horizontal, vertical)
        policy.setHorizontalStretch(0)
        policy.setVerticalStretch(0)
        policy.setHeightForWidth(widget.sizePolicy().hasHeightForWidth())
        widget.setSizePolicy(policy)

    def setupUi(self, MainWindow):
        """Create all widgets and layouts and attach them to *MainWindow*."""
        fixed = QtWidgets.QSizePolicy.Fixed
        expanding = QtWidgets.QSizePolicy.Expanding
        roles = QtWidgets.QFormLayout
        row_height = QtCore.QSize(0, 35)

        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(873, 661)
        window_icon = QtGui.QIcon()
        window_icon.addPixmap(QtGui.QPixmap("dy.png"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        MainWindow.setWindowIcon(window_icon)

        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.formLayout_3 = QtWidgets.QFormLayout(self.centralwidget)
        self.formLayout_3.setObjectName("formLayout_3")

        # --- Row 0: author home-page URL ---------------------------------
        self.formLayout = QtWidgets.QFormLayout()
        self.formLayout.setObjectName("formLayout")

        self.zuozheel = QtWidgets.QLabel(self.centralwidget)
        self._apply_size_policy(self.zuozheel, fixed, fixed)
        self.zuozheel.setMinimumSize(row_height)
        self.zuozheel.setObjectName("zuozheel")
        self.formLayout.setWidget(0, roles.LabelRole, self.zuozheel)

        self.zuozheinput = QtWidgets.QLineEdit(self.centralwidget)
        self._apply_size_policy(self.zuozheinput, expanding, fixed)
        self.zuozheinput.setMinimumSize(row_height)
        self.zuozheinput.setSizeIncrement(QtCore.QSize(0, 35))
        self.zuozheinput.setObjectName("zuozheinput")
        self.formLayout.setWidget(0, roles.FieldRole, self.zuozheinput)

        self.formLayout_3.setLayout(0, roles.SpanningRole, self.formLayout)

        # --- Row 1: single video URL --------------------------------------
        self.formLayout_2 = QtWidgets.QFormLayout()
        self.formLayout_2.setObjectName("formLayout_2")

        self.videoel = QtWidgets.QLabel(self.centralwidget)
        self.videoel.setMinimumSize(row_height)
        self.videoel.setObjectName("videoel")
        self.formLayout_2.setWidget(0, roles.LabelRole, self.videoel)

        self.videoinput = QtWidgets.QLineEdit(self.centralwidget)
        self.videoinput.setMinimumSize(row_height)
        self.videoinput.setText("")
        self.videoinput.setObjectName("videoinput")
        self.formLayout_2.setWidget(0, roles.FieldRole, self.videoinput)

        # NOTE: a second row in formLayout_2 (widgets "label" / "videoNums",
        # a "number of videos to crawl" field) is disabled and never created.

        self.formLayout_3.setLayout(1, roles.SpanningRole, self.formLayout_2)

        # --- Row 4: action buttons -----------------------------------------
        self.startbtn = QtWidgets.QPushButton(self.centralwidget)
        self._apply_size_policy(self.startbtn, fixed, fixed)
        self.startbtn.setMinimumSize(QtCore.QSize(150, 40))
        self.startbtn.setObjectName("startbtn")
        self.formLayout_3.setWidget(4, roles.LabelRole, self.startbtn)

        self.opendir = QtWidgets.QPushButton(self.centralwidget)
        self._apply_size_policy(self.opendir, fixed, fixed)
        self.opendir.setMinimumSize(QtCore.QSize(200, 40))
        self.opendir.setStyleSheet("background:rgba(0,0,0,0);\n"
                                   "border:none")
        self.opendir.setObjectName("opendir")
        self.formLayout_3.setWidget(4, roles.FieldRole, self.opendir)

        # --- Row 5: progress log -------------------------------------------
        self.textel = QtWidgets.QTextEdit(self.centralwidget)
        self._apply_size_policy(self.textel, expanding, expanding)
        self.textel.setMinimumSize(QtCore.QSize(0, 400))
        self.textel.setStyleSheet("background:rgba(0,0,0,0);")
        self.textel.setFrameShape(QtWidgets.QFrame.NoFrame)
        self.textel.setObjectName("textel")
        self.formLayout_3.setWidget(5, roles.SpanningRole, self.textel)

        MainWindow.setCentralWidget(self.centralwidget)

        # --- Menu bar and status bar ---------------------------------------
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 873, 26))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)

        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set every user-visible text through Qt's translation hook."""
        tr = QtCore.QCoreApplication.translate
        context = "MainWindow"

        MainWindow.setWindowTitle(tr(context, "抖音采集"))
        MainWindow.setStatusTip(tr(context, "抖音采集"))

        self.zuozheel.setText(tr(context, "作者主页url:"))
        self.zuozheinput.setPlaceholderText(tr(context, "抖音app-右上角点击-分享主页-复制链接"))

        self.videoel.setText(tr(context, "单个视频url:"))
        self.videoinput.setPlaceholderText(tr(context, "抖音app-分享按钮-复制链接"))

        self.startbtn.setText(tr(context, "开始"))
        self.opendir.setText(tr(context, "打开视频目录"))
3. lib/douyinwr.py 实际执行文件
[Python] 纯文本查看 复制代码 import threading
import urllib.parse
import requests
from playwright.sync_api import sync_playwright
from . import config
import re
import os
import time
import json
# Record in the shared config which browser backend this module uses.
config.type="playwright"
# Flag the environment as usable; the GUI start handler refuses to run while this is False.
config.Yescan=True
# Desktop Chrome user-agent string, passed to Chromium through --user-agent at launch.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'
# Init script added to the page: makes navigator.webdriver read as undefined.
js="""
Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});
"""
# Response listener: collect video entries from the author post-list API replies.
def on_response(response):
    """Queue a download entry for every video in a post-list API response.

    Only responses whose URL matches the ``aweme/post`` web API are parsed.
    Each item of ``aweme_list`` is handled on its own, so one malformed item
    (missing ``video``/``play_addr`` or an empty ``url_list``) is skipped
    instead of discarding the remaining items of the same response.

    Args:
        response: Response object exposing ``url`` and ``body()``.
    """
    if not re.match(r'https://www\.douyin\.com/aweme/v1/web/aweme/post/.*?device_platform=webapp', response.url):
        return
    try:
        body = json.loads(response.body())
    except Exception as e:
        # Best effort: an unreadable or non-JSON body is logged and ignored.
        print(e)
        return
    if not isinstance(body, dict):
        return
    for it in body.get("aweme_list") or []:
        try:
            info = {
                "title": it["desc"],
                "url": it["video"]["play_addr"]["url_list"][0],
            }
        except (KeyError, IndexError, TypeError) as e:
            print(e)
            continue
        print(info)
        config.downloads.append(info)
# Author page crawler: the first page of links is read from the DOM, later
# pages arrive as JSON through the response listener while scrolling.
def geturlbyzuozhe(url):
    """Open an author page in headless Chromium and collect its video links.

    Video links found in the initial DOM are appended to ``config.videourls``.
    The page is then scrolled to the bottom repeatedly so that further
    post-list API responses are delivered to ``on_response``. The loop ends
    when the "no more content" marker appears or ``config.runing`` is cleared.

    Changes compared with the previous version:
      * the browser is closed on every exit path (including the early
        "this is a video page" return and exceptions);
      * the login-dialog close button is only clicked when it exists, with a
        short timeout, instead of waiting for the default click timeout on
        every scroll iteration;
      * hrefs are resolved with ``urljoin`` so protocol-relative, absolute
        and relative links all work, and elements without href are skipped.

    Args:
        url: Author page URL (or a short link redirecting to one).

    Returns:
        ``False`` when *url* is not an http(s) URL, otherwise ``None``.
    """
    config.message.append("开始获取作者主页%s的视频地址" % url)
    if not re.match(r"^https?://", url):
        print("url无效:%s"%url)
        return False
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            slow_mo=10,
            args=['--disable-blink-features=AutomationControlled','--user-agent=%s'%user_agent,'--force-webrtc-ip-handling-policy=disable_non_proxied_udp']
        )
        context = browser.new_context()
        try:
            page = context.new_page()
            page.add_init_script(js)
            page.on('response', on_response)
            # Block images to save bandwidth.
            page.route("**/*.{png,jpg,jpeg}", lambda route: route.abort())
            page.route("**/*", lambda route: route.abort() if route.request.resource_type=="image" else route.continue_())
            page.goto(url)
            page.wait_for_load_state('networkidle')
            print("current_url=" + page.url)

            # The link led to a single video page: hand it to the resolver.
            if re.match(r'https://www\.douyin\.com/video/\d+', page.url):
                config.videourls.append(page.url)
                return

            first_pass = True
            while True:
                if not config.runing:
                    config.message.append("已停止")
                    return

                # Dismiss the login dialog if it is currently shown.
                close_button = page.locator(".dy-account-close")
                if close_button.count() > 0:
                    try:
                        close_button.click(timeout=2000)
                    except Exception:
                        # Best effort: the dialog may vanish or be covered.
                        pass

                # Only the first page is read from the DOM; later pages are
                # captured by the response listener.
                if first_pass:
                    first_pass = False
                    hrefs = page.locator('//*[@id="root"]//ul[1]/li/a[contains(@href,"/video/")]').element_handles()
                    print("hrefs-length=%s" % len(hrefs))
                    for href in hrefs:
                        if not config.runing:
                            config.message.append("已停止")
                            return
                        link = href.get_attribute("href")
                        if not link:
                            continue
                        u = urllib.parse.urljoin(page.url, link)
                        if u not in config.videourls:
                            config.videourls.append(u)
                            config.message.append("\n获取到url:%s" % u)

                if page.locator("'暂时没有更多了'").count() > 0:
                    config.message.append("获取作者的视频结束了")
                    break
                # Scroll to the bottom to trigger loading of the next page.
                page.keyboard.press("End")
                time.sleep(5)
        finally:
            context.close()
            browser.close()
        print("待下载列表 download lenght=%s"%len(config.downloads))
# Resolver: turn entries of config.videourls into {title, url} items in config.downloads.
def runfun():
    """Resolve queued page URLs into downloadable video entries until stopped.

    For every URL taken from ``config.videourls``:
      * an ``iteminfo`` API URL is queried as is;
      * a ``/video/<id>`` URL is converted to the ``iteminfo`` API URL;
      * an author page URL is handed to ``geturlbyzuozhe`` in a new thread;
      * anything else (e.g. a short link) is fetched and classified again by
        the URL it redirects to.

    Fixes compared with the previous version:
      * API URLs no longer fail on an undefined/stale ``mts`` variable;
      * the author-page check after a redirect now tests the redirected URL
        (it used to re-test the original URL, which could never match);
      * a network error while following a redirect no longer kills the
        thread, and both requests use a timeout.

    The loop polls ``config.runing`` and exits when it is cleared.
    """
    if not config.runing:
        config.message.append("已停止")
        return
    config.message.append("\n开始从视频播放页获取信息")

    api_re = r'https://www\.iesdouyin\.com/web/api/v2/aweme'
    video_re = r'https://www\.douyin\.com/video/(\d+)'
    user_re = r'https://www\.douyin\.com/user/[\w\-]+'

    def crawl_author(author_url):
        # Collect the author's video links in a separate thread.
        threading.Thread(target=geturlbyzuozhe, args=(author_url,), daemon=True).start()

    while True:
        if not config.runing:
            config.message.append("已停止")
            break
        try:
            url = config.videourls.pop()
        except IndexError:
            # NOTE(review): the message says 15s but the pause is 10s; both kept as they were.
            print("没有链接了,暂停15s")
            time.sleep(10)
            continue

        if re.match(api_re, url):
            src = url
        else:
            if re.match(user_re, url):
                crawl_author(url)
                continue
            mts = re.match(video_re, url)
            if not mts:
                # Probably a short link: follow redirects and classify the target.
                try:
                    res = requests.get(url, headers=config.headers, timeout=30)
                except requests.RequestException as e:
                    print(e)
                    continue
                if re.match(user_re, res.url):
                    crawl_author(res.url)
                    continue
                mts = re.match(video_re, res.url)
                if not mts:
                    continue
            src = "https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids=%s" % mts[1]

        # e.g. https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids=6856737027619114247
        try:
            print("url1=" + src)
            res = requests.get(src, headers=config.headers, timeout=30)
            body = json.loads(res.text)
            item = body['item_list'][0]
            # '/playwm/' is replaced by '/play/' in the play address.
            play_url = item['video']['play_addr']['url_list'][0].replace('/playwm/','/play/')
            text = item['desc']
            print({"title":text,"url":play_url})
            config.downloads.append({"title":text,"url":play_url})
        except Exception as e:
            # Best effort per URL: log and move on to the next one.
            print(e)
# Download worker: fetch queued videos with requests and store them as .mp4 files.
def download():
    """Download entries of ``config.downloads`` into ``config.saveDir`` until stopped.

    Several instances of this function run in parallel threads.

    Fixes compared with the previous version:
      * the queue is popped with EAFP, so two threads racing for the last
        item no longer crash one of them with ``IndexError``;
      * characters that are invalid in Windows file names (``\\ / : " < >``
        and control characters) are stripped too; such titles used to fail
        on every attempt and were re-queued forever;
      * an empty title falls back to a name derived from the URL instead of
        every untitled video colliding on ``.mp4``;
      * HTTP error responses are treated as failures rather than being saved
        as a video, and data is written to a temporary file first so that a
        failed download cannot leave a broken ``.mp4`` that later counts as
        "already exists";
      * a failing item is retried at most ``max_attempts`` times.

    Failures are appended to a per-day log file in ``config.logsDir``.
    """
    import hashlib  # local import: only needed for the empty-title fallback

    max_attempts = 3
    os.makedirs(config.saveDir, exist_ok=True)
    while True:
        if not config.runing:
            config.message.append("已停止")
            return
        try:
            it = config.downloads.pop(0)
        except IndexError:
            # Queue empty (or emptied by another worker): wait for new entries.
            time.sleep(15)
            continue

        name = re.sub(r'[ #*.?&@!$%^()|\\/:"<>\x00-\x1f]', "", it["title"]).strip()
        if not name:
            name = "video_%s" % hashlib.md5(it["url"].encode("utf-8")).hexdigest()[:12]
        mp4 = "%s.mp4" % name
        mp4file = os.path.join(config.saveDir, mp4)
        if os.path.exists(mp4file):
            config.message.append("已存在:%s" % mp4file)
            continue

        # Per-thread temporary name so parallel workers never share a file.
        partfile = "%s.%d.part" % (mp4file, threading.get_ident())
        try:
            config.message.append("\n开始下载视频文件:%s" % mp4file)
            rep = requests.get(it["url"], headers=config.headers, timeout=60)
            rep.raise_for_status()
            with open(partfile, "wb") as f:
                f.write(rep.content)
            os.replace(partfile, mp4file)
            config.message.append("\n下载成功:%s" % mp4file)
        except Exception as e:
            config.message.append("\n下载视频文件失败了====")
            if os.path.exists(partfile):
                try:
                    os.remove(partfile)
                except OSError:
                    pass
            attempts = it.get("attempts", 0) + 1
            if attempts < max_attempts:
                it["attempts"] = attempts
                config.downloads.append(it)
            logname = time.strftime("%Y-%m-%d", time.localtime()) + ".log"
            with open(os.path.join(config.logsDir, logname), "ab") as f:
                msg = "\n[url]%s\n[title]%s\n[error]%s\n" % (it["url"], it["title"], e)
                f.write(msg.encode("utf-8"))
        time.sleep(5)
4. lib/config.py 配置文件
[Python] 纯文本查看 复制代码 import os
# Progress messages produced by the worker threads and shown in the GUI log.
message = []
# Number of videos to fetch from an author page (not read by the code shown here).
videoNums = 500
# Browser backend in use: "chromedriver" or "playwright".
# NOTE: shadows the builtin ``type`` inside this module; the name is kept
# because other modules assign ``config.type``.
type = ""
# Queue of video page URLs waiting to be resolved.
videourls = []
# Queue of resolved entries waiting to be downloaded: {"title": ..., "url": ...}.
downloads = []
# Whether the required components (browser etc.) are available; running is only allowed when True.
Yescan = False
# Request headers (user-agent) sent with every HTTP request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36"
}
# Proxies.
proxies = {}
# Number of threads (not read by the code shown here).
threadNums = 5
# Project root: the current working directory at import time.
rootDir = os.path.realpath("./")
# Directory the videos are saved to; created on import.
# makedirs(exist_ok=True) replaces the check-then-mkdir sequence, which could
# fail if the directory appeared between the check and the creation.
saveDir = os.path.join(rootDir, "video")
os.makedirs(saveDir, exist_ok=True)
# Directory for failure logs; created on import.
logsDir = os.path.join(rootDir, "logs")
os.makedirs(logsDir, exist_ok=True)
binDir = os.path.join(rootDir, 'bin')
# Current run state; the worker threads stop when this becomes False.
runing = False
# Placeholder for the log widget (initially unset).
textel = None
调试方法,激活venv虚拟环境后,python douyin.py 即可显示gui 界面, pyinstaller -w douyin.py 可编译为软件
注意事项:Python 采用代码缩进来区分代码块之间的层次,直接复制代码,有可能提示 indent 等错误,请自行调整缩进或下载附件中源码
源码.zip
(116.06 KB, 下载次数: 104)
已编译好的软件百度盘下载链接: https://pan.baidu.com/s/1t0rMHifwa00zh96WB8Qudw?pwd=r992 提取码: r992 复制这段内容后打开百度网盘手机App,操作更方便哦
win7下注意:
编译好的这个软件,在win7下可能无法运行,提示node.exe 错误:因playwright集成的是nodejs版本16,win7下不支持node 12以上的版本 需要去
https://nodejs.org/download/release/v12.0.0/ 下载 win-x64或win-x86 的zip包,解压后用里面的 node.exe 覆盖 软件目录/playwright/driver/node.exe
https://nodejs.org/download/release/v12.0.0/node-v12.0.0-win-x64.zip
|
免费评分
-
查看全部评分
|