python爬取抖音作者主页视频
本帖最后由 安逸丫 于 2022-12-14 00:48 编辑
import os
import requests
# Initialise the output folders
def ini():
    """Create the download directory tree ``video/主页`` if it is missing.

    Safe to call repeatedly: existing directories are left untouched.
    """
    # makedirs also creates the intermediate 'video' directory, and
    # exist_ok=True avoids the check-then-create race of exists() + mkdir().
    os.makedirs('video/主页', exist_ok=True)
# Characters that must not appear in a file name: the set Windows forbids
# (\ / : * ? " < > |) plus every ASCII control character (code points 0-31),
# which covers '\n' as well as '\r' and '\t'.
_FORBIDDEN_FILENAME_CHARS = str.maketrans(
    '', '', '\\/:*?"<>|' + ''.join(map(chr, range(32)))
)


# Remove special characters from a title
def replace(title):
    """Return *title* with characters that are unsafe in file names removed.

    Args:
        title: Raw text (e.g. a video description) to be used in a file name.

    Returns:
        The text with ``\\ / : * ? " < > |`` and all ASCII control
        characters deleted. May be an empty string.
    """
    # One translate() pass replaces the chain of single-character replace()
    # calls and additionally drops '\r', '\t' and other control characters.
    return title.translate(_FORBIDDEN_FILENAME_CHARS)
# Resolve link redirects
def redirect(url, timeout=10):
    """Follow HTTP redirects for *url* and return the final URL.

    Args:
        url: The link to resolve (for example a short share link).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The URL of the final response after redirects were followed.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
    }
    response = requests.get(url, headers=header, timeout=timeout)
    return response.url
def start():
    """Prompt for an author link on stdin and download that author's videos.

    Surrounding whitespace from a pasted link is removed. If nothing was
    entered, a message is printed and no request is made.
    """
    url = input('请输入作者链接:').strip()
    if not url:
        # An empty string would only fail later inside requests with a
        # less helpful error, so stop here.
        print('未输入作者链接')
        return
    home(url)
def home(url):
    """Download every video listed on a Douyin author's home page.

    The link is resolved through ``redirect``, the author's ``sec_uid`` is
    taken from the resolved URL, and the post-list API is paged through
    until it reports no further pages. Each video is saved as
    ``video/主页/<author>/<index>_<title>.mp4``.

    Args:
        url: Link to the author's home page; redirects are followed.

    Raises:
        ValueError: if the resolved URL contains no ``user/`` segment.
        requests.RequestException: if the list API request fails.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36'
    }
    timeout = 10

    # Resolve redirects, then take the path segment after 'user/' and drop
    # the query string to obtain the sec_uid.
    url = redirect(url)
    _, marker, tail = url.partition('user/')
    if not marker:
        raise ValueError(f'cannot find sec_uid in url: {url}')
    sec_uid = tail.split('?')[0]

    max_cursor = 0  # paging cursor
    quantity = 0    # running index of videos seen
    failed = 0      # videos whose download raised an error

    while True:
        # Fetch one page of the author's posts. A User-Agent is sent because
        # the API was reported to return an empty body without one.
        api_url = f'https://m.douyin.com/web/api/v2/aweme/post/?reflow_source=reflow_page&sec_uid={sec_uid}&count=21&max_cursor={max_cursor}'
        response = requests.get(api_url, headers=header, timeout=timeout)
        response.raise_for_status()
        data = response.json()

        for aweme in data.get('aweme_list') or []:
            quantity += 1
            # Sanitise the author name too, since it becomes a directory name.
            author = replace(aweme['author']['nickname'])
            # url_list is a list of alternative addresses; use the first.
            video_url = aweme['video']['play_addr']['url_list'][0]

            author_dir = f'video/主页/{author}'
            os.makedirs(author_dir, exist_ok=True)

            # Fall back to index + author when the title is empty.
            desc = replace(aweme['desc'])
            if not desc:
                desc = f'{quantity}{author}'

            print(f'正在下载第{quantity}个视频:{quantity}.{desc}')

            # Download first and only then open the file, so a failed or
            # timed-out request does not leave an empty .mp4 behind.
            try:
                video = requests.get(video_url, timeout=timeout)
                video.raise_for_status()
            except requests.RequestException as error:
                failed += 1
                print(f'第{quantity}个视频下载失败:{error}')
                continue
            with open(f'{author_dir}/{quantity}_{desc}.mp4', 'wb') as f:
                f.write(video.content)

        # Stop when there is no next page, or when the cursor does not
        # advance (which would otherwise request the same page forever).
        next_cursor = data.get('max_cursor', max_cursor)
        if not data.get('has_more') or next_cursor == max_cursor:
            print('视频下载完成,累计下载视频数量:', quantity - failed)
            break
        max_cursor = next_cursor
# Script entry point: create the output folders, then start the interactive
# download.
if __name__ == "__main__":
    ini()
    start()
该文章已失效,请前往新帖:https://www.52pojie.cn/thread-1725336-1-1.html
本帖最后由 安逸丫 于 2022-12-8 23:41 编辑
wapj小新手 发表于 2022-12-7 20:08
12.5号测试回执json为空,post端口好像失效了
在请求头添加 User-Agent。
willanx 发表于 2022-9-7 17:33
请问下request.get 的请求是怎么获取到,pc端的抖音抓不到这个链接
切换手机模式刷新一下就出来了。
请输入作者主页链接:https://www.douyin.com/user/MS4wLjABAAAAgHRTHEoERbW0q5mBVCjysdCBd7Gh9rVUXskIDByGQXo?vid=7132330132320144678
Traceback (most recent call last):
File "c:/Users/zhou/Documents/百米货栈PAD/抖音下载.py", line 42, in <module>
url = requests.get(url, headers=headers).url# 获取重定向后的url
File "C:\Users\zhou\anaconda3\lib\site-packages\requests\api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\zhou\anaconda3\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\zhou\anaconda3\lib\site-packages\requests\sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\zhou\anaconda3\lib\site-packages\requests\sessions.py", line 655, in send
r = adapter.send(request, **kwargs)
File "C:\Users\zhou\anaconda3\lib\site-packages\requests\adapters.py", line 439, in send
resp = conn.urlopen(
File "C:\Users\zhou\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 696, in urlopen
self._prepare_proxy(conn)
File "C:\Users\zhou\anaconda3\lib\site-packages\urllib3\connectionpool.py", line 964, in _prepare_proxy
conn.connect()
File "C:\Users\zhou\anaconda3\lib\site-packages\urllib3\connection.py", line 359, in connect
conn = self._connect_tls_proxy(hostname, conn)
File "C:\Users\zhou\anaconda3\lib\site-packages\urllib3\connection.py", line 500, in _connect_tls_proxy
return ssl_wrap_socket(
File "C:\Users\zhou\anaconda3\lib\site-packages\urllib3\util\ssl_.py", line 432, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls)
File "C:\Users\zhou\anaconda3\lib\site-packages\urllib3\util\ssl_.py", line 474, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock)
File "C:\Users\zhou\anaconda3\lib\ssl.py", line 500, in wrap_socket
return self.sslsocket_class._create(
File "C:\Users\zhou\anaconda3\lib\ssl.py", line 997, in _create
raise ValueError("check_hostname requires server_hostname")
ValueError: check_hostname requires server_hostname
好软件,下载下来试试
感谢楼主分享!代码拿去学习了
你这个代码注释习惯挺好的。
print(f'正在下载第{quantity}个视频:{video_name}')# 打印视频名称
UnicodeEncodeError: 'UCS-2' codec can't encode characters in position 22-22: Non-BMP character not supported in Tk
感谢楼主分享!代码拿去学习了
zcmrp 发表于 2022-9-6 21:04
print(f'正在下载第{quantity}个视频:{video_name}')# 打印视频名称
UnicodeEncodeError: 'UCS-2'...
你链接发我看看
双眼皮的微笑 发表于 2022-9-6 20:55
你这个代码注释习惯挺好的。
我这注释太多反而觉得乱乱的
感谢楼主分享,我也在找,自己写了一个,老运行失败。