python+selenium 下载快手小视频
本帖最后由 1170 于 2020-7-30 20:27 编辑快手的小视频下载,通过python+selenium获取,只能下载有水印的视频,需要Chrome浏览器以及对应版本的驱动,开启多线程后速度相应的有所提升,但是很可能导致cookies失效,代码比较凌乱大家多多担待,好了废话不多说直接上代码。
先填好要爬取的用户ID,运行程序后,扫描登录快手网页版,就可以自动下载。不知道ID的话可以先运行获取自己已关注的ID列表,复制好ID在控制台输入即可。
import requests
import os
import json
import time
import re
from selenium import webdriver
from concurrent.futures import ProcessPoolExecutor
# 通过selenium获取cookies
# Obtain session cookies via an interactive Selenium login.
def get_ck():
    """Open live.kuaishou.com in Chrome, wait for the user to scan-login,
    then persist the session cookies to cookies.txt and return them.

    Returns:
        dict: cookie name -> value mapping for the logged-in session.
    """
    dr = webdriver.Chrome()
    dr.maximize_window()
    dr.get('https://live.kuaishou.com/')
    time.sleep(2)
    dr.find_element_by_class_name('login').click()
    cookies_dict = {}
    while True:
        time.sleep(1)
        try:
            # The user-name element only exists once the QR login succeeded.
            ck_login = dr.find_element_by_xpath("//span[@class='user-info-name']").text
        except Exception:
            # Not logged in yet — keep polling instead of crashing on
            # NoSuchElementException.
            continue
        if ck_login:
            dr.get('https://live.kuaishou.com/cate/my-follow/all')
            dr.implicitly_wait(10)
            print('登录成功,获取cookies中')
            with open("cookies.txt", "w") as fp:
                json.dump(dr.get_cookies(), fp)
            for i in dr.get_cookies():
                # BUGFIX: the subscript key was lost in the original line
                # ("cookies_dict] = ..."); map cookie name -> value.
                cookies_dict[i["name"]] = i["value"]
            break
    # quit() terminates the whole browser session; close() would only
    # close the current tab and leak the driver process.
    dr.quit()
    return cookies_dict
# 获取保存到文件中的cookies
# Load cookies cached on disk, falling back to an interactive login.
def gt_ck():
    """Return session cookies, preferring the cookies.txt cache.

    Returns:
        dict: cookie name -> value mapping. When no non-empty cache file
        exists, delegates to get_ck() to perform a fresh browser login.
    """
    if os.path.exists('cookies.txt') and os.path.getsize('cookies.txt') > 0:
        cookies_dict = {}
        with open("cookies.txt", "r") as fp:
            cookies = json.load(fp)
        for cookie in cookies:
            # BUGFIX: the subscript key was lost in the original line
            # ("cookies_dict] = ..."); map cookie name -> value.
            cookies_dict[cookie['name']] = cookie['value']
    else:
        # No cached session yet: run the browser login flow.
        cookies_dict = get_ck()
    return cookies_dict
#获取我所有的关注列表
# Fetch the full follow list of the logged-in account.
def get_all_likes():
    """Query the Kuaishou GraphQL endpoint for everything the logged-in
    account follows.

    Returns:
        list[list[str]]: one [user_id, user_name] pair per followed user.
        On an empty response (usually expired cookies) a fresh login is
        triggered and the query is retried.
    """
    headers = {
        "accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh-TW;q=0.9,zh;q=0.8",
        "Connection": "keep-alive",
        "content-type": "application/json",
        "Host": "live.kuaishou.com",
        "Origin": "https://live.kuaishou.com",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
    }
    cookies_dict = gt_ck()
    data = {
        "operationName": "FollowQuery",
        "variables": {"count": 500, "pcursor": ""},
        "query": "query FollowQuery($pcursor: String, $count: Int) {\nallFollows(pcursor: $pcursor, count: $count) {\n list {\n id\n name\n living\n avatar\n sex\n description\n counts {\n fan\n follow\n photo\n __typename\n }\n __typename\n }\n pcursor\n __typename\n}\n}\n"
    }
    url = 'https://live.kuaishou.com/m_graphql'
    r = requests.post(url, data=json.dumps(data), headers=headers, cookies=cookies_dict)
    te = json.loads(r.content.decode('utf-8'))
    follows = []  # renamed from "all": don't shadow the builtin
    lis = te['data']['allFollows']['list']
    if lis:
        for i in lis:
            # Reconstructed from the garbled original ("d ="): collect the
            # [id, name] pair for each followed user.
            follows.append([i['id'], i['name']])
    else:
        # Empty list usually means the cookies expired: re-login and retry.
        # BUGFIX: the original discarded the recursive result and returned
        # the empty list.
        get_ck()
        return get_all_likes()
    return follows
# Windows下文件夹及文件命名清洗格式
# Sanitize a name so it is a legal Windows file/folder name.
def stxip(path):
    """Drop every character that Windows forbids in file names.

    Args:
        path: any value; it is coerced to str first.

    Returns:
        str: the input with ? \\ * | “ < > : / and newlines removed.
    """
    forbidden = '?\\*|“<>:/\n'
    return ''.join(ch for ch in str(path) if ch not in forbidden)
# 根据快手ID获取每个作品的ID
# List every public work (video) of one Kuaishou user.
def get_videolis(idd):
    """Query the GraphQL feed of user `idd` and return its works.

    Args:
        idd: Kuaishou user ID string.

    Returns:
        list[list[str]]: one [video_id, sanitized_title, sanitized_author]
        triple per work. On an empty response (usually expired cookies) a
        fresh login is triggered and the query is retried.
    """
    headers = {
        "accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh-TW;q=0.9,zh;q=0.8",
        "Connection": "keep-alive",
        "content-type": "application/json",
        "Host": "live.kuaishou.com",
        "Origin": "https://live.kuaishou.com",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
    }
    cookies_dict = gt_ck()
    data = {
        "operationName": "publicFeedsQuery",
        "variables": {
            "principalId": idd,
            "pcursor": "", "count": 2400
        },
        "query": "query publicFeedsQuery($principalId: String, $pcursor: String, $count: Int) {\npublicFeeds(principalId: $principalId, pcursor: $pcursor, count: $count) {\n pcursor\n live {\n user {\n id\n avatar\n name\n __typename\n }\n watchingCount\n poster\n coverUrl\n caption\n id\n playUrls {\n quality\n url\n __typename\n }\n quality\n gameInfo {\n category\n name\n pubgSurvival\n type\n kingHero\n __typename\n }\n hasRedPack\n liveGuess\n expTag\n __typename\n }\n list {\n id\n thumbnailUrl\n poster\n workType\n type\n useVideoPlayer\n imgUrls\n imgSizes\n magicFace\n musicName\n caption\n location\n liked\n onlyFollowerCanComment\n relativeHeight\n timestamp\n width\n height\n counts {\n displayView\n displayLike\n displayComment\n __typename\n }\n user {\n id\n eid\n name\n avatar\n __typename\n }\n expTag\n __typename\n }\n __typename\n}\n}\n"
    }
    url = 'https://live.kuaishou.com/m_graphql'
    r = requests.post(url, data=json.dumps(data), headers=headers, cookies=cookies_dict)
    te = json.loads(r.content.decode('utf-8'))
    lis = te['data']['publicFeeds']['list']
    v_lis = []
    if lis:
        for i in lis:
            # Reconstructed from the garbled original (it called .strip()
            # on the split list and both branches were identical): use the
            # text before the first '#' as the title, falling back to the
            # whole caption when it begins with a hashtag.
            head = i['caption'].split('#')[0]
            title = head.strip() if head else i['caption'].strip()
            title = stxip(title)
            author = stxip(i['user']['name'])
            # Reconstructed from the garbled "d = , n, cag]" line.
            v_lis.append([i['id'], title, author])
    else:
        # Empty list usually means the cookies expired: re-login and retry.
        # BUGFIX: the original discarded the recursive result and returned
        # None after the retry.
        get_ck()
        return get_videolis(idd)
    return v_lis
# 视屏下载
# Download a single video file.
def down_video(url, name, path):
    """Stream one video to disk.

    Args:
        url:  direct URL of the .mp4 file.
        name: target file name (including extension).
        path: target directory (the original appended name directly, so a
              trailing '/' was expected; os.path.join handles both).

    Errors are printed rather than raised so one failed video does not
    abort a batch download.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
    }
    cookies_dict = gt_ck()
    try:
        # stream=True + chunked writes avoid holding the whole video in
        # memory; timeout prevents a dead connection from hanging forever.
        res = requests.get(url, headers=headers, cookies=cookies_dict,
                           stream=True, timeout=30)
        res.raise_for_status()
        with open(os.path.join(path, name), 'wb') as f:
            for chunk in res.iter_content(chunk_size=1 << 16):
                f.write(chunk)
    except Exception as e:
        print(e)
# 单个快手主播下载
# Download every public video of one Kuaishou user.
def get_video(idd):
    """Scrape the play URL of each work of user `idd` and download it
    (watermarked) into video/<author>/.

    Args:
        idd: Kuaishou user ID string.
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh-TW;q=0.9,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "live.kuaishou.com",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36",
    }
    v_lis = get_videolis(idd)
    cookies_dict = gt_ck()
    # Each entry is [video_id, title, author] — the original used the whole
    # list element as a string, which cannot have been intended.
    for vid, title, author in v_lis:
        url = 'https://live.kuaishou.com/u/{}/{}?did={}'.format(idd, vid, cookies_dict['did'])
        try:
            res = requests.get(url, headers=headers, cookies=cookies_dict)
            # The play URL is JSON-escaped in the page ('/' appears as
            # \u002F). Non-greedy match so we stop at the FIRST "mp4"
            # instead of swallowing the rest of the page.
            matches = re.findall('"playUrl":"(.*?)mp4', res.text)
            if not matches:
                print('未找到视频地址:' + vid)
                continue
            # BUGFIX: findall returns a list; the original called .replace
            # on it directly.
            v_url = matches[0].replace(r'\u002F', '/') + 'mp4'
            path = 'video/' + author + '/'
            # makedirs creates the parent 'video/' too; os.mkdir would
            # fail on the first run.
            os.makedirs(path, exist_ok=True)
            name = title + vid + '.mp4'
            print('正在下载' + name)
            down_video(v_url, name, path)
        except Exception as e:
            print(e)
if __name__ == '__main__':
    # NOTE: downloads can be parallelised by submitting get_video calls to
    # a ProcessPoolExecutor, but hammering the API tends to invalidate the
    # cookies, so the script runs sequentially by default.
    # Show everything the logged-in account follows so the user can pick an ID.
    for follow in get_all_likes():
        print(follow)
    # Download every video of one user; re-run with another ID as needed.
    ID = str(input('请输入用户ID:'))
    get_video(ID)
zhuxiaoyuan 发表于 2020-7-30 18:30
具体怎么用啊老铁
要有 python 环境,然后 pip install requests、pip install selenium 就可以运行了。
具体怎么用啊老铁
厉害了大佬感谢,学了一波
页:
[1]