爬取快手全部喜欢视频
本帖最后由 苏紫方璇 于 2024-11-15 20:52 编辑。
通过 cookie 的方式下载"喜欢"(红心)下的全部视频。
直接上代码
# Login module
from io import BytesIO
from PIL import Image
import base64
import requests
import aiohttp
import asyncio
import aiofiles
import random
# Fetch the login QR code
def get_login_qrcode():
    """Start a QR-code login session and return its image and credentials.

    Sends a form-encoded POST to the ``qr/start`` endpoint and decodes the
    JSON reply.

    Returns:
        list: ``[image_base64, qrLoginToken, qrLoginSignature]``. The token
        and signature are the values later passed to ``Check_takes_effect``
        and ``Confirm_status``.

    Raises:
        KeyError: if the reply lacks one of the expected fields.
    """
    url = "https://id.kuaishou.cn/rest/c/infra/ks/qr/start"
    params = {
        'sid': 'kuaishou.server.webday7',
        'channelType': 'UNKNOWN',
        'encryptHeaders': ''
    }
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Length": "63",
        "Content-Type": "application/x-www-form-urlencoded",
        "Cookie": "did=web_740982b1f8a4b3ccc1b1eb37ddb1261e",
        "Host": "id.kuaishou.cn",
        "Origin": "https://www.kuaishou.cn",
        "Pragma": "no-cache",
        "Referer": "https://www.kuaishou.cn/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Microsoft Edge";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"
    }
    data = requests.post(url, headers=headers, data=params).json()
    # NOTE(review): the first element of the original return list was missing
    # (the statement read `return ,data[...]`). 'imageData' is assumed to be
    # the field holding the base64-encoded QR image -- confirm against a live
    # response before relying on it.
    return [data['imageData'], data['qrLoginToken'], data['qrLoginSignature']]
# Decode a Base64 string into an image
def base64_to_image(base64_string):
    """Decode *base64_string* and open the resulting bytes as a PIL image.

    Args:
        base64_string: Base64-encoded image data.

    Returns:
        The object returned by ``Image.open`` for the decoded bytes.
    """
    raw_bytes = base64.b64decode(base64_string)
    # Wrap the bytes in an in-memory stream so PIL can read them like a file.
    return Image.open(BytesIO(raw_bytes))
# Check whether the QR login has taken effect
def Check_takes_effect(qrLoginToken,qrLoginSignature):
    """Query the ``qr/scanResult`` endpoint for the state of a QR login.

    Args:
        qrLoginToken: token identifying the QR login session.
        qrLoginSignature: signature paired with the token.

    Returns:
        ``False`` when the reply's ``result`` is 707 (reported as an expired
        QR code), ``True`` when it is 1 (reported as scanned and awaiting
        confirmation), and ``None`` for any other value.
    """
    endpoint = "https://id.kuaishou.com/rest/c/infra/ks/qr/scanResult"
    form = {
        'qrLoginToken': qrLoginToken,
        'qrLoginSignature': qrLoginSignature,
        'channelType': 'UNKNOWN',
        'encryptHeaders': ''
    }
    request_headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "93",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "id.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Referer": "https://www.kuaishou.com/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    reply = requests.post(endpoint, headers=request_headers, data=form).json()
    result_code = reply['result']
    if result_code == 707:
        print("二维码已经失效,请重新获取")
        return False
    if result_code == 1:
        print("已经扫码,等待确认")
        return True
    # Any other result code is neither reported nor mapped to a boolean.
    return None
# Wait for the login to be confirmed on the phone
def Confirm_status(qrLoginToken,qrLoginSignature):
    """Ask the ``qr/acceptResult`` endpoint whether the login was accepted.

    Args:
        qrLoginToken: token identifying the QR login session.
        qrLoginSignature: signature paired with the token.

    Returns:
        The reply's ``qrToken`` when ``result`` is 1; otherwise ``None``.
    """
    endpoint = "https://id.kuaishou.com/rest/c/infra/ks/qr/acceptResult"
    form = {
        "qrLoginToken": qrLoginToken,
        "qrLoginSignature": qrLoginSignature,
        "sid": "kuaishou.server.webday7",
        "channelType": "UNKNOWN",
        "encryptHeaders": ""
    }
    request_headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "121",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "id.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Referer": "https://www.kuaishou.com/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    reply = requests.post(endpoint, headers=request_headers, data=form).json()
    if reply['result'] != 1:
        # Not confirmed: nothing is printed and the caller receives None.
        return None
    print("登录成功")
    return reply['qrToken']
# Exchange the qrToken for login cookies
def kuaishou_callback(qrToken):
    """Call the QR login callback endpoint and build the cookie dict.

    Args:
        qrToken: value returned by ``Confirm_status``.

    Returns:
        dict: the mapping built by ``structure_cookie`` with every value
        converted to ``str``.

    Raises:
        KeyError: if the reply lacks ``userId`` or either token field.
    """
    endpoint = "https://id.kuaishou.com/pass/kuaishou/login/qr/callback"
    form = {
        "qrToken": qrToken,
        "sid": "kuaishou.server.webday7",
        "channelType": "UNKNOWN",
        "encryptHeaders": ""
    }
    request_headers = {
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "connection": "keep-alive",
        "content-length": "354",
        "content-type": "application/x-www-form-urlencoded",
        "cookie": "did=web_110d3c68ae9762f3af4fcdc734b87449",
        "host": "id.kuaishou.com",
        "origin": "https://www.kuaishou.com",
        "pragma": "no-cache",
        "referer": "https://www.kuaishou.com/?isHome=1",
        "sec-ch-ua": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    reply = requests.post(endpoint, headers=request_headers, data=form).json()
    user_id = reply['userId']
    # NOTE(review): the '.at' field is the value stored under the '_ph' cookie
    # by structure_cookie -- confirm this mapping is intended.
    at_token = reply['kuaishou.server.webday7.at']
    st_token = reply['kuaishou.server.webday7_st']
    raw_cookie = structure_cookie(at_token, st_token, user_id)
    # Stringify every value (e.g. the integer 'clientid') before returning.
    return {name: str(val) for name, val in raw_cookie.items()}
# Build the cookie mapping
def structure_cookie(webday7_ph, webday7_st, userId,
                     did='web_110d3c68ae9762f3af4fcdc734b87449'):
    """Assemble the cookie dict sent with authenticated requests.

    Args:
        webday7_ph: value stored under ``kuaishou.server.webday7_ph``.
        webday7_st: value stored under ``kuaishou.server.webday7_st``.
        userId: account id; stored as a string.
        did: device id cookie value. Defaults to the id that was previously
            hard-coded here, so existing three-argument calls are unchanged.

    Returns:
        dict: cookie names mapped to values. ``clientid`` is the integer 3;
        every other value is a string.
    """
    return {
        'clientid': 3,
        'did': did,
        'kpf': 'PC_WEB',
        'kpn': 'KUAISHOU_VISION',
        'kuaishou.server.webday7_ph': str(webday7_ph),
        'kuaishou.server.webday7_st': str(webday7_st),
        'userId': str(userId),
    }
# Fetch the JSON listing of liked videos
def get_use_video_info(cookie, pcursor=""):
    """Request one page of the liked-video list from the GraphQL endpoint.

    Args:
        cookie: cookie dict passed to ``requests`` via ``cookies=``.
        pcursor: value sent as the query's ``pcursor`` variable. The default
            ``""`` reproduces the previous behaviour. The query also asks for
            a ``pcursor`` field in the reply, which presumably is the cursor
            for the next page -- verify before building pagination on it.

    Returns:
        The decoded JSON reply when the HTTP status is 200, otherwise
        ``None``.
    """
    url = "https://www.kuaishou.com/graphql"
    params = {
        "operationName": "visionProfileLikePhotoList",
        "variables": {"pcursor": pcursor, "page": "profile"},
        "query": "fragment photoContent on PhotoEntity {\n__typename\nid\nduration\ncaption\noriginCaption\nlikeCount\nviewCount\ncommentCount\nrealLikeCount\ncoverUrl\nphotoUrl\nphotoH265Url\nmanifest\nmanifestH265\nvideoResource\ncoverUrls {\n url\n __typename\n}\ntimestamp\nexpTag\nanimatedCoverUrl\ndistance\nvideoRatio\nliked\nstereoType\nprofileUserTopPhoto\nmusicBlocked\nriskTagContent\nriskTagUrl\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n__typename\nid\nduration\ncaption\noriginCaption\nlikeCount\nviewCount\ncommentCount\nrealLikeCount\ncoverUrl\nphotoUrl\nphotoH265Url\nmanifest\nmanifestH265\nvideoResource\ncoverUrls {\n url\n __typename\n}\ntimestamp\nexpTag\nanimatedCoverUrl\ndistance\nvideoRatio\nliked\nstereoType\nprofileUserTopPhoto\nmusicBlocked\nriskTagContent\nriskTagUrl\n}\n\nfragment feedContentWithLiveInfo on Feed {\ntype\nauthor {\n id\n name\n headerUrl\n following\n livingInfo\n headerUrls {\n url\n __typename\n }\n __typename\n}\nphoto {\n ...photoContent\n ...recoPhotoFragment\n __typename\n}\ncanAddComment\nllsid\nstatus\ncurrentPcursor\ntags {\n type\n name\n __typename\n}\n__typename\n}\n\nquery visionProfileLikePhotoList($pcursor: String, $page: String, $webPageArea: String) {\nvisionProfileLikePhotoList(pcursor: $pcursor, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContentWithLiveInfo\n __typename\n }\n hostName\n pcursor\n __typename\n}\n}\n",
    }
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Length": "1775",
        "Content-Type": "application/json",
        "Host": "www.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Pragma": "no-cache",
        "Referer": "https://www.kuaishou.com/profile/3xisyfk6x2djz44",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    # NOTE(review): verify=False turns off TLS certificate verification for
    # this request; kept as in the original, but it should be removed unless
    # there is a specific reason for it.
    response = requests.post(url, headers=headers, json=params, cookies=cookie, verify=False)
    if response.status_code != 200:
        # Non-200 replies are not reported; the caller must handle None.
        return None
    data = response.json()
    print("获取信息文件成功...")
    return data
# Download one video
async def download_video(video_url):
    """Download *video_url* and save it as a randomly named ``.mp4`` file.

    The response body is read completely before the target file is opened,
    so a failed read no longer leaves an empty file behind, and the file is
    written inside ``async with`` so the handle is closed even if the write
    raises.

    Args:
        video_url: URL of the video to fetch.

    Returns:
        None. Success or failure is reported with ``print``.
    """
    # NOTE(review): only 9000 distinct names are possible, so two downloads
    # can draw the same name and overwrite each other.
    filename = str(random.randint(1000,9999))+'.mp4'
    path = r"C:\Users\MiddleMan\Videos\kuaishou\\"
    video_name = path+filename
    async with aiohttp.ClientSession() as session:
        async with session.get(video_url) as response:
            if response.status == 200:
                content = await response.read()
                async with aiofiles.open(video_name, mode='wb') as f:
                    await f.write(content)
                print(f"{filename}下载成功")
            else:
                print(f"{filename}下载失败")
def _first_item(node):
    """Return ``node[0]`` when *node* is a list, otherwise *node* unchanged."""
    return node[0] if isinstance(node, list) else node


# Main routine
async def main():
    """Log in by QR code, list the liked videos and download them concurrently.

    Steps: fetch and show the QR code, check the scan result, wait for the
    confirmation, exchange the token for cookies, fetch the liked-video list
    and start one download task per feed entry. The routine returns early
    when the scan check is falsy, when no ``qrToken`` is obtained, or when
    the video list cannot be fetched.
    """
    # get_login_qrcode returns [image_base64, token, signature]; each element
    # has a different consumer, so unpack instead of passing the whole list.
    image_base64, qr_login_token, qr_login_signature = get_login_qrcode()
    qrcode_image = base64_to_image(image_base64)
    qrcode_image.show()
    if not Check_takes_effect(qr_login_token, qr_login_signature):
        return
    # Release the QR image once the scan check has succeeded.
    qrcode_image.close()
    qrToken = Confirm_status(qr_login_token, qr_login_signature)
    if qrToken is None:
        print("登录未确认")
        return
    cookie = kuaishou_callback(qrToken)
    data = get_use_video_info(cookie)
    if data is None:
        print("获取信息文件失败")
        return
    data_list = data['data']['visionProfileLikePhotoList']['feeds']
    print(f"共{len(data_list)}个视频开始下载")
    tasks = []
    for result in data_list:
        h264 = result['photo']['videoResource']['h264']
        # NOTE(review): 'adaptationSet' and 'representation' may be lists in
        # the real payload; _first_item accepts either a list or a dict, so
        # the original dict-style access keeps working.
        adaptation = _first_item(h264['adaptationSet'])
        representation = _first_item(adaptation['representation'])
        tasks.append(asyncio.create_task(download_video(representation['url'])))
    # gather accepts an empty task list, whereas asyncio.wait raises
    # ValueError for one. return_exceptions=True lets one failed download
    # be reported without cancelling the others.
    outcomes = await asyncio.gather(*tasks, return_exceptions=True)
    for outcome in outcomes:
        if isinstance(outcome, Exception):
            print(f"下载出错: {outcome}")
# Run the async entry point only when executed as a script, not on import.
if "__main__" == __name__:
    asyncio.run(main())
我自己写了一个,可以正常运行,但是隔天之后必须用浏览器访问一下网页或者 app 才能继续获取数据,否则返回的全是未登录相关的代码。我从浏览器控制台找了一些 xhr 尝试了一下,有个应该是登录接口,之后我每天开始爬视频时就先访问那个登录接口,正常了一天,后面又失效了。现在完全没头绪了,只能每天爬之前手动访问一下浏览器,太麻烦了。
牛逼,这下可以把这些没营养(划掉)的视频都保存下来了。话说回来,慢脚的尺度和阿B差不多了,反而是 dy 现在审核比较严格了。
感谢分享
好家伙,这高低需要个营养快线
这个有点意思
感谢分享
感谢分享,测试下效果如何
感谢楼主分享
太帅了哥
太牛了太牛了
伙计,有没有成品?我喜欢的很多视频想下载,真的不好弄。