爬取快手全部喜欢视频

MiddleMan 发表于 2024-11-12 23:52

本帖最后由苏紫方璇于 2024-11-15 20:52 编辑

通过cookie的方式下载喜欢(红心)下的全部视频
直接上代码

#登录模块
import asyncio
import base64
import os
import random
from io import BytesIO

import aiofiles
import aiohttp
import requests
from PIL import Image

# Fetch the login QR code
def get_login_qrcode():
    """Start a QR-code login session and return its image and credentials.

    Sends a form-encoded POST to the ``qr/start`` endpoint and picks three
    fields out of the JSON reply.

    Returns:
        list: ``[image_base64, qrLoginToken, qrLoginSignature]``. The first
        item is meant for ``base64_to_image``; the other two are the
        arguments of ``Check_takes_effect`` and ``Confirm_status``.

    Raises:
        KeyError: if the reply lacks one of the expected fields.
    """
    # NOTE(review): this request goes to id.kuaishou.cn while the follow-up
    # calls in this file use id.kuaishou.com - confirm that is intended.
    url = "https://id.kuaishou.cn/rest/c/infra/ks/qr/start"
    params = {
        'sid': 'kuaishou.server.webday7',
        'channelType': 'UNKNOWN',
        'encryptHeaders': ''
    }
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Length": "63",
        "Content-Type": "application/x-www-form-urlencoded",
        "Cookie": "did=web_740982b1f8a4b3ccc1b1eb37ddb1261e",
        "Host": "id.kuaishou.cn",
        "Origin": "https://www.kuaishou.cn",
        "Pragma": "no-cache",
        "Referer": "https://www.kuaishou.cn/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Microsoft Edge";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"
    }

    data = requests.post(url, headers=headers, data=params).json()

    # The first element of this list was lost when the post was archived.
    # NOTE(review): 'imageData' is presumed to be the field carrying the
    # base64-encoded PNG - confirm against a live response.
    return [data['imageData'], data['qrLoginToken'], data['qrLoginSignature']]
# Decode a Base64 string into an image
def base64_to_image(base64_string):
    """Decode a Base64-encoded image and open it with Pillow.

    Args:
        base64_string: Base64 text (or bytes) of an encoded image file.

    Returns:
        The object returned by ``PIL.Image.open`` for the decoded bytes.
    """
    image_data = base64.b64decode(base64_string)

    # Wrap the raw bytes in a file-like object so Pillow can read them.
    image = Image.open(BytesIO(image_data))

    return image
# Check whether the QR code has been scanned
def Check_takes_effect(qrLoginToken,qrLoginSignature):
    """Ask the ``qr/scanResult`` endpoint whether the QR code was scanned.

    Args:
        qrLoginToken: token returned together with the QR code.
        qrLoginSignature: signature returned together with the QR code.

    Returns:
        bool: ``True`` when the reply's ``result`` is 1 (reported to the user
        as "scanned, waiting for confirmation"); ``False`` for 707 (reported
        as "QR code expired") and for every other value.
    """
    url = "https://id.kuaishou.com/rest/c/infra/ks/qr/scanResult"
    params = {
        'qrLoginToken': qrLoginToken,
        'qrLoginSignature': qrLoginSignature,
        'channelType': 'UNKNOWN',
        'encryptHeaders': ''
    }
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "93",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "id.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Referer": "https://www.kuaishou.com/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    response = requests.post(url, headers=headers, data=params).json()
    result = response.get('result')
    if result == 1:
        print("已经扫码,等待确认")
        return True
    if result == 707:
        print("二维码已经失效，请重新获取")
    else:
        # Previously any other code fell through silently and returned None.
        print(f"扫码未成功，result={result}")
    return False

# Wait for the login to be confirmed on the phone
def Confirm_status(qrLoginToken,qrLoginSignature):
    """Query the ``qr/acceptResult`` endpoint for the confirmed login token.

    Args:
        qrLoginToken: token returned together with the QR code.
        qrLoginSignature: signature returned together with the QR code.

    Returns:
        The ``qrToken`` field of the reply when ``result`` is 1, otherwise
        ``None``.
    """
    url = "https://id.kuaishou.com/rest/c/infra/ks/qr/acceptResult"
    params = {
        "qrLoginToken": qrLoginToken,
        "qrLoginSignature": qrLoginSignature,
        "sid": "kuaishou.server.webday7",
        "channelType": "UNKNOWN",
        "encryptHeaders": ""
    }
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "121",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "id.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Referer": "https://www.kuaishou.com/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    response = requests.post(url, headers=headers, data=params).json()
    result = response.get('result')
    if result != 1:
        # Make the failure visible instead of returning None without a word.
        print(f"登录未确认，result={result}")
        return None
    print("登录成功")
    return response['qrToken']
# Exchange the qrToken for login cookies
def kuaishou_callback(qrToken):
    """Call the QR login callback endpoint and build the cookie dict.

    Args:
        qrToken: token returned by ``Confirm_status``.

    Returns:
        dict[str, str]: the cookies assembled by ``structure_cookie``, with
        every value converted to ``str``.

    Raises:
        KeyError: if the reply lacks ``userId`` or either token field.
    """
    url = "https://id.kuaishou.com/pass/kuaishou/login/qr/callback"
    params = {
        "qrToken": qrToken,
        "sid": "kuaishou.server.webday7",
        "channelType": "UNKNOWN",
        "encryptHeaders": ""
    }
    headers = {
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "connection": "keep-alive",
        "content-length": "354",
        "content-type": "application/x-www-form-urlencoded",
        "cookie": "did=web_110d3c68ae9762f3af4fcdc734b87449",
        "host": "id.kuaishou.com",
        "origin": "https://www.kuaishou.com",
        "pragma": "no-cache",
        "referer": "https://www.kuaishou.com/?isHome=1",
        "sec-ch-ua": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    response = requests.post(url, headers=headers, data=params).json()
    userId = response['userId']
    # NOTE(review): the '.at' value is what ends up in the
    # 'kuaishou.server.webday7_ph' cookie via structure_cookie - confirm
    # that this is the value the site expects there.
    kuaishou_server_webday7_at = response['kuaishou.server.webday7.at']
    kuaishou_server_webday7_st = response['kuaishou.server.webday7_st']
    cookie = structure_cookie(kuaishou_server_webday7_at,kuaishou_server_webday7_st,userId)
    # structure_cookie stores clientid as an int; stringify every value
    # before the dict is passed to requests as cookies.
    cookie = {key: str(value) for key, value in cookie.items()}
    return cookie
# Build the cookie dict
def structure_cookie(webday7_ph,webday7_st,userId):
    """Assemble the cookie dict sent with authenticated requests.

    Args:
        webday7_ph: value stored under ``kuaishou.server.webday7_ph``.
        webday7_st: value stored under ``kuaishou.server.webday7_st``.
        userId: account id stored under ``userId``.

    Returns:
        dict: the three arguments (converted to ``str``) plus fixed
        ``clientid``, ``did``, ``kpf`` and ``kpn`` entries. ``clientid`` is
        left as the int ``3``.
    """
    cookie = {
        'clientid':3,
        'did':'web_110d3c68ae9762f3af4fcdc734b87449',
        'kpf':'PC_WEB',
        'kpn':'KUAISHOU_VISION',
        'kuaishou.server.webday7_ph':str(webday7_ph),
        'kuaishou.server.webday7_st':str(webday7_st),
        'userId':str(userId)
    }
    return cookie
# Fetch the JSON listing of liked videos
def get_use_video_info(cookie, pcursor=""):
    """Request one page of the liked-videos feed from the GraphQL endpoint.

    Args:
        cookie: cookie dict as produced by ``kuaishou_callback``.
        pcursor: value for the query's ``$pcursor`` variable. The default
            empty string is what the original code always sent. The reply
            carries a ``pcursor`` field of its own, which is presumably the
            cursor for the following page - confirm before relying on it.

    Returns:
        The decoded JSON reply when the HTTP status is 200, otherwise
        ``None``.
    """
    url ="https://www.kuaishou.com/graphql"
    params = {"operationName":"visionProfileLikePhotoList","variables":{"pcursor":pcursor,"page":"profile"},"query":"fragment photoContent on PhotoEntity {\n__typename\nid\nduration\ncaption\noriginCaption\nlikeCount\nviewCount\ncommentCount\nrealLikeCount\ncoverUrl\nphotoUrl\nphotoH265Url\nmanifest\nmanifestH265\nvideoResource\ncoverUrls {\n url\n __typename\n}\ntimestamp\nexpTag\nanimatedCoverUrl\ndistance\nvideoRatio\nliked\nstereoType\nprofileUserTopPhoto\nmusicBlocked\nriskTagContent\nriskTagUrl\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n__typename\nid\nduration\ncaption\noriginCaption\nlikeCount\nviewCount\ncommentCount\nrealLikeCount\ncoverUrl\nphotoUrl\nphotoH265Url\nmanifest\nmanifestH265\nvideoResource\ncoverUrls {\n url\n __typename\n}\ntimestamp\nexpTag\nanimatedCoverUrl\ndistance\nvideoRatio\nliked\nstereoType\nprofileUserTopPhoto\nmusicBlocked\nriskTagContent\nriskTagUrl\n}\n\nfragment feedContentWithLiveInfo on Feed {\ntype\nauthor {\n id\n name\n headerUrl\n following\n livingInfo\n headerUrls {\n    url\n    __typename\n }\n __typename\n}\nphoto {\n ...photoContent\n ...recoPhotoFragment\n __typename\n}\ncanAddComment\nllsid\nstatus\ncurrentPcursor\ntags {\n type\n name\n __typename\n}\n__typename\n}\n\nquery visionProfileLikePhotoList($pcursor: String, $page: String, $webPageArea: String) {\nvisionProfileLikePhotoList(pcursor: $pcursor, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n    ...feedContentWithLiveInfo\n    __typename\n }\n hostName\n pcursor\n __typename\n}\n}\n"}
    # No Content-Length entry here: the body size now varies with pcursor,
    # and requests fills the header in from the encoded body.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Host": "www.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Pragma": "no-cache",
        "Referer": "https://www.kuaishou.com/profile/3xisyfk6x2djz44",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    # TLS certificate verification stays at the requests default (enabled):
    # this request carries the login cookies, so it must not be sent to an
    # unverified peer.
    response = requests.post(url, headers=headers, json=params, cookies=cookie)
    if response.status_code != 200:
        print(f"获取信息文件失败，HTTP状态码：{response.status_code}")
        return None
    data = response.json()
    print("获取信息文件成功...")
    return data
# Download one video
async def download_video(video_url, save_dir=r"C:\Users\MiddleMan\Videos\kuaishou"):
    """Download ``video_url`` into ``save_dir`` under a random ``.mp4`` name.

    Args:
        video_url: direct URL of the video file.
        save_dir: target directory; created if missing. The default is the
            directory the original script was hard-wired to.

    Returns:
        None. Success or failure is reported on stdout.
    """
    # 64 random bits instead of a 4-digit number: with only 9000 possible
    # names, concurrent downloads regularly picked the same one and
    # overwrote each other.
    filename = f"{random.getrandbits(64):016x}.mp4"
    os.makedirs(save_dir, exist_ok=True)
    video_name = os.path.join(save_dir, filename)
    async with aiohttp.ClientSession() as session:
        async with session.get(video_url) as response:
            if response.status != 200:
                print(f"{filename}下载失败")
                return
            # The context manager closes the file even if a write fails;
            # chunked reads avoid holding the whole video in memory.
            async with aiofiles.open(video_name, mode='wb') as f:
                async for chunk in response.content.iter_chunked(64 * 1024):
                    await f.write(chunk)
            print(f"{filename}下载成功")
# Entry coroutine
async def main():
    """Log in by QR code, list the liked videos and download them.

    Steps: fetch and display the QR code, check that it was scanned, obtain
    the confirmed token, turn it into cookies, request the liked-videos
    listing, then download every entry concurrently. The coroutine returns
    early (after the callee has printed its message) when any login step or
    the listing request fails. Only the page returned by a single
    ``get_use_video_info(cookie)`` call is processed.
    """
    # get_login_qrcode() yields [image_base64, qrLoginToken, qrLoginSignature];
    # each consumer needs its own element, not the whole list.
    image_b64, login_token, login_signature = get_login_qrcode()

    qrcode_image = base64_to_image(image_b64)
    qrcode_image.show()

    if not Check_takes_effect(login_token, login_signature):
        return

    qrcode_image.close()
    qrToken = Confirm_status(login_token, login_signature)
    if not qrToken:
        return

    cookie = kuaishou_callback(qrToken)
    data = get_use_video_info(cookie)
    if not data:
        return

    like_list = (data.get('data') or {}).get('visionProfileLikePhotoList') or {}
    data_list = like_list.get('feeds') or []
    print(f"共{len(data_list)}个视频开始下载")

    tasks = []
    for result in data_list:
        photo = result.get('photo') or {}
        try:
            # NOTE(review): adaptationSet and representation are presumed to
            # be lists (the [0] subscripts were lost when the post was
            # archived) - confirm against a live response.
            url = photo['videoResource']['h264']['adaptationSet'][0]['representation'][0]['url']
        except (KeyError, IndexError, TypeError):
            # photoUrl is also requested by the GraphQL query; use it when
            # the h264 entry is missing or shaped differently.
            url = photo.get('photoUrl')
        if url:
            tasks.append(asyncio.create_task(download_video(url)))

    # asyncio.wait() raises ValueError when handed an empty collection.
    if tasks:
        await asyncio.wait(tasks)
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    asyncio.run(main())

zzh151223 发表于 2024-11-22 22:55

我自己写了一个，可以正常运行，但是隔天之后必须用浏览器访问一下网页或者app才能继续获取数据，否则返回全是未登录相关的代码，我从浏览器控制台找了一些xhr尝试了一下，有个应该是登录接口，之后我每天开始爬视频时就先访问那个登录接口，正常了一天，后面又失效了。现在完全没头绪了，只能每天爬之前手动访问一下浏览器，太麻烦了

laugh68 发表于 2024-11-14 13:02

牛逼，这下可以把这些没营养（划掉）的视频都保存下来了，话说回来，慢脚的尺度和阿B差不多了，反而是dy现在审核比较严格了。

Chielly 发表于 2024-11-13 09:11

感谢分享

qq1475 发表于 2024-11-13 09:13

好家伙，这高低需要个营养快线

chensvip 发表于 2024-11-13 09:32

这个有点意思

zzt5211314 发表于 2024-11-13 09:38

感谢分享

cpckly 发表于 2024-11-13 09:41

感谢分享，测试下效果如何

Qinmuyi 发表于 2024-11-13 09:58

感谢楼主分享

linfenglin 发表于 2024-11-13 10:21

太帅了哥

smileluoye 发表于 2024-11-13 10:23

太牛了太牛了{:1_921:}

sizhan19861117 发表于 2024-11-13 10:24

伙计有没有成品？我喜欢的很多视频想下载真的不好弄

页: [1] 2 3 4

吾爱破解 - 52pojie.cn's Archiver

爬取快手全部喜欢视频