MiddleMan 发表于 2024-11-12 23:52

爬取快手全部喜欢视频

本帖最后由 苏紫方璇 于 2024-11-15 20:52 编辑

通过cookie的方式下载喜欢(红心)下的全部视频
直接上代码

#登录模块
from io import BytesIO
from PIL import Image
import base64
import requests
import aiohttp
import asyncio
import aiofiles
import random


# Request a login QR code from the kuaishou QR-start endpoint.
def get_login_qrcode():
    """Start a QR-code login session.

    Returns:
        list: ``[image_base64, qrLoginToken, qrLoginSignature]`` — the base64
        PNG of the QR code plus the token/signature pair that identifies this
        login session for the subsequent scan/accept polling calls.

    Raises:
        KeyError: if the response does not contain the expected fields
        (e.g. the request was rejected).
    """
    url = "https://id.kuaishou.cn/rest/c/infra/ks/qr/start"
    params = {
        'sid': 'kuaishou.server.webday7',
        'channelType': 'UNKNOWN',
        'encryptHeaders': ''
    }
    # Browser-captured headers. "Content-Length" is deliberately omitted:
    # requests computes it from the actual body, and a stale hard-coded value
    # can corrupt the request.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Cookie": "did=web_740982b1f8a4b3ccc1b1eb37ddb1261e",
        "Host": "id.kuaishou.cn",
        "Origin": "https://www.kuaishou.cn",
        "Pragma": "no-cache",
        "Referer": "https://www.kuaishou.cn/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Microsoft Edge";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0"
    }

    data = requests.post(url, headers=headers, data=params).json()

    # The original return line was garbled (`return ,a,b]` — a syntax error);
    # reconstructed as a three-element list.
    # NOTE(review): the 'imageData' field name is assumed from the QR-start
    # API response — confirm against a live capture.
    return [data['imageData'], data['qrLoginToken'], data['qrLoginSignature']]
# Decode a base64 string into an image.
def base64_to_image(base64_string):
    """Decode *base64_string* and return it as a PIL ``Image`` object."""
    raw_bytes = base64.b64decode(base64_string)
    # Wrap the bytes in an in-memory buffer so PIL can open it like a file.
    return Image.open(BytesIO(raw_bytes))
# Check whether the QR code has been scanned on the phone.
def Check_takes_effect(qrLoginToken, qrLoginSignature):
    """Poll the scan-result endpoint once.

    Args:
        qrLoginToken: token returned by :func:`get_login_qrcode`.
        qrLoginSignature: signature returned by :func:`get_login_qrcode`.

    Returns:
        True if the QR code has been scanned, False if it has expired,
        None for any other (still-waiting / unknown) status.
    """
    url = "https://id.kuaishou.com/rest/c/infra/ks/qr/scanResult"
    params = {
        'qrLoginToken': qrLoginToken,
        'qrLoginSignature': qrLoginSignature,
        'channelType': 'UNKNOWN',
        'encryptHeaders': ''
    }
    # "Content-Length" omitted on purpose: the token/signature lengths vary,
    # so a hard-coded value would not match the actual body.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "id.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Referer": "https://www.kuaishou.com/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    response = requests.post(url, headers=headers, data=params).json()
    result = response.get('result')
    if result == 707:
        print("二维码已经失效,请重新获取")
        return False
    elif result == 1:
        print("已经扫码,等待确认")
        return True
    # Previously this fell through and returned None silently; keep the None
    # return (falsy, same as before) but report the unexpected status code.
    print(f"扫码状态未知(result={result})")
    return None
   
# Wait for the user to confirm the login on the phone.
def Confirm_status(qrLoginToken, qrLoginSignature):
    """Poll the accept-result endpoint once.

    Args:
        qrLoginToken: token returned by :func:`get_login_qrcode`.
        qrLoginSignature: signature returned by :func:`get_login_qrcode`.

    Returns:
        The ``qrToken`` string on success, None otherwise.
    """
    url = "https://id.kuaishou.com/rest/c/infra/ks/qr/acceptResult"
    params = {
        "qrLoginToken": qrLoginToken,
        "qrLoginSignature": qrLoginSignature,
        "sid": "kuaishou.server.webday7",
        "channelType": "UNKNOWN",
        "encryptHeaders": ""
    }
    # "Content-Length" omitted: requests computes the correct value itself.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Host": "id.kuaishou.com",
        "Origin": "https://www.kuaishou.com",
        "Referer": "https://www.kuaishou.com/?isHome=1",
        "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "Sec-CH-UA-Mobile": "?0",
        "Sec-CH-UA-Platform": '"Windows"',
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    response = requests.post(url, headers=headers, data=params).json()
    if response.get('result') == 1:
        print("登录成功")
        return response['qrToken']
    # Previously this returned None silently on any non-1 result; keep the
    # None return but surface the failure code for debugging.
    print(f"登录确认失败(result={response.get('result')})")
    return None
# Exchange the confirmed qrToken for session credentials and build the cookie.
def kuaishou_callback(qrToken):
    """Finish the QR login and return the authenticated cookie dict.

    Args:
        qrToken: value returned by :func:`Confirm_status`.

    Returns:
        dict[str, str]: cookie suitable for ``requests`` ``cookies=``.

    Raises:
        KeyError: if the login failed and the response lacks the expected
        credential fields.
    """
    url = "https://id.kuaishou.com/pass/kuaishou/login/qr/callback"
    params = {
        "qrToken": qrToken,
        "sid": "kuaishou.server.webday7",
        "channelType": "UNKNOWN",
        "encryptHeaders": ""
    }
    # "content-length" omitted: the qrToken length varies, so the captured
    # hard-coded value would be wrong; requests sets it automatically.
    headers = {
        "accept": "*/*",
        "accept-encoding": "gzip, deflate, br, zstd",
        "accept-language": "zh-CN,zh;q=0.9",
        "cache-control": "no-cache",
        "connection": "keep-alive",
        "content-type": "application/x-www-form-urlencoded",
        "cookie": "did=web_110d3c68ae9762f3af4fcdc734b87449",
        "host": "id.kuaishou.com",
        "origin": "https://www.kuaishou.com",
        "pragma": "no-cache",
        "referer": "https://www.kuaishou.com/?isHome=1",
        "sec-ch-ua": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
    }
    response = requests.post(url, headers=headers, data=params).json()
    userId = response['userId']
    # NOTE(review): the '.at' value is passed to structure_cookie's
    # *webday7_ph* parameter, matching the original code — confirm the
    # cookie field mapping against a live session.
    kuaishou_server_webday7_at = response['kuaishou.server.webday7.at']
    kuaishou_server_webday7_st = response['kuaishou.server.webday7_st']
    cookie = structure_cookie(kuaishou_server_webday7_at, kuaishou_server_webday7_st, userId)
    # structure_cookie leaves 'clientid' as an int; coerce every value to str
    # so the dict is usable as a requests cookie jar.
    # (Removed the dead `cookie = ""` initialisation the original had.)
    return {key: str(value) for key, value in cookie.items()}
# Assemble the cookie dict used for authenticated kuaishou.com requests.
def structure_cookie(webday7_ph, webday7_st, userId):
    """Return the cookie dict combining fixed client fields with the
    per-session credentials (*webday7_ph*, *webday7_st*, *userId*)."""
    fixed_fields = {
        'clientid': 3,
        'did': 'web_110d3c68ae9762f3af4fcdc734b87449',
        'kpf': 'PC_WEB',
        'kpn': 'KUAISHOU_VISION',
    }
    session_fields = {
        'kuaishou.server.webday7_ph': str(webday7_ph),
        'kuaishou.server.webday7_st': str(webday7_st),
        'userId': str(userId),
    }
    return {**fixed_fields, **session_fields}
# Fetch the liked-videos list (JSON) from the kuaishou GraphQL endpoint.
def get_use_video_info(cookie):
    """POST the visionProfileLikePhotoList GraphQL query and return the
    decoded JSON response, or None if the HTTP status is not 200.

    Args:
        cookie: dict of session cookies built by kuaishou_callback().
    """
    url ="https://www.kuaishou.com/graphql"
    # Full GraphQL document captured from the web client; only the first page
    # is requested (pcursor "") — pagination is not implemented here.
    params = {"operationName":"visionProfileLikePhotoList","variables":{"pcursor":"","page":"profile"},"query":"fragment photoContent on PhotoEntity {\n__typename\nid\nduration\ncaption\noriginCaption\nlikeCount\nviewCount\ncommentCount\nrealLikeCount\ncoverUrl\nphotoUrl\nphotoH265Url\nmanifest\nmanifestH265\nvideoResource\ncoverUrls {\n    url\n    __typename\n}\ntimestamp\nexpTag\nanimatedCoverUrl\ndistance\nvideoRatio\nliked\nstereoType\nprofileUserTopPhoto\nmusicBlocked\nriskTagContent\nriskTagUrl\n}\n\nfragment recoPhotoFragment on recoPhotoEntity {\n__typename\nid\nduration\ncaption\noriginCaption\nlikeCount\nviewCount\ncommentCount\nrealLikeCount\ncoverUrl\nphotoUrl\nphotoH265Url\nmanifest\nmanifestH265\nvideoResource\ncoverUrls {\n    url\n    __typename\n}\ntimestamp\nexpTag\nanimatedCoverUrl\ndistance\nvideoRatio\nliked\nstereoType\nprofileUserTopPhoto\nmusicBlocked\nriskTagContent\nriskTagUrl\n}\n\nfragment feedContentWithLiveInfo on Feed {\ntype\nauthor {\n    id\n    name\n    headerUrl\n    following\n    livingInfo\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n}\nphoto {\n    ...photoContent\n    ...recoPhotoFragment\n    __typename\n}\ncanAddComment\nllsid\nstatus\ncurrentPcursor\ntags {\n    type\n    name\n    __typename\n}\n__typename\n}\n\nquery visionProfileLikePhotoList($pcursor: String, $page: String, $webPageArea: String) {\nvisionProfileLikePhotoList(pcursor: $pcursor, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContentWithLiveInfo\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n}\n}\n"}
    # Browser-captured headers. NOTE(review): "Content-Length: 1775" is
    # hard-coded from the capture — if the JSON body changes size this may
    # no longer match; confirm requests overrides it.
    headers = {
    "Accept": "*/*",
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Content-Length": "1775",
    "Content-Type": "application/json",
    # Cookie is passed via the `cookies=` argument below instead of a header.
    #"Cookie": cookie,
    "Host": "www.kuaishou.com",
    "Origin": "https://www.kuaishou.com",
    "Pragma": "no-cache",
    "Referer": "https://www.kuaishou.com/profile/3xisyfk6x2djz44",
    "Sec-CH-UA": '"Chromium";v="130", "Google Chrome";v="130", "Not?A_Brand";v="99"',
    "Sec-CH-UA-Mobile": "?0",
    "Sec-CH-UA-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
}
    # verify=False disables TLS certificate verification — insecure; kept as-is.
    response = requests.post(url, headers=headers, json=params,cookies=cookie,verify=False)
    if response.status_code == 200:
      data = response.json()
      print("获取信息文件成功...")
      return data
    # NOTE(review): non-200 responses fall through and return None implicitly.
# Download a single video to disk.
async def download_video(video_url, save_dir=r"C:\Users\MiddleMan\Videos\kuaishou"):
    """Fetch *video_url* and save it as a uniquely named .mp4 in *save_dir*.

    Args:
        video_url: direct URL of the video file.
        save_dir: destination directory (created if missing); defaults to the
            original hard-coded path for backward compatibility.
    """
    # uuid4 instead of randint(1000, 9999): 9000 possible names collide
    # quickly and silently overwrite earlier downloads.
    filename = uuid.uuid4().hex + '.mp4'
    os.makedirs(save_dir, exist_ok=True)
    video_name = os.path.join(save_dir, filename)
    async with aiohttp.ClientSession() as session:
        async with session.get(video_url) as response:
            if response.status == 200:
                # `async with` guarantees the file is closed even if the
                # write fails (original used open/close manually).
                async with aiofiles.open(video_name, mode='wb') as f:
                    await f.write(await response.read())
                # The original print text was garbled by forum extraction
                # ("(unknown)"); report the actual filename.
                print(f"{filename}下载成功")
            else:
                print(f"{filename}下载失败")
# Entry point: QR-code login, fetch the liked-video list, download everything.
async def main():
    """Run the full login + download pipeline."""
    # get_login_qrcode returns [image_base64, token, signature]; the original
    # passed the raw return value everywhere (a consequence of its garbled
    # return line), sending the base64 image where the token/signature belong.
    qr_image_b64, qr_login_token, qr_login_signature = get_login_qrcode()

    # Decode and show the QR code so the user can scan it with the phone.
    qrcode_image = base64_to_image(qr_image_b64)
    qrcode_image.show()

    # NOTE(review): a single immediate check gives the user very little time
    # to scan; a polling loop with a delay would be friendlier — confirm the
    # scanResult codes before adding one.
    bool_takes_effect = Check_takes_effect(qr_login_token, qr_login_signature)
    if not bool_takes_effect:
        # Login never became effective; bail out instead of crashing later
        # on the unauthenticated requests.
        return

    qrcode_image.close()
    qrToken = Confirm_status(qr_login_token, qr_login_signature)
    cookie = kuaishou_callback(qrToken)
    data = get_use_video_info(cookie)
    data_list = data['data']['visionProfileLikePhotoList']['feeds']
    print(f"共{len(data_list)}个视频开始下载")

    tasks = []
    for result in data_list:
        # NOTE(review): adaptationSet/representation are indexed as dicts
        # here, matching the original — verify they are not lists in the
        # live API response.
        url = result['photo']['videoResource']['h264']['adaptationSet']['representation']['url']
        tasks.append(asyncio.create_task(download_video(url)))
    # asyncio.wait raises ValueError on an empty set of awaitables.
    if tasks:
        await asyncio.wait(tasks)

if __name__ == '__main__':
    asyncio.run(main())

zzh151223 发表于 2024-11-22 22:55

我自己写了一个,可以正常运行,但是隔天之后必须用浏览器访问一下网页或者app才能继续获取数据,否则返回全是未登录相关的代码,我从浏览器控制台找了一些xhr尝试了一下,有个应该是登录接口,之后我每天开始爬视频时就先访问那个登录接口,正常了一天,后面又失效了。现在完全没头绪了,只能每天爬之前手动访问一下浏览器,太麻烦了

laugh68 发表于 2024-11-14 13:02

牛逼,这下可以把这些没营养(划掉)的视频都保存下来了,话说回来,慢脚的尺度和阿B差不多了,反而是dy现在审核比较严格了。

Chielly 发表于 2024-11-13 09:11

感谢分享

qq1475 发表于 2024-11-13 09:13

好家伙,这高低需要个营养快线

chensvip 发表于 2024-11-13 09:32

这个有点意思

zzt5211314 发表于 2024-11-13 09:38

感谢分享

cpckly 发表于 2024-11-13 09:41

感谢分享,测试下效果如何

Qinmuyi 发表于 2024-11-13 09:58

感谢楼主分享

linfenglin 发表于 2024-11-13 10:21

太帅了哥

smileluoye 发表于 2024-11-13 10:23

太牛了太牛了

sizhan19861117 发表于 2024-11-13 10:24

伙计 有没有成品?我喜欢的很多视频 想下载 真的不好弄
页: [1] 2 3 4
查看完整版本: 爬取快手全部喜欢视频