python下载快手个人主页所有已点赞作品

thisbug 发表于 2023-9-29 16:40

本帖最后由苏紫方璇于 2023-9-29 22:08 编辑

import json
import re
import os
# import requests
import urllib.request
from multiprocessing import Pool
import time

# GraphQL endpoint that post() sends its query to.
requestUrl = 'https://www.kuaishou.com/graphql'
# Root directory for downloaded videos. A raw string is required: '\k' is not
# a valid escape sequence and triggers a SyntaxWarning on recent Python versions.
folder_path = r'D:\kuaishou'
# Cookie header value of a logged-in session: capture the cookie string that
# starts with "ktrace-context" from the H5 (mobile web) Kuaishou site.
cookie = ''
# Pagination cursor; the main script overwrites it before the first request.
pcursor = '1'

def post(Cookie, pcursor):
    """Fetch one page of the logged-in user's liked videos.

    Sends the ``visionProfileLikePhotoList`` GraphQL query to ``requestUrl``
    using ``urllib.request`` (already imported by this file), so the function
    no longer depends on the ``requests`` package whose import is commented
    out.

    Args:
        Cookie: Value for the ``Cookie`` request header.
        pcursor: Pagination cursor sent as the ``pcursor`` query variable.

    Returns:
        The response body decoded as UTF-8 text.

    Raises:
        urllib.error.URLError: On network failure or a non-success HTTP
            status (``HTTPError`` is a subclass).
    """
    data = {"operationName":"visionProfileLikePhotoList","variables":{"pcursor":pcursor,"page":"profile"},"query":"query visionProfileLikePhotoList($pcursor: String, $page: String, $webPageArea: String) {\nvisionProfileLikePhotoList(pcursor: $pcursor, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n    type\n    author {\n    id\n    name\n    following\n    headerUrl\n    headerUrls {\n       cdn\n       url\n       __typename\n    }\n    __typename\n    }\n    tags {\n    type\n    name\n    __typename\n    }\n    photo {\n    id\n    duration\n    caption\n    likeCount\n    realLikeCount\n    coverUrl\n    coverUrls {\n       cdn\n       url\n       __typename\n    }\n    photoUrls {\n       cdn\n       url\n       __typename\n    }\n    photoUrl\n    liked\n    timestamp\n    expTag\n    animatedCoverUrl\n    stereoType\n    videoRatio\n    __typename\n    }\n    canAddComment\n    currentPcursor\n    llsid\n    status\n    __typename\n }\n hostName\n pcursor\n __typename\n}\n}\n"}
    body = json.dumps(data).encode('utf-8')
    # Host and Content-Length are deliberately not set by hand: urllib derives
    # them from the URL and the actual body, whereas a fixed Content-Length
    # stops matching as soon as the cursor value changes length.
    headers = {
        'accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4621.0 Safari/537.36',
        'content-type': 'application/json',
        'Origin': 'https://www.kuaishou.com',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Referer': 'https://www.kuaishou.com/profile',  # URL of your own liked-videos profile page
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cookie': Cookie,
    }
    request = urllib.request.Request(requestUrl, data=body, headers=headers, method='POST')
    with urllib.request.urlopen(request, timeout=30) as response:
        return response.read().decode('utf-8')
def down(feeds, keywork):
    """Download every video in *feeds* into ``folder_path/<keywork>/``.

    Files are named after the photo ``id`` rather than its ``duration``:
    two liked videos with the same duration would otherwise map to the same
    file name and the second one would be skipped as "already existing".

    Args:
        feeds: Iterable of feed dicts; each must provide ``feed['photo']['id']``
            and ``feed['photo']['photoUrl']``.
        keywork: Name of the sub-directory (under ``folder_path``) to save into.
    """
    # progressbar() builds the target from directory + file name, so the
    # trailing separator is kept on the directory part.
    filepath = folder_path + '/' + keywork + '/'
    for feed in feeds:
        photo = feed['photo']
        filename = str(photo['id']) + '.mp4'
        target = filepath + filename
        if os.path.exists(target):
            print(filename + ",已存在，跳过")
            continue
        progressbar(photo['photoUrl'], filepath, filename)
        # Report success only if the file is really there; the downloader
        # does not signal failures through its return value.
        if os.path.exists(target):
            print(filename + ",下载完成")
        else:
            print(filename + ",下载失败")
def url_response(url, filepath, filename):
    """Stream *url* into the file at *filepath*, printing a text progress bar.

    The previous body could not run: it referenced undefined names
    (``response``, ``total_length``), used ``progressbar`` as a module even
    though that name is a function in this file, and wrote one byte per chunk.
    This version uses only ``urllib.request``.

    Args:
        url: Address of the resource to download.
        filepath: Destination file path; opened for binary writing as given.
        filename: Unused; kept so existing call signatures stay valid.

    Raises:
        urllib.error.URLError: If the resource cannot be opened.
        OSError: If the destination cannot be written.
    """
    chunk_size = 64 * 1024
    bar_width = 50
    with urllib.request.urlopen(url, timeout=30) as response, open(filepath, 'wb') as f:
        # The header may be absent (e.g. chunked transfer); 0 disables the bar.
        total_length = int(response.headers.get('Content-Length') or 0)
        written = 0
        while True:
            chunk = response.read(chunk_size)
            if not chunk:
                break
            f.write(chunk)
            written += len(chunk)
            if total_length:
                done = min(bar_width, int(written * bar_width / total_length))
                print('\rProgress: %6.2f%% [%s%s]' % (
                    written * 100 / total_length,
                    '#' * done,
                    ' ' * (bar_width - done),
                ), end='')
    print()
def progressbar(url, filepath, filename):
    """Download *url* to ``filepath``/``filename`` while printing progress.

    Uses ``urllib.request`` (already imported by this file) instead of the
    un-imported ``requests`` package. The data is first written to a
    ``.part`` file and renamed on success, so an interrupted download is
    never mistaken for a finished one. Download errors are reported and
    swallowed so that a batch of downloads can continue.

    Args:
        url: Address of the video to download.
        filepath: Destination directory; created (with parents) if missing.
        filename: Destination file name; newline characters are removed.

    Returns:
        None.
    """
    from http.client import HTTPException

    filename = filename.replace("\n", "")
    if not url:
        print('Download failed: %s (no url)' % filename)
        return
    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(filepath, exist_ok=True)
    target = os.path.join(filepath, filename)
    partial = target + '.part'
    chunk_size = 64 * 1024
    size = 0
    start = time.time()
    try:
        with urllib.request.urlopen(url, timeout=30) as response:
            # The header may be missing; 0 means "unknown size".
            content_size = int(response.headers.get('Content-Length') or 0)
            print('Start download,:{size:.2f} MB'.format(size=content_size / 1024 / 1024))
            with open(partial, 'wb') as file:
                while True:
                    data = response.read(chunk_size)
                    if not data:
                        break
                    file.write(data)
                    size += len(data)
                    if content_size:
                        print('\r' + '[下载进度]:%s%.2f%%' % ('>' * int(size * 50 / content_size), float(size / content_size * 100)), end=' ')
        if content_size and size != content_size:
            raise OSError('incomplete download: %d of %d bytes' % (size, content_size))
        os.replace(partial, target)
    except (OSError, ValueError, HTTPException) as err:
        # Best effort: report, clean up the partial file and let the caller go on.
        print('Download failed: %s (%s)' % (filename, err))
        if os.path.exists(partial):
            os.remove(partial)
    else:
        end = time.time()
        print('Download completed!,times: %.2f秒' % (end - start))


if __name__ == "__main__":
    # Script entry point: page through the liked-videos list, collect every
    # page of feeds, then download them all into folder_path/<keyWork>/.
    keyWork = 'zan'
    links = []

    # The first request is sent with an empty cursor.
    pcursor = ''
    while True:
        result = post(cookie, pcursor)
        data = json.loads(result)
        payload = data.get('data') or {}

        # Stop when the response carries no liked-videos list at all.
        page = payload.get('visionProfileLikePhotoList')
        if not page:
            print('success')
            break

        # Collect this page's feeds BEFORE looking at the cursor, so the last
        # page (the one without a follow-up cursor) is not thrown away.
        feeds = page.get('feeds') or []
        if feeds:
            print(feeds)
            links.append(feeds)

        # Cursor for the next page; an empty value means there is none.
        # NOTE(review): the endpoint is believed to return 'no_more' on the
        # last page - confirm against a real response.
        next_cursor = page.get('pcursor')
        if not next_cursor or next_cursor == 'no_more':
            print('success')
            break

        if not feeds:
            print(payload)
            print('no videos')
            break

        # Guard against looping forever if the server repeats the same cursor.
        if next_cursor == pcursor:
            print('success')
            break
        pcursor = next_cursor

    for link in links:
        down(link, keyWork)
    print('while done')

iapeng 发表于 2023-11-18 09:10

提示如下错误：
Traceback (most recent call last):
File "D:\py231117\pytestpro\demo\ksdown.py", line 114, in <module>
result = post(cookie, pcursor)
^^^^^^^^^^^^^^^^^^^^^
File "D:\py231117\pytestpro\demo\ksdown.py", line 36, in post
r = requests.post(requestUrl, data=json.dumps(data), headers=headers)
^^^^^^^^
NameError: name 'requests' is not defined. Did you mean: 'requestUrl'?

Process finished with exit code 1

苏紫方璇 发表于 2023-9-29 22:08

推荐使用此贴中的方法来插入代码
【公告】发帖代码插入以及添加链接教程（有福利）
https://www.52pojie.cn/thread-713042-1-1.html
(出处: 吾爱破解论坛)

吖力锅 发表于 2023-9-29 22:42

牛逼大了，我还不会这样爬

LinliZ 发表于 2023-9-30 08:27

大佬厉害，支持一波

eerrtr3 发表于 2023-9-30 10:46

大佬，有没有KS视频去水印下载啊，没法点赞啊。

HackYike 发表于 2023-9-30 11:47

还是觉得这种代码分享可以多添加注释便于理解，否则懂的人不需要看，不懂的也看不懂

guohuanxian 发表于 2023-9-30 12:30

只能下载点赞的作品吗？

MAOSKE 发表于 2023-9-30 13:52

感谢分享！

siyrra 发表于 2023-10-9 09:24

最近在学python，像是看懂了又好像没看懂

页: [1] 2

吾爱破解 - 52pojie.cn's Archiver

python下载快手个人主页所有已点赞作品