本帖最后由 ermao 于 2020-9-23 19:24 编辑
话说
之前的那俩软件是更新不了了:抖音的官方网页接口不给数据了,快手的一直提示访问频繁,所以,勿催。
前段时间抓了两个抖音的接口:一个好像是官方新开放的解析接口,另一个是自己瞎拼接测试出来的、未公开的查用户信息的接口,一起分享给大家。
其实还抓了两个快手的接口,但是发现需要网页端的cookie(web_did),需要手动打码获取,而且失效很快;后来还是解决了did的问题,也做了个接口,就不发了。
代码放到阿里云的函数计算,可以直接托管到公网(腾讯云的云函数还需要自己配置API网关),对于一些需要手动触发的云函数很方便,前不久才知道,安利一下。
代码
# -*- coding: utf-8 -*-
'''
@ file : douyin.py
@ Time : 2020-09-13 13:59:59 (Sunday)
@ Author : ermao
@ version : 1.0
@ link : https://erma0.gitee.io
@ Desc : Douyin-related API helpers
'''
import requests
import re
# Mobile Chrome (Android) User-Agent string used for the request headers.
_MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Mobile Safari/537.36'

# Default HTTP headers shared by the helpers in this module.
headers = {'User-Agent': _MOBILE_USER_AGENT}
def user_info(uid, timeout=10):
    """Fetch a Douyin user's profile from the iesdouyin web API.

    Args:
        uid: The user id, as a string or an integer.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The ``user_info`` object of the JSON response, or
        ``{'error': '出错了!'}`` when the id is empty, the request fails,
        or the response has no ``user_info`` entry.
    """
    failure = {'error': '出错了!'}

    # Normalise up front: concatenating a non-string id onto the URL used to
    # raise TypeError before the try block was entered.
    uid = '' if uid is None else str(uid).strip()
    if not uid:
        # An empty id can never match a user; skip the network round-trip.
        return failure

    try:
        response = requests.get(
            'https://www.iesdouyin.com/web/api/v2/user/info/',
            params={'uid': uid},  # let requests build and encode the query
            timeout=timeout,  # without a timeout a stalled server hangs forever
        )
        return response.json()['user_info']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or unexpected JSON shape.
        return failure
def user_info_url(url, timeout=10):
    """Resolve a Douyin user share link and return that user's profile.

    Accepts either a full share URL that already contains
    ``share/user/<digits>`` or a short link whose redirect ``Location``
    header points at such a URL.

    Args:
        url: The share URL or short link.
        timeout: Seconds to wait for the redirect probe before giving up.

    Returns:
        Whatever ``user_info`` returns for the extracted id, or
        ``{'error': 'URL错误!'}`` when no user id can be extracted.
    """
    failure = {'error': 'URL错误!'}
    if not isinstance(url, str) or not url:
        return failure

    if 'share/user/' in url:
        target = url
    else:
        try:
            # Only the redirect target is needed, so do not follow it.
            response = requests.head(
                url,
                headers=headers,
                allow_redirects=False,
                timeout=timeout,
            )
            target = response.headers['Location']
        except (requests.RequestException, KeyError):
            # Request failed, or the response was not a redirect.
            return failure

    # A failed match used to raise IndexError for direct URLs because the
    # lookup sat outside the try block; treat it as a bad URL instead.
    match = re.search(r'share/user/(\d+)', target)
    if match is None:
        return failure
    return user_info(match.group(1))
def parse_by_url(url, timeout=10):
    """Resolve a Douyin video share link and return the video's details.

    Accepts either a full share URL that already contains
    ``/share/video/<digits>`` or a short link whose redirect ``Location``
    header points at such a URL.

    Args:
        url: The share URL or short link.
        timeout: Seconds to wait for the redirect probe before giving up.

    Returns:
        Whatever ``parse_by_id`` returns for the extracted id, or
        ``{'error': 'URL错误!'}`` when no video id can be extracted.
    """
    failure = {'error': 'URL错误!'}
    if not isinstance(url, str) or not url:
        return failure

    if '/share/video/' in url:
        target = url
    else:
        try:
            # Only the redirect target is needed, so do not follow it.
            response = requests.head(
                url,
                headers=headers,
                allow_redirects=False,
                timeout=timeout,
            )
            target = response.headers['Location']
        except (requests.RequestException, KeyError):
            # Request failed, or the response was not a redirect.
            return failure

    # A failed match used to raise IndexError for direct URLs because the
    # lookup sat outside the try block; treat it as a bad URL instead.
    match = re.search(r'share/video/(\d+)', target)
    if match is None:
        return failure
    return parse_by_id(match.group(1))
def parse_by_id(id, timeout=10):
    """Fetch the details of one Douyin video from the iesdouyin web API.

    Args:
        id: The video (item) id, as a string or an integer. The parameter
            name shadows the builtin but is kept for caller compatibility.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The first entry of the response's ``item_list``, or
        ``{'error': '出错了!'}`` when the id is empty, the request fails,
        or the response contains no items.
    """
    failure = {'error': '出错了!'}

    # Normalise up front: concatenating a non-string id onto the URL used to
    # raise TypeError before the try block was entered.
    item_id = '' if id is None else str(id).strip()
    if not item_id:
        # An empty id can never match a video; skip the network round-trip.
        return failure

    try:
        response = requests.get(
            'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/',
            params={'item_ids': item_id},  # let requests build the query
            timeout=timeout,  # without a timeout a stalled server hangs forever
        )
        return response.json()['item_list'][0]
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError):
        # Network failure, non-JSON body, missing key, or empty item list.
        return failure
if __name__ == "__main__":
    # Manual smoke test: resolve a short video link and print the result.
    # Other calls that can be tried here instead:
    #   user_info_url('https://v.douyin.com/JBknYQp/')
    #   user_info('72673737181')
    #   parse_by_id('6808453751930719502')
    result = parse_by_url('https://v.douyin.com/JBBebxv/')
    print(result)
|