好友
阅读权限35
听众
最后登录1970-1-1
|
本帖最后由 yuupuu 于 2023-11-11 23:23 编辑
简单做了一个改进：可以部署在服务器上运行，通过 HTTP 传参调用，相当于实现了一个 API，可用于开发网页版或小程序的解析功能。
[Python] 纯文本查看 复制代码 from flask import Flask, request, jsonify
import requests
import re
# Flask application object; the route handler below is registered on it.
app = Flask(__name__)
@app.route('/')
def index():
    """Handle ``GET /``: turn the ``url`` query parameter into image URLs.

    Returns:
        A JSON response. When ``url`` is missing or empty the payload is
        ``{'error': 'Missing URL parameter'}``; otherwise it is whatever
        ``get_image_urls`` returns for that URL.
    """
    target = request.args.get('url')
    if not target:
        return jsonify({'error': 'Missing URL parameter'})
    return jsonify(get_image_urls(target))
def get_html(url, timeout=10):
    """Download ``url`` with browser-like request headers and return its body.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the remote server before giving up.
            Without a timeout ``requests`` waits indefinitely, so one
            stalled upstream would block the calling worker forever.

    Returns:
        The response body decoded to ``str`` (``response.text``). The HTTP
        status code is not checked, so error pages are returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            invalid URL, or when ``timeout`` elapses.
    """
    headers = {
        "authority": "www.xiaohongshu.com",
        "cache-control": "max-age=0",
        "sec-ch-ua": '"Chromium";v="21", " Not;A Brand";v="99"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "sec-fetch-site": "same-origin",
        "sec-fetch-mode": "navigate",
        "sec-fetch-user": "?1",
        "sec-fetch-dest": "document",
        "accept-language": "zh-CN,zh;q=0.9",
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def json_content(html):
    """Extract the text assigned to ``window.__INITIAL_STATE__`` in ``html``.

    The search is case-insensitive and looks for a bare ``<script>`` tag
    whose content starts with ``window.__INITIAL_STATE__=``; everything up
    to the first closing ``</script>`` on the same line is captured.

    Args:
        html: Page source to search.

    Returns:
        The captured text, or ``None`` when no such script tag is found.
    """
    found = re.search(
        r"<script>window\.__INITIAL_STATE__=(.*?)</script>", html, re.I
    )
    return found.group(1) if found else None
def get_image_urls(url):
    """Fetch the page at ``url`` and collect the image URLs embedded in it.

    The page's ``window.__INITIAL_STATE__`` payload is located with
    ``json_content``, its ``\\u002F`` escapes are turned back into ``/``,
    and every ``http://`` value of a ``url`` key that ends in ``wm_1`` is
    collected.

    Args:
        url: Address of the page to analyse.

    Returns:
        ``{'image_urls': [...]}``; the list is empty when the page has no
        state payload or no matching URLs.
    """
    html = get_html(url)
    state = json_content(html)
    if state is None:
        # No state script in the page: report "no images" instead of
        # crashing with AttributeError on None.replace().
        return {'image_urls': []}
    state = state.replace(r"\u002F", "/")
    image_urls = re.findall(r'url":"(http://[^\":\{\}\[\]]*?wm_1)"', state)
    return {'image_urls': image_urls}
if __name__ == "__main__":
    import os

    # Debug mode enables the Werkzeug interactive debugger, which lets
    # anyone who can reach the server run arbitrary Python. Keep it off
    # unless explicitly requested with FLASK_DEBUG=1 (or "true").
    debug_requested = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true")
    app.run(debug=debug_requested)
返回的是 JSON，调用示例：
http://127.0.0.1:5000/?url=http://xhslink.com/wZh4pw
PHP版
[PHP] 纯文本查看 复制代码 <?php
/**
 * Download a page over HTTP(S) with browser-like request headers.
 *
 * @param string $url     Address of the page to fetch. Only http:// and
 *                        https:// URLs are accepted.
 * @param int    $timeout Seconds to wait for the remote server.
 * @return string|false   The response body, or false when the URL scheme
 *                        is not allowed or the request fails.
 */
function get_html($url, $timeout = 10) {
    // The script passes $_GET['url'] straight into this function, and
    // file_get_contents() also understands file://, php:// and bare local
    // paths. Refuse anything that is not plain HTTP(S).
    $scheme = strtolower((string) parse_url((string) $url, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        return false;
    }

    $headers = array(
        "authority" => "www.xiaohongshu.com",
        "cache-control" => "max-age=0",
        "sec-ch-ua" => '"Chromium";v="21", " Not;A Brand";v="99"',
        "sec-ch-ua-mobile" => "?0",
        "sec-ch-ua-platform" => '"Windows"',
        "upgrade-insecure-requests" => "1",
        "user-agent" => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36",
        "accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "sec-fetch-site" => "same-origin",
        "sec-fetch-mode" => "navigate",
        "sec-fetch-user" => "?1",
        "sec-fetch-dest" => "document",
        "accept-language" => "zh-CN,zh;q=0.9",
    );

    // Flatten the name => value map into "name:value" header lines.
    $header_lines = array();
    foreach ($headers as $name => $value) {
        $header_lines[] = $name . ':' . $value;
    }

    $options = array(
        'http' => array(
            'header' => implode("\r\n", $header_lines),
            // Bound the wait so a stalled upstream cannot hang the request.
            'timeout' => $timeout,
        ),
    );
    $context = stream_context_create($options);
    return file_get_contents($url, false, $context);
}
/**
 * Extract the text assigned to window.__INITIAL_STATE__ in a page.
 *
 * The match is case-insensitive and captures everything between
 * "<script>window.__INITIAL_STATE__=" and the first "</script>".
 *
 * @param string $html Page source to search.
 * @return string|null The captured text, or null when nothing matches.
 */
function json_content($html) {
    $pattern = '/<script>window\.__INITIAL_STATE__=(.*?)<\/script>/i';
    $found = array();
    if (preg_match($pattern, $html, $found) === 1) {
        return $found[1];
    }
    return null;
}
/**
 * Fetch a page and collect the image URLs embedded in its state payload.
 *
 * The window.__INITIAL_STATE__ text is located with json_content(), its
 * \u002F escapes are turned back into "/", and every http:// value of a
 * "url" key that ends in "wm_1" is collected.
 *
 * @param string $url Address of the page to analyse.
 * @return array      array('image_urls' => string[]); the list is empty
 *                    when the download fails, the page has no state
 *                    payload, or nothing matches.
 */
function get_image_urls($url) {
    $html = get_html($url);
    // get_html() yields false on failure and json_content() yields null
    // when the page has no state script. Stop here rather than feeding a
    // non-string into str_replace(), which raises a deprecation notice on
    // PHP 8.1+ that would be printed into the JSON response.
    $state = is_string($html) ? json_content($html) : null;
    if ($state === null) {
        return array('image_urls' => array());
    }
    $state = str_replace("\\u002F", "/", $state);
    preg_match_all('/"url":"(http:\/\/[^":\{\}\[\]]*?wm_1)"/', $state, $all_urls);
    return array('image_urls' => $all_urls[1]);
}
// Entry point: answer GET requests with the image URLs found at ?url=...
if ($_SERVER['REQUEST_METHOD'] === 'GET') {
    // Both outcomes are JSON, so send the header once, before any output.
    header('Content-Type: application/json');

    // isset() avoids an "undefined index" notice when the parameter is
    // absent; that notice would otherwise be printed ahead of the JSON.
    $url = isset($_GET['url']) ? $_GET['url'] : '';

    // ?url[]=x arrives as an array, which is not a usable URL, so it is
    // treated the same as a missing parameter.
    if (is_string($url) && $url) {
        echo json_encode(get_image_urls($url));
    } else {
        echo json_encode(array('error' => 'Missing URL parameter'));
    }
}
?>
https://域名/xiaohongshu/?url=http://xhslink.com/wZh4pw |
免费评分
-
查看全部评分
|