引言
Sample:aHR0cHM6Ly93d3cuYmlsaWJpbGkuY29tL3ZpZGVvL0JWMWZKNDExUTdWeA==
我在很久以前写过一个小脚本,发XHR请求来获取某视频网站的m4s视频和音频:
/**
 * Fetch `url` over XHR as a Blob and hand it to the browser as a file download
 * by clicking a temporary `<a download>` element.
 *
 * Meant to be pasted into the DevTools console. Author's notes: the only thing
 * to change per use is the URL (it must be https), and only Chrome and Firefox
 * are supported.
 *
 * @param {string} url - Address of the resource to fetch.
 * @param {string} fname - File name assigned to the anchor's `download` attribute.
 * @param {number} [rangeStart=0] - First byte requested through the `Range` header.
 * @returns {void} Nothing; the outcome is the triggered download or a console error.
 */
function downloader(url, fname, rangeStart = 0) {
  const xhr = new XMLHttpRequest();
  xhr.open('GET', url, true);
  xhr.responseType = 'blob';
  // Author's note: no other request header could be set, e.g.
  // xhr.setRequestHeader("client_type", "DESKTOP_WEB");
  xhr.setRequestHeader('Range', `bytes=${rangeStart}-`);

  xhr.onload = () => {
    if (xhr.status !== 200 && xhr.status !== 206) {
      // Report unexpected statuses instead of doing nothing, so a failed
      // download is visible in the console.
      console.error(`xhr error: ${xhr.status} ${xhr.statusText}`);
      return;
    }
    const blob = new Blob([xhr.response]);
    const blobUrl = window.URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.download = fname;
    a.target = '_blank';
    a.href = blobUrl;
    console.log(a.href, xhr.response);
    document.body.append(a);
    a.click();
    // The anchor exists only to trigger the download; do not leave one
    // behind in the page for every call.
    a.remove();
  };

  // Network-level failures never reach onload; surface them as well.
  xhr.onerror = () => {
    console.error(`xhr error: network failure while requesting ${url}`);
  };

  xhr.send();
}
// Download the audio, then the video (paste into the console; note: run the two calls in two separate executions)
downloader('<m4s audio>', '1.mp3');
downloader('<m4s video>', '1.mp4');
从2020年到上个月为止,这个脚本一直运行良好(it just works)。但从2024-04-20开始,不知道是Chrome浏览器升级还是其他原因,把二进制数据包装成Blob对象并赋给a标签的href后,链接的域名部分变成了https://www.example.comhttps://www.example.com/...,无法再方便地下载m4s,于是这个脚本不能用了。这个问题的原因我一直没有找到。
作者:hans774882968以及hans774882968以及hans774882968
本文52pojie:https://www.52pojie.cn/thread-1915933-1-1.html
本文juejin:https://juejin.cn/post/7359821247676334115
本文CSDN:https://blog.csdn.net/hans774882968/article/details/138015611
正文
好在我很快又摸索出了一个稍微麻烦点的路子:先在浏览器控制台运行JS代码,获取base64字符串,然后在本地用python把它转为mp3。
在上述古老的脚本里,我将XHR的返回类型指定为Blob,因此需要把它转为base64字符串。最后我找到的API是FileReader。scrapt_m4s_new.js:
/**
 * Read a Blob through FileReader#readAsDataURL and resolve with the reader's
 * result.
 *
 * Adapted from https://www.cnblogs.com/Sherries/p/14840404.html
 *
 * @param {Blob} blob - Binary data to read.
 * @returns {Promise<*>} Resolves with the reader's `result` once loading
 *   finishes; rejects with an Error when the reader reports an error.
 */
function blobToDataURI(blob) {
  return new Promise((resolve, reject) => {
    const fileReader = new FileReader();
    fileReader.onerror = () => {
      reject(new Error('文件流异常'));
    };
    fileReader.onload = (event) => {
      resolve(event.target.result);
    };
    // Handlers are in place; start the read.
    fileReader.readAsDataURL(blob);
  });
}
/**
 * Issue a ranged GET request for `url` and resolve with the response as a Blob.
 *
 * Author's notes: the only thing to change per use is the URL (it must be
 * https), and only Chrome and Firefox are supported.
 *
 * @param {string} url - Address of the resource to fetch.
 * @param {number} [rangeStart=0] - First byte requested through the `Range` header.
 * @returns {Promise<Blob>} Resolves with `xhr.response` on HTTP 200 or 206.
 *   Rejects with an Error on any other status, and when the request fails at
 *   the network level, is aborted, or times out.
 */
function sendXhrReq(url, rangeStart = 0) {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.open('GET', url, true);
    xhr.responseType = 'blob';
    // Author's note: no other request header could be set, e.g.
    // xhr.setRequestHeader("client_type", "DESKTOP_WEB");
    xhr.setRequestHeader('Range', `bytes=${rangeStart}-`);

    xhr.onload = () => {
      if (xhr.status === 200 || xhr.status === 206) {
        resolve(xhr.response);
        return;
      }
      // The response is a Blob here, so interpolating it would only print
      // "[object Blob]"; the status text is the useful part.
      reject(new Error(`xhr error: ${xhr.status} ${xhr.statusText}`));
    };
    // Without these handlers the promise would never settle when the request
    // dies before a response arrives.
    xhr.onerror = () => reject(new Error(`xhr error: network failure for ${url}`));
    xhr.onabort = () => reject(new Error(`xhr error: request aborted for ${url}`));
    xhr.ontimeout = () => reject(new Error(`xhr error: request timed out for ${url}`));

    xhr.send();
  });
}
/**
 * Fetch `url` as a Blob, convert it to a data URI and log the base64 payload
 * so it can be copied out of the browser console into base64_str_m4s.txt.
 *
 * @param {string} url - Address of the m4s resource; forwarded to `sendXhrReq`.
 * @param {number} [rangeStart=0] - First byte to request; forwarded to `sendXhrReq`.
 * @returns {Promise<string>} The base64 text that follows the `base64,` marker
 *   of the data URI, or an empty string when the marker is absent.
 */
async function downloader(url, rangeStart = 0) {
  const blob = await sendXhrReq(url, rangeStart);
  console.log(blob); // type is Blob
  const dataURI = String(await blobToDataURI(blob));
  // Expected shape: data:application/octet-stream;base64,<payload>
  console.log(dataURI.substring(0, 100));

  const marker = 'base64,';
  const markerAt = dataURI.indexOf(marker);
  if (markerAt === -1) {
    // indexOf() returned -1; slicing at -1 + marker.length would print an
    // arbitrary tail of the URI as if it were the payload.
    console.warn('downloader: data URI has no "base64," section, nothing to copy');
    return '';
  }

  const base64Str = dataURI.substring(markerAt + marker.length);
  console.log(base64Str); // copy this string from the console and paste it into base64_str_m4s.txt
  return base64Str;
}
// Start the download; replace the placeholder with the real m4s URL before running.
downloader('<m4s file url>');
base64_str_to_m4s.py:
import base64
def main(in_path='base64_str_m4s.txt', out_path='1.mp3'):
    """Decode a base64 text file and write the resulting bytes to a file.

    Args:
        in_path: UTF-8 text file holding the base64 string copied from the
            browser console. Defaults to 'base64_str_m4s.txt'.
        out_path: Destination for the decoded bytes. Defaults to '1.mp3'.
    """
    with open(in_path, 'r', encoding='utf-8') as f_in:
        inp = f_in.read()
    data = base64.b64decode(inp)
    with open(out_path, 'wb') as f_out:
        f_out.write(data)


if __name__ == '__main__':
    main()
福利:赠送mp3截取代码mp3_clip.py,使用的是moviepy:
from moviepy.editor import AudioFileClip
import os
def clip_audio(fname, st, ed=None):
    """Cut a section out of an audio file and save it next to the original.

    The output path is the input path with its extension replaced by
    '-out.mp3' (e.g. '1.mp3' -> '1-out.mp3').

    Args:
        fname: Path of the audio file to read.
        st: Start of the section, passed to ``subclip`` (presumably seconds;
            confirm against the moviepy version in use).
        ed: End of the section, passed to ``subclip``; None by default.

    Returns:
        The path of the written file.
    """
    source = AudioFileClip(fname)
    try:
        out_file_name = f'{os.path.splitext(fname)[0]}-out.mp3'
        source.subclip(st, ed).write_audiofile(out_file_name)
    finally:
        # Close the clip even when writing fails, so the opened source file
        # is not left dangling.
        source.close()
    return out_file_name
def main(fname='1.mp3', st=3.5, ed=None):
    """Clip an audio file with ``clip_audio``.

    With no arguments this clips '1.mp3' starting at 3.5 with no explicit end.

    Args:
        fname: Audio file to clip. Defaults to '1.mp3'.
        st: Start position forwarded to ``clip_audio``. Defaults to 3.5.
        ed: End position forwarded to ``clip_audio``; None by default.
    """
    clip_audio(fname, st, ed)


if __name__ == '__main__':
    main()
参考资料
blobToDataURI:https://www.cnblogs.com/Sherries/p/14840404.html