本帖最后由 青史无疆 于 2017-1-8 14:53 编辑
优酷视频下载:
一:获取m3u8: 用浏览器打开要下载的视频播放页,取出其地址 http://v.youku.com/v_show/id_XMTkzMTA0NjgyMA==.html
获得vid= “XMTkzMTA0NjgyMA==”;
代入 "http://play.youku.com/play/get.json?vid={vid}&ct=12" 中
得到Url_1= “http://play.youku.com/play/get.json?vid=XMTkzMTA0NjgyMA==&ct=12”。
访问得到:
取出关键数据:
encrypt_string = "NwXYTQoXI7jb0/HE8OJxVYbxuxc+1w7IWxY=",ip = 3526182481。通过算法计算出相关值:
得到ts,type,sid,token,ep:
代入 http://pl.youku.com/playlist/m3u8?vid={vid}&type={type}&ts={ts}&keyframe=1&ep={ep}&sid={sid}&token={token}&ctype=12&ev=1&oip={oip} 中,得到:
http://pl.youku.com/playlist/m3u8?vid=XMTkzMTA0NjgyMA==&type=mp4&ts=1483796072&keyframe=1&ep=ciacGU6EUM8I4SbciT8bby23cnAIXJZ3kkTP%2FLYTAMV%2BH6HC6DPcqJyzRvs%3D&sid=04837961845311281e677&token=1384&ctype=12&ev=1&oip=3526182481
Node.js实现以上过程如下:
[JavaScript]
var http = require("http");
var fs = require("fs");
/**
 * RC4 stream cipher operating on JavaScript strings.
 *
 * Each character of the key and of the input is treated through its
 * charCode; the result is a string whose characters are the input
 * charCodes XOR-ed with the generated keystream bytes. Because RC4 is
 * symmetric, the same call both encrypts and decrypts.
 *
 * @param {string} a - the key.
 * @param {string} b - the text to encrypt or decrypt.
 * @returns {string} the transformed text, same length as `b`.
 */
function rc4(a, b) {
    var state = [];
    var i;
    var j;
    var swap;

    // Start from the identity permutation 0..255.
    for (i = 0; i < 256; i++) {
        state[i] = i;
    }

    // Key scheduling: shuffle the permutation under control of the key.
    j = 0;
    for (i = 0; i < 256; i++) {
        j = (j + state[i] + a.charCodeAt(i % a.length)) % 256;
        swap = state[i];
        state[i] = state[j];
        state[j] = swap;
    }

    // Keystream generation: one keystream byte per input character.
    var out = '';
    i = 0;
    j = 0;
    for (var pos = 0; pos < b.length; pos++) {
        i = (i + 1) % 256;
        j = (j + state[i]) % 256;
        swap = state[i];
        state[i] = state[j];
        state[j] = swap;
        out += String.fromCharCode(b.charCodeAt(pos) ^ state[(state[i] + state[j]) % 256]);
    }
    return out;
}
/**
 * De-obfuscates a string made of the characters a-z and 0-9 through a
 * substitution table.
 *
 * Every character is first mapped to a number (a-z -> 0..25, digits ->
 * 26..35). That number is then looked up among the first 36 entries of
 * `b`; if found, it is replaced by the index at which it was found.
 * Finally the number is mapped back: 0..25 -> a-z, anything above 25 ->
 * the number minus 26.
 *
 * @param {string} a - obfuscated text.
 * @param {Array<number>} b - substitution table (36 entries are consulted).
 * @returns {string} the translated text.
 */
function translate(a, b) {
    var out = [];
    for (var pos = 0; pos < a.length; pos++) {
        var ch = a[pos];
        var code;
        if (ch >= 'a' && ch <= 'z') {
            code = ch.charCodeAt(0) - 'a'.charCodeAt(0);
        } else {
            // Relies on numeric coercion of the digit characters.
            code = ch - '0' + 26;
        }

        // Replace the code by its position in the table, when present.
        var slot = 0;
        while (slot < 36) {
            if (b[slot] == code) {
                code = slot;
                break;
            }
            slot++;
        }

        if (code > 25) {
            out[pos] = code - 26;
        } else {
            out[pos] = String.fromCharCode(code + 97);
        }
    }
    return out.join('');
}
/**
 * Decodes a base64 string into a "binary" string (one character per byte).
 *
 * Characters that are not part of the base64 alphabet are skipped. A '='
 * met where the third or fourth character of a group is expected ends the
 * decoding.
 *
 * @param {*} a - value to decode; falsy values yield ''. Anything else is
 *     converted with toString() first.
 * @returns {string} the decoded bytes as a string.
 */
function decode64(a) {
    if (!a) return '';
    a = a.toString();

    // Maps a char code (0..127) to its 6-bit value, or -1 when the
    // character is not in the base64 alphabet.
    var table = [
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
        -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
    ];
    var PAD = 61; // char code of '='
    var total = a.length;
    var pos = 0;
    var out = '';

    // Reads characters until one maps to a 6-bit value or the input is
    // exhausted. Returns -1 when nothing usable was found, or when
    // `stopAtPadding` is set and a '=' is met.
    function nextSextet(stopAtPadding) {
        var value;
        do {
            var code = 255 & a.charCodeAt(pos++);
            if (stopAtPadding && code === PAD) {
                return -1;
            }
            value = table[code];
        } while (pos < total && value == -1);
        return value;
    }

    while (pos < total) {
        var s0 = nextSextet(false);
        if (s0 == -1) break;

        var s1 = nextSextet(false);
        if (s1 == -1) break;
        out += String.fromCharCode(s0 << 2 | (48 & s1) >> 4);

        var s2 = nextSextet(true);
        if (s2 == -1) break;
        out += String.fromCharCode((15 & s1) << 4 | (60 & s2) >> 2);

        var s3 = nextSextet(true);
        if (s3 == -1) break;
        out += String.fromCharCode((3 & s2) << 6 | s3);
    }
    return out;
}
/**
 * Encodes a "binary" string (one character per byte) as base64, with
 * '=' padding.
 *
 * @param {*} a - value to encode; falsy values yield ''. Anything else is
 *     converted with toString() first. Only the low 8 bits of each
 *     character end up in the output.
 * @returns {string} the base64 text.
 */
function encode64(a) {
    if (!a) return '';
    a = a.toString();

    var ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
    var total = a.length;
    var pos = 0;
    var out = '';

    while (pos < total) {
        var first = 255 & a.charCodeAt(pos++);
        out += ALPHABET.charAt(first >> 2);
        if (pos == total) {
            // One trailing byte: two symbols plus two padding characters.
            out += ALPHABET.charAt((3 & first) << 4);
            out += '==';
            break;
        }

        var second = a.charCodeAt(pos++);
        out += ALPHABET.charAt((3 & first) << 4 | (240 & second) >> 4);
        if (pos == total) {
            // Two trailing bytes: three symbols plus one padding character.
            out += ALPHABET.charAt((15 & second) << 2);
            out += '=';
            break;
        }

        var third = a.charCodeAt(pos++);
        out += ALPHABET.charAt((15 & second) << 2 | (192 & third) >> 6);
        out += ALPHABET.charAt(63 & third);
    }
    return out;
}
// Substitution table handed to translate() when the RC4 keys are rebuilt.
var e = [19,1,4,7,30,14,28,8,24,17,6,35,34,16,9,10,13,22,32,29,31,21,18,3,2,23,25,27,11,20,5,15,12,0,33,26];

// Fragments of the obfuscated RC4 keys. Download_m3u8 concatenates them
// with literal middle parts and passes the result through translate().
// Declared with `var` so they are no longer implicit globals (an implicit
// global assignment throws a ReferenceError in strict mode / ES modules).
var a1 = '4';
var a2 = '1';
var a3 = 'b4et';
var a4 = 'boa4';

// Maps a stream-type name to the value sent as the `type` query parameter
// of the playlist URL.
var TypeEnum = {
    'flv': 'flv',
    'mp4hd': 'mp4',
    'mp4hd2': 'flv',
    'mp4hd3': 'flv',
    '3gphd': 'mp4',
    '3gp': 'flv',
    'flvhd': 'flv'
};

// URL templates; the {name} placeholders are filled in before each request.
var URL_1 = "http://play.youku.com/play/get.json?vid={vid}&ct=12";
var URL_2 = "http://pl.youku.com/playlist/m3u8?vid={vid}&type={type}&ts={ts}&keyframe=1&ep={ep}&sid={sid}&token={token}&ctype=12&ev=1&oip={oip}";
/**
 * Builds the signed playlist URL from the play-API response and saves the
 * downloaded m3u8 playlist to the file named by the file-level `target`.
 *
 * Reads the file-level values `vid`, `target`, `e`, `a1`..`a4`, `TypeEnum`
 * and `URL_2`, and uses the file-level `http` and `fs` modules.
 *
 * @param {Object} b - parsed JSON answer of the URL_1 request. It must
 *     carry `data.security.encrypt_string` and `data.security.ip`.
 */
function Download_m3u8(b)
{
    var security = b && b.data && b.data.security;
    if (!security || !security.encrypt_string)
    {
        // Without this guard a refused request crashed with a TypeError.
        console.log("Unexpected API response: data.security.encrypt_string is missing");
        return;
    }

    var ts = Math.floor(Date.now() / 1000);
    console.log("ts: "+ts);
    var type = TypeEnum['mp4hd'];
    console.log("type: "+type);

    // Decrypting encrypt_string yields "<sid>_<token>".
    var f = rc4(translate(a3 + 'o0b' + a1, e).toString(), decode64(security.encrypt_string));
    var parts = f.split('_');
    var sid = parts[0];
    console.log("sid: "+sid);
    var token = parts[1];
    console.log("token: "+token);

    var ep = encodeURIComponent(encode64(rc4(translate(a4 + 'poz' + a2, e).toString(), sid + '_' + vid + '_' + token)));
    console.log("ep: "+ep);
    var oip = security.ip;
    console.log("oip: "+oip);

    // A replacer function is used so that a '$' inside a value is inserted
    // literally instead of being read as a replacement pattern.
    var values = {vid: vid, ts: ts, type: type, sid: sid, token: token, ep: ep, oip: oip};
    var url_2 = URL_2.replace(/\{(\w+)\}/g, function(placeholder, key)
    {
        return Object.prototype.hasOwnProperty.call(values, key) ? values[key] : placeholder;
    });
    console.log(url_2);

    var req = http.get(url_2, function(res)
    {
        if (res.statusCode < 200 || res.statusCode >= 300)
        {
            console.log("Playlist request failed with HTTP status " + res.statusCode);
            res.resume(); // drain the body so the socket is released
            return;
        }
        var dataArray = [];
        res.on("error", function(err)
        {
            console.log(err);
        });
        res.on("data", function(data)
        {
            dataArray.push(data);
        });
        res.on("end", function()
        {
            // One call that opens, writes and closes: no descriptor is left
            // open if the write fails.
            fs.writeFileSync(target, Buffer.concat(dataArray));
            console.log("Done!");
        });
    });
    // Connection-level failures (DNS, refused, reset) are reported on the
    // request object, not on the response.
    req.on("error", function(err)
    {
        console.log(err);
    });
}
// Entry point of the playlist fetcher.
//   process.argv[2]: URL of the video page (.../id_<vid>.html)
//   process.argv[3]: output name; the playlist is written to <name>.m3u8
var target = process.argv[3]+".m3u8";
// Accepts both http:// and https:// page addresses; group 1 is the video id.
var vid_Reg = /https?:\/\/.*?\/id_(.*?)\.html/;
var vid_Match = vid_Reg.exec(process.argv[2] || "");
if (!vid_Match || !process.argv[3])
{
    // Previously a missing or unrecognised URL crashed with a TypeError.
    console.log("Usage: node <script> <video-page-url> <output-name>");
    process.exit(1);
}
var vid = vid_Match[1];
// Replacer function: the id is inserted literally even if it contains '$'.
var url_1 = URL_1.replace(/{vid}/, function() { return vid; });
var req_1 = http.get(url_1, function(res)
{
    res.on("error", function(err)
    {
        console.log(err);
    });
    var jsonString = "";
    res.on("data", function(data)
    {
        jsonString += data.toString();
    });
    res.on("end", function()
    {
        var jsonObject;
        try
        {
            jsonObject = JSON.parse(jsonString);
        }
        catch (parseError)
        {
            console.log("The play API did not answer with valid JSON (HTTP status " + res.statusCode + ")");
            console.log(parseError);
            return;
        }
        Download_m3u8(jsonObject);
    });
});
// Connection-level failures are emitted on the request, not the response.
req_1.on("error", function(err)
{
    console.log(err);
});
二:解析m3u8文件并下载:
用正则表达式匹配m3u8中的视频地址(Node.js):
由于每个视频比较小,可采用异步下载,这样下载速度比较快,不同网络环境同时下载的最优个数不定,我的电脑上就是10个。
每个任务完成后,如果还有未下载任务,就创建新的下载任务。
下载完成后得到一堆小视频:
具体代码:
[JavaScript]
/*匹配m3u8文件中的网址的正则表达式*/
var dReg = /https?:\/\/.*?\s/g,
    /* Queue of files to download; each entry carries a `url` and a `name` */
    objArray = [],
    /* Maximum number of downloads started at the same time */
    DownloadThread = 10,
    /* Index into objArray of the next entry that has not been started yet */
    DownloadStartCount = 0;
/**
 * Downloads one URL into a file and, once finished (successfully or not),
 * starts the next entry of the file-level queue `objArray`, so that the
 * number of running downloads stays constant.
 *
 * The target file is created before the request is sent, so a segment
 * that cannot be fetched leaves an empty file behind.
 *
 * @param {string} p_url - address to fetch.
 * @param {string} name - path of the file to write.
 */
var download = function(p_url, name)
{
    var fs = require("fs");
    var http = require("http");
    var fd = fs.openSync(name, "w");
    var finished = false;

    // Closes the file, hands the free slot to the next queued entry and
    // prints `message`. Guarded so it runs once even if several events
    // (response end, response error, request error) fire for one request.
    function finish(message)
    {
        if (finished)
        {
            return;
        }
        finished = true;
        fs.closeSync(fd);
        if (DownloadStartCount < objArray.length)
        {
            var next = objArray[DownloadStartCount];
            ++DownloadStartCount;
            download(next.url, next.name);
        }
        console.log(message);
    }

    var req = http.get(p_url, function(res)
    {
        if (res.statusCode == 404)
        {
            res.resume(); // drain the body so the socket is released
            finish("Not Found:\t" + name);
            return;
        }
        var dataArray = [];
        res.on("data", function(data)
        {
            dataArray.push(data);
        });
        res.on("error", function(error)
        {
            console.log(error);
            finish("Failed:\t" + name);
        });
        res.on("end", function()
        {
            if (finished)
            {
                return;
            }
            var body = Buffer.concat(dataArray);
            fs.writeSync(fd, body, 0, body.length, null);
            finish(name);
        });
    });
    // A failed request must still release the descriptor and keep the
    // queue moving; otherwise this download slot is lost for good.
    req.on("error", function(error)
    {
        console.log(error);
        finish("Failed:\t" + name);
    });
};
// Entry point of the segment downloader.
//   process.argv[2]: base name; <name>.m3u8 is read and the segments are
//                    written to <name>\0.ts, <name>\1.ts, ...
var fs_g = require("fs");
var name = process.argv[2];
var exec = require("child_process").exec;
var path = name + "\\";
var m3u8 = name + ".m3u8";
if (!name)
{
    console.log("Usage: node <script> <name>");
    process.exit(1);
}
fs_g.readFile(m3u8, "utf-8", function(err, data)
{
    if (err)
    {
        console.log(err);
        return;
    }
    // download() opens its target with openSync, which fails when the
    // directory is missing, so make sure it exists first.
    if (!fs_g.existsSync(name))
    {
        fs_g.mkdirSync(name);
    }

    // Collect every URL of the playlist, numbered in order of appearance.
    var array;
    var index = 0;
    dReg.lastIndex = 0;
    while ((array = dReg.exec(data)) !== null)
    {
        objArray.push({
            // The pattern also captures the whitespace that ends the URL.
            url: array[0].trim(),
            name: path + index + ".ts"
        });
        ++index;
    }

    // Start at most DownloadThread downloads; each one starts the next
    // queued entry when it finishes.
    var started = 0;
    while (DownloadStartCount < objArray.length && started < DownloadThread)
    {
        var job = objArray[DownloadStartCount];
        ++DownloadStartCount;
        ++started;
        download(job.url, job.name);
    }
});
三:合并文件:
先把多个小ts合成一个大ts:
copy /b "1.ts"+"2.ts"+…+"m.ts" /y output.ts
copy /b "m+1.ts"+"m+2.ts"+…+"n.ts" /y output2.ts
注意:"1.ts"+"2.ts"+…+"m.ts" 这一串参数的长度有限制,大概1500个字符左右,所以有时要分段。
然后再把多个大ts合成一个完整视频:
ffmpeg -i "concat:output.ts|output2.ts" -c copy -bsf:a aac_adtstoasc -movflags +faststart output.mp4
具体合并代码:
[JavaScript]
var child= require('child_process');
var iconv=require("iconv-lite");
var exec = child.exec;
// Turns the raw output of a child process (captured with the 'binary'
// encoding) into readable text by decoding it as GBK.
// NOTE(review): GBK is what the original code decoded with, presumably for
// a Chinese-locale Windows console. Confirm for other environments.
function decodeOutput(pText)
{
    return iconv.decode(Buffer.from(pText, "binary"), 'GBK');
}

/**
 * Runs a shell command and prints its decoded stdout and stderr.
 *
 * @param {string} pCMD - the command line to run.
 * @param {function(Error=)} [pDone] - optional; called once the command has
 *     exited, with the error when it failed. Lets callers run commands one
 *     after another, since exec() itself returns immediately.
 */
function execCommand(pCMD, pDone)
{
    exec(pCMD, {encoding: 'binary'}, function(error, stdout, stderror)
    {
        if (error)
        {
            console.log("ERROR:\n" + error);
        }
        if (stdout)
        {
            console.log("stdout:\n" + decodeOutput(stdout));
        }
        if (stderror)
        {
            console.log("stderror:\n" + decodeOutput(stderror));
        }
        if (typeof pDone === "function")
        {
            pDone(error);
        }
    });
}

// Runs the commands strictly one after another, starting at pIndex, and
// stops at the first one that fails.
function runCommandsInOrder(pCommands, pIndex)
{
    if (pIndex >= pCommands.length)
    {
        return;
    }
    execCommand(pCommands[pIndex], function(error)
    {
        if (error)
        {
            return; // already reported by execCommand
        }
        runCommandsInOrder(pCommands, pIndex + 1);
    });
}

// Entry point of the merge step.
//   process.argv[2]: base name (<name>.m3u8, <name>\<i>.ts, <name>.mp4)
//   process.argv[3]: number of segments joined by one copy command
//   process.argv[4]: optional directory prefix of the ffmpeg executable
var cmd = "";
var outputTS = "concat:";
var ffmpeg = (process.argv[4] || "") + "ffmpeg";
var m3u8 = process.argv[2] + ".m3u8";
var videoPATH = process.argv[2] + "\\";
var outputMP4 = process.argv[2] + ".mp4";
var group = parseInt(process.argv[3], 10);
var fs = require("fs");
var dReg = /https?:\/\/.*?\s/g;
if (!process.argv[2] || !(group > 0))
{
    console.log("Usage: node <script> <name> <segments-per-group> [ffmpeg-directory]");
    process.exit(1);
}
fs.readFile(m3u8, "utf-8", function(err, data)
{
    if (err)
    {
        console.log(err);
        return;
    }
    // The number of URLs in the playlist is the number of segment files.
    var matches = data.match(dReg);
    var end = matches ? matches.length : 0;
    if (end === 0)
    {
        console.log("No segment URLs found in " + m3u8);
        return;
    }

    var groupNUM = Math.ceil(end / group);
    var commands = [];
    var merged = [];
    for (var i = 0; i < groupNUM; ++i)
    {
        // Segments i*group .. (i+1)*group-1 are joined into <i>.ts in the
        // current directory.
        var names = [];
        for (var j = i * group; j < end && j < (i + 1) * group; ++j)
        {
            names.push("\"" + videoPATH + j + ".ts\"");
        }
        merged.push(i + ".ts");
        commands.push("copy/b " + names.join("+") + " /y " + i + ".ts");
    }
    outputTS += merged.join("|");

    // ffmpeg reads the files written by the copy commands, so it is queued
    // last and only runs after all of them have completed.
    cmd = ffmpeg + " -i \"" + outputTS + "\" -c copy -bsf:a aac_adtstoasc -movflags +faststart \"" + outputMP4 + "\"";
    commands.push(cmd);
    runCommandsInOrder(commands, 0);
});
到此结束,得到了优酷上的视频。(具体代码见成品)。
这些代码主要关联了:
一:http的get方法。
二:异步下载文件
三:正则表达式
四:动态生成脚本命令
仅供学习交流
百度云:http://pan.baidu.com/s/1eREt9QE
|