前不久看到一篇关于Bilibili视频地址解析的源码,正好有兴趣,就自己也研究了研究,记录一下。

1.以

http://www.bilibili.com/video/av12535537/ 此视频为例用HttpWatch抓包,很容易就能看出视频的地址是从

https://interface.bilibili.com/playurl?quality=4&player=1&cid=20627240&ts=1504780011&sign=f37df88434579a4b009a2bee708a2a71&qn=112 里获取的

按照常例分析下参数:

cid很容易猜测出是视频的ID号,在网页源码里面也能找到

quality和qn是视频的清晰度

ts则是时间戳,这个也不用多说

关键就是这个sign,在HttpWatch中是完全搜不到任何相关的信息的,但网页访问了

http://static.hdslb.com/play.swf这个文件,那么sign算法很可能就在这个播放器里。

2.下载这个SWF文件后,载入AS3 Sorcerer,代码一大片不是很好找

那就保存到文本里,在文本中查找“视频地址”这个关键字。

很快就能定位到关键的函数LoadPreview和LoadCidVideo,那么接着搜索LoadPreview和LoadCidVideo定位到函数部分,可以看到代码如下

那么关键算法就是在getSign和getSign_v2里面了

[mw_shl_code=actionscript3,true]/**
 * Decompiled FlasCC (Alchemy) wrapper around the native F_get_sign() routine.
 *
 * Passes _arg_1 plus a 32-byte text buffer derived from the first 16 bytes
 * of L__2E_str2 to F_get_sign(), then reads the result back as a String.
 *
 * @param _arg_1 the input string handed to F_get_sign().
 * @return the string read from the pointer F_get_sign() leaves in eax.
 */
public function getSign(_arg_1:String):String
{
    var _local_2:int;
    var _local_4 = null;
    var _local_7:int;
    var _local_8:int;
    var _local_6:int;
    var _local_5:int;
    var _local_3:int = ESP;

    // Remember the caller's stack pointer so it can be restored on exit.
    _local_2 = _local_3;
    // Reserve 48 bytes of scratch space on the emulated stack.
    _local_3 = (_local_3 - 48);
    _local_5 = 16;
    // Output buffer for the sprintf loop: starts 37 bytes below the saved ESP.
    _local_6 = (_local_2 - 37);
    ESP = (_local_3 & -16);

    // Pointer to a native copy of the argument, and its length.
    _local_7 = CModule.mallocString(_arg_1);
    _local_8 = _arg_1.length;

    // _local_5 counts down 16..1, so (L__2E_str2 - _local_5) + 16 walks
    // bytes 0..15 of L__2E_str2. Each byte is passed to F_sprintf() with a
    // destination that advances by 2 per iteration.
    // NOTE(review): presumably this hex-encodes the 16 bytes into a
    // 32-character string; no format argument is visible in this listing.
    do
    {
        var _local_9:int = (L__2E_str2 - _local_5);
        _local_9 = li8((_local_9 + 16)) /*FlasCC (Alchemy)*/ ;
        _local_3 = (_local_3 - 16);
        si32(_local_9, (_local_3 + 4)); //FlasCC (Alchemy)
        si32(_local_6, _local_3); //FlasCC (Alchemy)
        ESP = _local_3;
        F_sprintf();
        _local_3 = (_local_3 + 16);
        _local_5 = (_local_5 + -1);
        _local_6 = (_local_6 + 2);
    } while (_local_5 != 0);

    // Stack arguments for F_get_sign(), lowest address first:
    //   +0  pointer to the native copy of _arg_1
    //   +4  length of _arg_1
    //   +8  buffer filled by the loop above (saved ESP - 37)
    //   +12 address of a 32-bit slot (saved ESP - 4), read back below
    _local_3 = (_local_3 - 16);
    _local_9 = (_local_2 - 4);
    si32(_local_9, (_local_3 + 12)); //FlasCC (Alchemy)
    _local_9 = (_local_2 - 37);
    si32(_local_9, (_local_3 + 8)); //FlasCC (Alchemy)
    si32(_local_8, (_local_3 + 4)); //FlasCC (Alchemy)
    si32(_local_7, _local_3); //FlasCC (Alchemy)
    ESP = _local_3;
    F_get_sign();
    _local_3 = (_local_3 + 16);

    // eax holds the result pointer; the slot at (saved ESP - 4) supplies the
    // length passed to readString().
    _local_5 = eax;
    _local_9 = li32((_local_2 - 4)) /*FlasCC (Alchemy)*/ ;
    _local_4 = CModule.readString(_local_5, _local_9);

    // Release the native copy of the argument.
    if (_local_7 != 0)
    {
        _local_3 = (_local_3 - 16);
        si32(_local_7, _local_3); //FlasCC (Alchemy)
        ESP = _local_3;
        // Was "F_IDAlloc" in the listing; every other free site here and in
        // getSign_v2 calls F_idalloc.
        F_idalloc();
        _local_3 = (_local_3 + 16);
    };

    // Release the buffer returned by F_get_sign().
    if (_local_5 != 0)
    {
        _local_3 = (_local_3 - 16);
        si32(_local_5, _local_3); //FlasCC (Alchemy)
        ESP = _local_3;
        F_idalloc();
        _local_3 = (_local_3 + 16);
    };

    var _local_10 = _local_4;
    // Restore the caller's stack pointer.
    _local_3 = _local_2;
    ESP = _local_3;
    return (_local_10);
}

}//package com.bilibili.interfaces[/mw_shl_code]

[mw_shl_code=actionscript3,true]/**
 * Decompiled FlasCC (Alchemy) wrapper around the native F_get_sign() routine,
 * with a selectable 16-byte block of L__2E_str2.
 *
 * @param _arg_1 the input string handed to F_get_sign().
 * @param _arg_2 block index; values >= 5 skip signing entirely, otherwise the
 *               16 bytes at L__2E_str2 + (_arg_2 << 4) are used.
 * @return _arg_1 unchanged when _arg_2 >= 5, otherwise the string read from
 *         the pointer F_get_sign() leaves in eax.
 */
public function getSign_v2(_arg_1:String, _arg_2:int):String
{
    var _local_12:*;
    var _local_3:int;
    var _local_5 = null;
    var _local_6:int;
    var _local_11:int;
    var _local_8:int;
    var _local_10:int;
    var _local_9:int;
    var _local_4:int = ESP;

    // Remember the caller's stack pointer so it can be restored on exit.
    _local_3 = _local_4;
    // Reserve 48 bytes of scratch space on the emulated stack.
    _local_4 = (_local_4 - 48);
    _local_6 = _arg_2;
    if (_local_6 >= 5)
    {
        // Index out of range: hand the input back untouched.
        _local_12 = _arg_1;
    }
    else
    {
        // Start of the selected 16-byte block inside L__2E_str2.
        _local_8 = (L__2E_str2 + (_local_6 << 4));
        _local_9 = 16;
        // Output buffer for the sprintf loop: starts 37 bytes below the saved ESP.
        _local_10 = (_local_3 - 37);
        ESP = (_local_4 & -16);

        // Pointer to a native copy of the argument, and its length.
        _local_6 = CModule.mallocString(_arg_1);
        _local_11 = _arg_1.length;

        // _local_9 counts down 16..1, so (_local_8 - _local_9) + 16 walks
        // bytes 0..15 of the selected block. Each byte is passed to
        // F_sprintf() with a destination that advances by 2 per iteration.
        // NOTE(review): presumably this hex-encodes the 16 bytes into a
        // 32-character string; no format argument is visible in this listing.
        do
        {
            var _local_7:int = (_local_8 - _local_9);
            _local_7 = li8((_local_7 + 16)) /*FlasCC (Alchemy)*/ ;
            _local_4 = (_local_4 - 16);
            si32(_local_7, (_local_4 + 4)); //FlasCC (Alchemy)
            si32(_local_10, _local_4); //FlasCC (Alchemy)
            ESP = _local_4;
            F_sprintf();
            _local_4 = (_local_4 + 16);
            _local_9 = (_local_9 + -1);
            _local_10 = (_local_10 + 2);
        } while (_local_9 != 0);

        // Stack arguments for F_get_sign(), lowest address first:
        //   +0  pointer to the native copy of _arg_1
        //   +4  length of _arg_1
        //   +8  buffer filled by the loop above (saved ESP - 37)
        //   +12 address of a 32-bit slot (saved ESP - 4), read back below
        _local_4 = (_local_4 - 16);
        _local_7 = (_local_3 - 4);
        si32(_local_7, (_local_4 + 12)); //FlasCC (Alchemy)
        _local_7 = (_local_3 - 37);
        si32(_local_7, (_local_4 + 8)); //FlasCC (Alchemy)
        si32(_local_11, (_local_4 + 4)); //FlasCC (Alchemy)
        si32(_local_6, _local_4); //FlasCC (Alchemy)
        ESP = _local_4;
        F_get_sign();
        _local_4 = (_local_4 + 16);

        // eax holds the result pointer; the slot at (saved ESP - 4) supplies
        // the length passed to readString().
        _local_10 = eax;
        _local_7 = li32((_local_3 - 4)) /*FlasCC (Alchemy)*/ ;
        _local_5 = CModule.readString(_local_10, _local_7);

        // Release the native copy of the argument.
        if (_local_6 != 0)
        {
            _local_4 = (_local_4 - 16);
            si32(_local_6, _local_4); //FlasCC (Alchemy)
            ESP = _local_4;
            F_idalloc();
            _local_4 = (_local_4 + 16);
        };

        // Release the buffer returned by F_get_sign().
        if (_local_10 != 0)
        {
            _local_4 = (_local_4 - 16);
            si32(_local_10, _local_4); //FlasCC (Alchemy)
            ESP = _local_4;
            F_idalloc();
            _local_4 = (_local_4 + 16);
        };
        _local_12 = _local_5;
    };

    // Restore the caller's stack pointer.
    _local_4 = _local_3;
    ESP = _local_4;
    return (_local_12);
}

}//package com.bilibili.interfaces[/mw_shl_code]

可以看到,二者代码非常相似,而且光看代码无法找到有用的信息,那么就得想办法去调试了

3.目前我只知道的办法是:使用JPEXS反编译软件来修改SWF代码(插Log),然后通过Fiddler劫持替换SWF,通过显示Log信息来进行调试

要显示log信息就得安装debug版本的flash

http://www.adobe.com/support/flashplayer/debug_downloads.html

为方便阅读日志信息可以安装Cygwin

具体步骤我也是参考了

http://blog.csdn.net/hot_vc/article/details/50600717这篇文章

————————————————————————————————————————————————————————————————————————————

将SWF载入JPEXS

要想修改SWF的源码就得去修改PCODE,第一次接触这种也不是很懂,觉得有点类似于C#的IL代码

通过AS3代码和PCODE代码的一一进行对比,还是能找到一些规律的

为了查看代码中变量值,用到以下代码

[mw_shl_code=javascript,true]; Log the value of local register 5 through the global trace() function.
; Look up the object that owns the global "trace" property.
findpropstrict QName(PackageNamespace(""),"trace")
; Push local register 5 as the single argument.
getlocal 5
; Call trace with 1 argument and discard the return value.
callpropvoid QName(PackageNamespace(""),"trace") 1[/mw_shl_code]

这个代码就相当于trace(_loc5_);

修改保存后利用Fiddler来劫持替换我们的SWF

这样Cygwin就能输出我们想要看到的结果了。

其中有些变量的值是地址,而不是数据,为了读取地址中的数据,利用到了这句代码CModule.readString(地址,读取长度);

按照原PCODE的格式,写出插LOG时的PCODE

[mw_shl_code=asm,true]; Read 60 bytes from the address held in local 7 via CModule.readString()
; and log the resulting string with trace().
; local8 = 60 (number of bytes to read)
pushint 60
setlocal 8
; local5 = CModule.readString(local7, local8), coerced to String
getlex QName(PackageNamespace("com.bilibili.interfaces"),"CModule")
getlocal 7
getlocal 8
callproperty QName(PackageNamespace(""),"readString") 2
coerce_s
setlocal 5
; trace(local5)
findpropstrict QName(PackageNamespace(""),"trace")
getlocal 5
callpropvoid QName(PackageNamespace(""),"trace") 1[/mw_shl_code]

此代码相当于

_loc8_:int = 60;

_loc5_:* = CModule.readString(_loc7_,_loc8_);

trace(_loc5_);

4.其实关键算法在F_get_sign()中,因为从F_get_sign()函数中出来后,eax中就已经存放带着sign的视频URL的地址了。

F_get_sign()函数过于复杂,总之通过不断地插这两种Log查看变量信息,最终找到了算法

由这个地方可以看出是MD5,还有一个位置能够准确地查看到进行MD5处理的数据(已经忘了…..)

算法如下:

六种清晰度

qn=16,quality=1

qn=32,quality=5

qn=48,quality=2

qn=64,quality=2

qn=80,quality=3

qn=112,quality=4

三种类型的视频bili2,bangumi,movie

bili2:

time = F_clock_gettime();//取当前时间戳,即URL中的ts参数

sign = MD5("cid=" + cid + "&player=1&qn=112&quality=4&ts=" + time + "1c15888dc316e05a15fdd0a02ed6584f")

url = "https://interface.bilibili.com/playurl?player=1&qn=112&cid=" + cid + "&quality=4&ts=" + time + "&sign=" + sign

bangumi:

time = F_clock_gettime();//取当前时间戳,即URL中的ts参数

sign = MD5("cid=" + cid + "&module=bangumi" + "&player=1" + "&qn=112" + "&quality=4" + "&ts=" + time + "9b288147e5474dd2aa67085f716c560d")

url = "https://bangumi.bilibili.com/player/web_api/playurl?cid=" + cid + "&player=1" + "&module=bangumi" + "&qn=112" + "&quality=4&ts=" + time + "&sign=" + sign

movie:

time = F_clock_gettime();//取当前时间戳,即URL中的ts参数

sign = MD5("cid=" + cid + "&module=movie" + "&player=1" + "&qn=112" + "&quality=4" + "&ts=" + time + "9b288147e5474dd2aa67085f716c560d")

url = "https://bangumi.bilibili.com/player/web_api/playurl?cid=" + cid + "&player=1" + "&module=movie" + "&qn=112" + "&quality=4&ts=" + time + "&sign=" + sign

也就只分析到了这里

顺便说下,有些tx.acgvideo.com开头的视频无法下载是因为服务器检查了Referer,只要在请求头里面加入 Referer: https://static.hdslb.com/play.swf ,就能下载了

水平有限,分析若有什么错误,大家看着办吧

欢迎大家在评论区留下你的意见。谢谢 诸葛商学院

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注