简单分析MapoEngine虚拟化架构
0x00 前言
前段时间想研究虚拟机技术,于是找了一个简化版的‘VMP’——MapoEngine。这个壳是由看雪大佬开发的,相比VMP来说少了寄存器轮换和vm_key之类的机制,比较适合入门分析。
0x01 准备样本
好的样本是必不可少的,可以通过一些实验性的代码来了解虚拟机的架构,下面是一个简单的循环弹窗代码:
#include "stdafx.h"
#include "windows.h"
#include"MapoSDK.h"
// Minimal sample used as the virtualization target: the statements between
// MAPO_PROTECT_START and MAPO_PROTECT_END are the ones the article analyses
// in their virtualized form.
int main()
{
// Start marker of the protected region (presumably defined in MapoSDK.h).
MAPO_PROTECT_START
// Two iterations, so that the protected code contains a loop and a call.
for(int i=0;i<2;i++)
MessageBox(NULL, _T("hello"), _T("hi"), MB_OK);
// End marker of the protected region.
MAPO_PROTECT_END
return 0;
}
会被虚拟化的代码如下:
0x02 寻找虚拟机handler
这个虚拟机是非常典型的循环解码字节码执行的架构,而不是类似vmp3.x的线性解码的架构。这种架构可以通过OD记录trace,统计执行次数,很快就能确定指令表的位置:执行次数最多的那条分发指令(形如 jmp dword ptr ds:[reg*4+0x41a606])所引用的地址就是指令表,0x41a606 即后文脚本读取的表基址。
下面是对应的指令表,一共37条指令
0x03 分析handler
分析handler是一个体力活,由于代码膨胀和指令乱序,很容易就晕头转向(对于我这种菜鸟是这样的),可以通过逆向框架Miasm来简化这一工作。
首先让样本跑起来,然后再dump下来,这时就可以载入Miasm进行分析了。由于这个框架对一些多字节的NOP指令(nop eax、nop ax这类)无法反汇编,会导致分析失败(框架的bug,提了issue也没回复),可以先在IDA里跑一段脚本,将多字节nop替换成等长的单字节nop,也就是简单的搜索字节并替换。
import hexdump
import binascii
import idautils
def replace_inst(buf,to_inst):
    """Overwrite every occurrence of a byte pattern in the IDA database.

    buf     -- pattern to search for, one character per byte.
    to_inst -- replacement bytes, one character per byte; must have the same
               length as ``buf`` so that no partial instruction is left
               behind after patching.

    Raises ValueError when ``buf`` is empty or the two lengths differ.
    Uses the IDAPython helpers MinEA, FindBinary and PatchByte, which must be
    available in the calling namespace.
    """
    if not buf or len(buf) != len(to_inst):
        raise ValueError("pattern must be non-empty and as long as its replacement")
    bin_str = ' '.join(["%02X" % ord(x) for x in buf])
    print("Searching for: [%s]" % bin_str)
    ea = MinEA()
    while True:
        ea = FindBinary(ea, SEARCH_DOWN, bin_str)
        if ea == idaapi.BADADDR:
            break
        # Patch byte by byte; ea ends up just past the patched bytes, which
        # is where the next search starts.
        for i in to_inst:
            PatchByte(ea,ord(i))
            ea+=1
# Multi-byte NOP encodings to be replaced; each one is overwritten with the
# same number of single-byte NOPs (0x90).
for nop_pattern in ('\x0f\x1f\xc8', '\x0f\x1f\xd8', '\x0f\x1f\xd0', '\x0f\x1f\xe8', '\x66\x0f\x1f\xf0'):
    replace_inst(nop_pattern, '\x90' * len(nop_pattern))
利用Miasm自带的符号执行引擎,对37个handler进行模拟执行,去掉无关的符号,只关心ESI EBP EDI 内存读写(这个和vmp2.x一致 ESI=vm_eip EBP=vm_esp EDI作为vm_context指针,这块不清楚的同学可以看一下vmp虚拟机的架构的文章)
from miasm2.analysis.machine import Machine
from miasm2.analysis.binary import Container
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.arch.x86.sem import ir_x86_32
from miasm2.arch.x86 import regs
from miasm2.arch.x86.regs import *
from miasm2.expression.expression import *
from miasm2.expression.simplifications import expr_simp
from miasm2.core import asmblock
from miasm2.ir.translators import Translator
from miasm2.core.locationdb import LocationDB
import struct
import binascii
def my_eval_updt_irblock(symb,irb, step=False):
    """Symbolically execute one IR block and return its destination.

    symb -- symbolic execution engine; its state is updated in place.
    irb  -- IR block, iterated as a sequence of assignment blocks.
    step -- when true, print offset, text and raw bytes of each executed
            instruction, skipping trace lines that contain JMP or NOP.

    Returns the value of IRDst evaluated after the whole block has run.
    """
    for assignblk in irb:
        if step:
            raw = binascii.hexlify(assignblk.instr.b)
            if not isinstance(raw, str):
                # hexlify yields bytes on Python 3; keep the trace line plain text
                raw = raw.decode('ascii')
            pstr='0x%x\t%s\t\t\t\t----------------------- %s'%(assignblk.instr.offset,str(assignblk.instr),raw)
            if 'JMP' not in pstr and 'NOP' not in pstr:
                print(pstr)
        # The state update happens for every assignment block, traced or not.
        symb.eval_updt_assignblk(assignblk)
    dst = symb.eval_expr(symb.ir_arch.IRDst)
    return dst
def my_run_at(symb, ircfg, addr, lbl_stop=None, step=False):
    """Symbolically execute IR blocks starting at ``addr`` until a stop point.

    symb     -- symbolic execution engine passed on to my_eval_updt_irblock.
    ircfg    -- IR control-flow graph; blocks are fetched with get_block().
    addr     -- address (or destination expression) of the first block.
    lbl_stop -- loc_key of a block that must not be executed; reaching it
                ends the run.
    step     -- forwarded to my_eval_updt_irblock to enable tracing.

    Returns the last destination reached: either the address whose block is
    missing from ``ircfg`` or the one belonging to the stop block.
    """
    while True:
        irblock = ircfg.get_block(addr)
        if irblock is None:
            # Destination is outside the graph (or not a concrete block).
            break
        if irblock.loc_key == lbl_stop:
            break
        addr = my_eval_updt_irblock(symb,irblock, step=step)
    return addr
if __name__ == '__main__':
    # Symbolically execute each of the 37 VM handlers and print what it does
    # to the VM registers and memory.
    machine = Machine("x86_32")
    # A PE image must be read in binary mode; the handle is closed once parsed.
    with open("test3.exe", "rb") as sample:
        cont = Container.from_stream(sample)
    bs = cont.bin_stream
    mdis = machine.dis_engine(bs,loc_db=cont.loc_db)
    mdis.follow_call=True
    ira = machine.ir(mdis.loc_db)
    # NOTE(review): symbols_init was not defined in the original listing. The
    # *_init names in the printed results suggest Miasm's default initial
    # register map was used -- confirm.
    symbols_init = dict(machine.mn.regs.regs_init)
    for i in range(0,37):
        symb = SymbolicExecutionEngine(ira, symbols_init)
        # Entry i of the handler table: a little-endian 32-bit handler address.
        ad=struct.unpack('<I',bs.getbytes(0x41a606+4*i, 4))[0]
        asmcfg = mdis.dis_multiblock(ad)
        ircfg = ira.new_ircfg_from_asmcfg(asmcfg)
        # Stop at this address (it checks the stack address and adjusts the
        # stack to avoid conflicts).
        loc_key=ircfg.get_or_create_loc_key(0x00412587)
        print('----------------------0x%x-------------------'%ad)
        try:
            # Pass True as the last argument to trace every executed instruction.
            symbolic_pc = my_run_at(symb,ircfg,ad,loc_key,False)
        except Exception as exc:
            print('error: %s' % exc)
        # Filter out irrelevant symbols.
        # NOTE(review): state_to_expr is not defined in this listing; it has
        # to be supplied before the script can run.
        print(state_to_expr(symb))
得出输出后根据执行的结果来手动命名指令,大多数很容易就知道执行的指令的含义
这里值得注意的是,有些并不能输出结果,原因可能是操作了不关心的符号或者结果非常复杂,如果对应的handler没输出,这时要启用单步来手动分析对应的handler执行结果。
比如0x4156c0没输出,这时候看函数汇编,很明显就是把虚拟flag寄存器弹到真实的flag寄存器,所以命名为VM_PopRealfd
----------------------0x4156c0-------------------
0x4156c0 PUSH DWORD PTR
0x4156c4 POPFD
0x4123bf MOVZX EAX, WORD PTR
0x412577 MOVSX EAX, BYTE PTR
0x4124f7 MOV AX, 0x8F82
0x4124fb MOVSX AX, BL
0x412501 MOVSX EAX, WORD PTR
0x41250f BSWAP EAX
0x412515 MOV AX, 0x5BFB
0x4125f0 MOV AL, 0xE9
0x4125f2 PUSHFD
0x4125f5 XOR AX, 0x19
0x412584 ADC AL, 0xA
再比如0x41536a,可以看出是退出虚拟机的操作,得到了寄存器的对应关系,命名为VM_Retn:
----------------------0x41536a-------------------
0x41536a PUSH DWORD PTR
0x415372 POP EAX
0x415378 PUSH DWORD PTR
0x41537b POP ECX
0x416bae PUSH DWORD PTR
0x416624 POP EDX
0x416627 PUSH DWORD PTR
0x415539 POP EBX
0x415540 PUSH DWORD PTR
0x414c05 POP ESI
0x414c0a ADD ESP, 0x4
0x414c11 ADD ESP, 0x200
0x414c1a ADD ESP, 0x40
0x414c1d PUSH DWORD PTR
0x416fad POPFD
0x4146de PUSH EBP
0x4146e3 POP ESP
0x4146e4 PUSH DWORD PTR
0x4146ed POP EBP
0x4146f3 PUSH DWORD PTR
0x41722b POP EDI
0x416f06 RET
通过符号执行,可以快速的完成handler的分析工作,以下就是上述代码输出和命名的结果:
----------------------0x4156c0-------------------
VM_PopRealfd
1
----------------------0x41455b-------------------
VM_Extend8Sto32
1
EBP = EBP_init + 0xFFFFFFFD;
@32 = @8?({@8 0 8, 0xFFFFFF 8 32},{@8 0 8, 0x0 8 32});
----------------------0x4155f4-------------------
VM_Imul32
1
@32 = ((@32?({@32 0 32, 0xFFFFFFFF 32 64},{@32 0 32, 0x0 32 64})) * (@32?({@32 0 32, 0xFFFFFFFF 32 64},{@32 0 32, 0x0 32 64})));
@32 = ((@32?({@32 0 32, 0xFFFFFFFF 32 64},{@32 0 32, 0x0 32 64})) * (@32?({@32 0 32, 0xFFFFFFFF 32 64},{@32 0 32, 0x0 32 64})));
----------------------0x4128d4-------------------
VM_Nor32
1
EBP = EBP_init + 0x4;
@32 = (@32 ^ 0xFFFFFFFF) & (@32 ^ 0xFFFFFFFF);
----------------------0x416dbc-------------------
VM_PopR8
2
EBP = EBP_init + 0x1;
ESI = ESI_init + 0x1;
@8?(@8 0 8, 0xFFFFFF 8 32}],@8 0 8, 0x0 8 32}]) = @8;
----------------------0x41578b-------------------
VM_PushI8
2
EBP = EBP_init + 0xFFFFFFFF;
ESI = ESI_init + 0x1;
@8 = @8;
----------------------0x4130db-------------------
VM_PopR32
2
EBP = EBP_init + 0x4;
ESI = ESI_init + 0x1;
@8?(@32 0 8, 0xFFFFFF 8 32}],@32 0 8, 0x0 8 32}]) = @32;
----------------------0x4162df-------------------
VM_GetAddESP
2
EBP = EBP_init + 0xFFFFFFFC;
ESI = ESI_init + 0x1;
@32 = EBP_init + {@8 0 8, 0x0 8 32};
----------------------0x414783-------------------
VM_Nop
1
----------------------0x415304-------------------
VM_ReadDs8
1
EBP = EBP_init + 0x3;
@8 = @8[@32];
----------------------0x4142ed-------------------
VM_PushI32
5
EBP = EBP_init + 0xFFFFFFFC;
ESI = ESI_init + 0x4;
@32 = @32;
----------------------0x4157da-------------------
VM_ShortJMP
10
ESI = ESI_init + ((@32?({@32 0 32, 0xFFFFFFFF 32 64},{@32 0 32, 0x0 32 64})) * (((@32 & {@16 0 16, 0x0 16 32}) >> ({@8 0 8, 0x0 8 32} & 0x1F))?({(@32 & {@16 0 16, 0x0 16 32}) >> ({@8 0 8, 0x0 8 32} & 0x1F) 0 32, 0xFFFFFFFF 32 64},{(@32 & {@16 0 16, 0x0 16 32}) >> ({@8 0 8, 0x0 8 32} & 0x1F) 0 32, 0x0 32 64}))) + 0x9;
----------------------0x41399e-------------------
VM_ReadDs16
1
EBP = EBP_init + 0x2;
@16 = @16[@32];
----------------------0x416b08-------------------
VM_PushR32
2
EBP = EBP_init + 0xFFFFFFFC;
ESI = ESI_init + 0x1;
@32 = @8?(@32 0 8, 0xFFFFFF 8 32}],@32 0 8, 0x0 8 32}]);
----------------------0x41a5c3-------------------
VM_CALL #执行栈上的地址 附带的4字节为返回地址
5
EBP = @32;
ESI = @32;
EDI = @32;
@32 = @32 + 0xFFBE59FF;
@32 = @32;
----------------------0x4161d4-------------------
VM_ReadDs32
1
@32 = @32[@32];
----------------------0x414327-------------------
VM_Neg32_
1
@32 = -@32 + 0x1;
----------------------0x413c7f-------------------
VM_PopR16
2
EBP = EBP_init + 0x2;
ESI = ESI_init + 0x1;
@8?(@16 0 8, 0xFFFFFF 8 32}],@16 0 8, 0x0 8 32}]) = @16;
----------------------0x416d10-------------------
VM_JMP
1
EBP = EBP_init + 0xC;
ESI = @32 0 8, 0x0 8 32} * 0x4 + 0x4];
----------------------0x4156d3-------------------
VM_Popfd
1
@32 = {0x2 0 12, iopl_f_init 12 14, 0x0 14 32};
----------------------0x416583-------------------
VM_PopSp
1
EBP = @32;
----------------------0x413cf3-------------------
VM_Add32
1
EBP = EBP_init + 0x4;
@32 = @32 + @32;
----------------------0x41323e-------------------
VM_WriteDs8
1
EBP = EBP_init + 0x5;
@8[@32] = @8;
----------------------0x4168d7-------------------
VM_Sub32
1
EBP = EBP_init + 0x4;
@32 = @32 + -@32;
----------------------0x41536a-------------------
VM_Retn
1
EBP = @32;
ESI = @32;
EDI = @32;
----------------------0x4126a2-------------------
VM_PushR8
2
EBP = EBP_init + 0xFFFFFFFF;
ESI = ESI_init + 0x1;
@8 = (@8?({@8 0 8, 0xFFFFFF 8 32}] 0 8, 0xFFFFFF 8 32},{@8 0 8, 0x0 8 32}] 0 8, 0x0 8 32}));
----------------------0x41478e-------------------
VM_PushStackTop32
1
EBP = EBP_init + 0xFFFFFFFC;
@32 = @32;
----------------------0x414542-------------------
VM_AddSp
3
EBP = EBP_init + {@16 0 16, 0x0 16 32};
ESI = ESI_init + 0x2;
----------------------0x413515-------------------
VM_WriteDs32
1
EBP = EBP_init + 0x8;
@32[@32] = @32;
----------------------0x415519-------------------
VM_WriteDs16
1
EBP = EBP_init + 0x6;
@16[@32] = @16;
----------------------0x41340c-------------------
VM_PushStackTop32
1
EBP = EBP_init + 0xFFFFFFFC;
@32 = @32;
----------------------0x412f93-------------------
VM_PushR16
2
EBP = EBP_init + 0xFFFFFFFE;
ESI = ESI_init + 0x1;
@16 = (@8?({@16 0 8, 0xFFFFFF 8 32}] 0 16, 0xFFFF 16 32},{@16 0 8, 0x0 8 32}] 0 16, 0x0 16 32}));
----------------------0x415689-------------------
VM_Neg32
1
@32 = -@32;
----------------------0x412de1-------------------
VM_PushI16
1
EBP = EBP_init + 0xFFFFFFFE;
ESI = ESI_init + 0x2;
@16 = @16;
----------------------0x4133f3-------------------
VM_Extend16To32
1
EBP = EBP_init + 0xFFFFFFFE;
@32 = {@16 0 16, 0x0 16 32};
----------------------0x4173b2-------------------
VM_ExecRealInst #变长指令,这里手动分析,不支持的指令(例如浮点之类的)会先退出虚拟机,在执行后重新进入虚拟机,x86的指令被固定在虚拟机字节码中 长度+x86机器码
-1
ESI = ESI_init + 0x1;
EDI = 0x41A473;
----------------------0x413880-------------------
VM_Shr32
1
EBP = EBP_init + 0x4;
@32 = @32 >> ({@8 0 8, 0x0 8 32} & 0x1F);
0x04 简单的反汇编
在上面分析handler之后,就可以实现一个很简陋的反汇编器了,通过跟踪esi,可以知道虚拟机最开始执行的地址为0x42c17c, 反汇编代码如下:
from miasm2.analysis.machine import Machine
from miasm2.analysis.binary import Container
from miasm2.ir.symbexec import SymbolicExecutionEngine
from miasm2.arch.x86.sem import ir_x86_32
from miasm2.arch.x86 import regs
from miasm2.arch.x86.regs import *
from miasm2.expression.expression import *
from miasm2.expression.simplifications import expr_simp
from miasm2.core import asmblock
from miasm2.ir.translators import Translator
from miasm2.core.locationdb import LocationDB
import struct
import binascii
if __name__ == '__main__':
    # Load the dumped sample; ``bs`` is the byte stream the bytecode
    # disassembler below reads from.
    machine = Machine("x86_32")
    # A PE image must be read in binary mode; the handle is closed once parsed.
    with open("test3.exe", "rb") as sample:
        cont = Container.from_stream(sample)
    bs = cont.bin_stream
    mdis = machine.dis_engine(bs,loc_db=cont.loc_db)
    mdis.follow_call=True
    ira = machine.ir(mdis.loc_db)
class Mapo_Engine():
    """Linear-sweep disassembler for MapoEngine VM bytecode.

    bs      -- byte stream exposing getbytes(address, size).
    veip    -- address of the first bytecode byte (the virtual EIP).
    handler -- dict mapping an opcode byte to its handler record. The method
               name is taken from index 1 of the record, following the
               "address, name, length" layout described next to the table
               -- confirm against the table actually passed in.

    Every VM_* method prints one disassembled instruction and advances
    ``veip`` by that instruction's length (opcode byte included).
    """

    def __init__(self,bs,veip,handler):
        self.bs=bs
        self.veip=veip
        self.handler=handler
        self.bStop=False
        # vm_context slot -> real register; there is no register rotation, so
        # the mapping is fixed.
        self.RegMap={0:'flag',1:'EAX',2:'ECX',3:'EDX',4:'EBX',5:'ESI',6:'EDI',7:'EBP'}

    def _unpack(self, fmt, offset, size):
        """Decode one little-endian value located ``offset`` bytes after veip."""
        return struct.unpack(fmt, self.bs.getbytes(self.veip+offset, size))[0]

    def _emit(self, text, size):
        """Print one instruction and step over its ``size`` bytes."""
        print(text)
        self.veip+=size

    def GetI8(self,offset):
        """Return the unsigned byte at veip+offset."""
        return self._unpack('<B', offset, 1)

    def GetI16(self,offset):
        """Return the unsigned 16-bit value at veip+offset."""
        return self._unpack('<H', offset, 2)

    def GetI32(self,offset):
        """Return the unsigned 32-bit value at veip+offset."""
        return self._unpack('<I', offset, 4)

    def GetBytes(self,offset,len):
        """Return ``len`` raw bytes starting at veip+offset."""
        # Read from the stream given to the constructor, not from a global.
        return self.bs.getbytes(self.veip+offset ,len)

    def GetRegIndex(self,offset):
        """Turn the signed operand byte at veip+offset into a RegMap index.

        The byte is negated and divided by 4 (integer division).
        """
        b=self._unpack('<b', offset, 1)
        return (-b)//4

    def GetRegDesc(self,index):
        """Return the register name for a vm_context slot index."""
        # Unmapped slots get a placeholder so the listing can continue.
        return self.RegMap.get(index, 'slot_%d' % index)

    def VM_PopRealfd(self):
        self._emit('VM_PopRealfd', 1)

    def VM_Extend8Sto32(self):
        self._emit('VM_Extend8Sto32', 1)

    def VM_Imul32(self):
        self._emit('VM_Imul32', 1)

    def VM_Nor32(self):
        self._emit('VM_Nor32', 1)

    def VM_PopR8(self):
        self._emit('VM_PopR8'+" "+self.GetRegDesc(self.GetRegIndex(1)), 2)

    def VM_PushI8(self):
        self._emit('VM_PushI8'+" 0x%x"%self.GetI8(1), 2)

    def VM_PopR32(self):
        self._emit('VM_PopR32'+" "+self.GetRegDesc(self.GetRegIndex(1)), 2)

    def VM_GetAddESP(self):
        self._emit('VM_GetAddESP'+" 0x%x"%self.GetI8(1), 2)

    def VM_Nop(self):
        self._emit('VM_Nop', 1)

    def VM_ReadDs8(self):
        self._emit('VM_ReadDs8', 1)

    def VM_PushI32(self):
        self._emit('VM_PushI32'+" 0x%x"%self.GetI32(1), 5)

    def VM_ShortJMP(self):
        # The handler analysis reports 10 bytes for this instruction (the
        # handler moves the bytecode pointer by 9 past the opcode), not 5.
        self._emit('VM_ShortJMP', 10)

    def VM_ReadDs16(self):
        self._emit('VM_ReadDs16', 1)

    def VM_PushR32(self):
        self._emit('VM_PushR32'+" "+self.GetRegDesc(self.GetRegIndex(1)), 2)

    def VM_CALL(self):
        self._emit('VM_CALL retaddr='+" 0x%x"%self.GetI32(1), 5)

    def VM_ReadDs32(self):
        self._emit('VM_ReadDs32', 1)

    def VM_Neg32_(self):
        self._emit('VM_Neg32_', 1)

    def VM_PopR16(self):
        self._emit('VM_PopR16'+" "+self.GetRegDesc(self.GetRegIndex(1)), 2)

    def VM_JMP(self):
        self._emit('VM_JMP', 1)

    def VM_Popfd(self):
        self._emit('VM_Popfd', 1)

    def VM_PopSp(self):
        self._emit('VM_PopSp', 1)

    def VM_Add32(self):
        self._emit('VM_Add32', 1)

    def VM_WriteDs8(self):
        self._emit('VM_WriteDs8', 1)

    def VM_Sub32(self):
        self._emit('VM_Sub32', 1)

    def VM_Retn(self):
        self._emit('VM_Retn', 1)

    def VM_PushR8(self):
        # Opcode byte plus a one-byte register operand, like the other
        # register push/pop instructions.
        self._emit('VM_PushR8'+" "+self.GetRegDesc(self.GetRegIndex(1)), 2)

    def VM_PushStackTop32(self):
        self._emit('VM_PushStackTop32', 1)

    def VM_AddSp(self):
        self._emit('VM_AddSp'+" 0x%x"%self.GetI16(1), 3)

    def VM_WriteDs32(self):
        self._emit('VM_WriteDs32', 1)

    def VM_WriteDs16(self):
        self._emit('VM_WriteDs16', 1)

    def VM_PushR16(self):
        self._emit('VM_PushR16'+" "+self.GetRegDesc(self.GetRegIndex(1)), 2)

    def VM_Neg32(self):
        self._emit('VM_Neg32', 1)

    def VM_PushI16(self):
        self._emit('VM_PushI16'+" 0x%x"%self.GetI16(1), 3)

    def VM_Extend16To32(self):
        self._emit('VM_Extend16To32', 1)

    def VM_ExecRealInst(self):
        """Disassemble a native x86 instruction embedded in the bytecode.

        Layout: opcode byte, length byte, then that many bytes of machine
        code. A native RET ends the disassembly.
        """
        inst_len=self.GetI8(1)
        inst=self.GetBytes(2,inst_len)
        dis_inst=str(Machine('x86_32').mn.dis(inst,32)).strip()
        print('VM_ExecRealInst{'+dis_inst+'}')
        self.veip=self.veip+inst_len+2
        if dis_inst=='RET':
            self.bStop=True

    def VM_Shr32(self):
        self._emit('VM_Shr32', 1)

    def disam(self):
        """Disassemble from veip until a handler sets bStop.

        Each output line is the bytecode address followed by the text printed
        by the handler method. An opcode missing from the handler table ends
        the run, since its length is unknown.
        """
        import sys  # local import: the address prefix is written without a newline
        while not self.bStop:
            sys.stdout.write('0x%x ' % self.veip)
            op=self.GetI8(0)
            entry=self.handler.get(op)
            if entry is None:
                print('unknown opcode 0x%x' % op)
                self.bStop=True
                break
            # Dispatch by method name instead of building a string for exec.
            getattr(self, entry[1])()
# opcode -> [handler address, handler name, instruction length in bytes]
# NOTE(review): the values of this table were lost from the listing. They are
# rebuilt from the per-handler results shown earlier, which were produced by
# walking the dispatch table at 0x41a606 in index order -- confirm against the
# binary. VM_PushI16 is given length 3 (opcode + 2-byte immediate) although
# that listing shows 1; -1 marks the variable-length VM_ExecRealInst.
handler={
    0: [0x4156c0, 'VM_PopRealfd', 1],
    1: [0x41455b, 'VM_Extend8Sto32', 1],
    2: [0x4155f4, 'VM_Imul32', 1],
    3: [0x4128d4, 'VM_Nor32', 1],
    4: [0x416dbc, 'VM_PopR8', 2],
    5: [0x41578b, 'VM_PushI8', 2],
    6: [0x4130db, 'VM_PopR32', 2],
    7: [0x4162df, 'VM_GetAddESP', 2],
    8: [0x414783, 'VM_Nop', 1],
    9: [0x415304, 'VM_ReadDs8', 1],
    10: [0x4142ed, 'VM_PushI32', 5],
    11: [0x4157da, 'VM_ShortJMP', 10],
    12: [0x41399e, 'VM_ReadDs16', 1],
    13: [0x416b08, 'VM_PushR32', 2],
    14: [0x41a5c3, 'VM_CALL', 5],
    15: [0x4161d4, 'VM_ReadDs32', 1],
    16: [0x414327, 'VM_Neg32_', 1],
    17: [0x413c7f, 'VM_PopR16', 2],
    18: [0x416d10, 'VM_JMP', 1],
    19: [0x4156d3, 'VM_Popfd', 1],
    20: [0x416583, 'VM_PopSp', 1],
    21: [0x413cf3, 'VM_Add32', 1],
    22: [0x41323e, 'VM_WriteDs8', 1],
    23: [0x4168d7, 'VM_Sub32', 1],
    24: [0x41536a, 'VM_Retn', 1],
    25: [0x4126a2, 'VM_PushR8', 2],
    26: [0x41478e, 'VM_PushStackTop32', 1],
    27: [0x414542, 'VM_AddSp', 3],
    28: [0x413515, 'VM_WriteDs32', 1],
    29: [0x415519, 'VM_WriteDs16', 1],
    30: [0x41340c, 'VM_PushStackTop32', 1],
    31: [0x412f93, 'VM_PushR16', 2],
    32: [0x415689, 'VM_Neg32', 1],
    33: [0x412de1, 'VM_PushI16', 3],
    34: [0x4133f3, 'VM_Extend16To32', 1],
    35: [0x4173b2, 'VM_ExecRealInst', -1],
    36: [0x413880, 'VM_Shr32', 1],
}
# Address of the first bytecode byte, found by tracking ESI.
veip=0x42c17c
ME=Mapo_Engine(bs,veip,handler)
ME.disam()  # start disassembling
注释后的结果如下,这个最好和最开始未被虚拟化的代码对比一下:
0x42c17c VM_PushI32 0x421f21
0x42c181 VM_CALL retaddr= 0x4456ae ;0x421f21这个函数是动态获取MessageBoxW的地址,并执行未虚拟化的代码mov edi,MessageBoxW_addr
0x42c186 VM_ExecRealInst{NOP}
0x42c189 VM_PushI32 0x2
0x42c18e VM_PopR32 ESI ;mov esi,0x2
0x42c190 VM_ExecRealInst{NOP DWORD PTR }
0x42c196 VM_PushI32 0x0
0x42c19b VM_PushI32 0x4020f8;"hi"
0x42c1a0 VM_PushI32 0x402100;"hello"
0x42c1a5 VM_PushI32 0x0
0x42c1aa VM_PushR32 EDI
0x42c1ac VM_CALL retaddr= 0x445727 ;调用 MessageBoxW
0x42c1b1 VM_PushR32 ESI
0x42c1b3 VM_PushI8 0x1
0x42c1b5 VM_Extend8Sto32
0x42c1b6 VM_PopRealfd
0x42c1b7 VM_Sub32 ;sub esi, 1
0x42c1b8 VM_Popfd
0x42c1b9 VM_PopR32 ESI
0x42c1bb VM_PushI32 0x42c1d8 ;下面是经典的vmp式跳转 0x42c1d8是退出循环 0x42c196继续循环
0x42c1c0 VM_PushI32 0x42c196
0x42c1c5 VM_PushR32 flag
0x42c1c7 VM_PushStackTop32
0x42c1c8 VM_Nor32
0x42c1c9 VM_PushI32 0x40
0x42c1ce VM_PushStackTop32
0x42c1cf VM_Nor32
0x42c1d0 VM_Nor32
0x42c1d1 VM_PushI32 0x6
0x42c1d6 VM_Shr32
0x42c1d7 VM_JMP
0x42c1d8 VM_PushI32 0x401035
0x42c1dd VM_ExecRealInst{RET};这里没使用vm_retn退出,直接用ret改变eip到0x401035,不继续进入虚拟机
0x401035的内容如下:
0x05 结束语
本文并不是要开发一种通用的分析工具,只是想分享一下分析方法,如果文中有错误的地方,望大家不吝赐教。
作者对各种工具的运用可以说是炉火纯青了,佩服佩服,写的也比较简单明了 看来mapo被搞指日可待了。 厉害,python和符号执行一直想学,可惜都没有机会 完全看不懂...但是也得谢谢分享....赞一下............... 吾爱的手机排版看着好吃力啊。。 这个一定要支持,感谢分享!!! 膜拜.出了好几篇关于vm壳的精华帖子了.高兴 谢谢分享 学习一下 看看学习下。。。