汇编与反汇编神器Unicorn
我们来先说说Unicorn有啥子卵用。Unicorn 是一款非常优秀的跨平台模拟执行框架,该框架可以跨平台执行Arm, Arm64 (Armv8), M68K, Mips, Sparc, & X86 (include X86_64)等指令集的原生程序。
好了,说得那么官方,我们还是举个例子吧:研究OLLVM的时候是不是很头疼函数的地址?使用Unicorn就可以打印函数的注册地址以及参数名称。下面用某音的so来演示一下Unicorn的威力:
RegisterNatives dvmClass=com/ss/android/common/applog/UserInfo, name=getUserInfo, signature=(ILjava/lang/String;0x2c6c5
RegisterNatives dvmClass=com/ss/android/common/applog/UserInfo, name=getUserInfo, signature=(ILjava/lang/String;0x2c6dd
RegisterNatives dvmClass=com/ss/android/common/applog/UserInfo, name=getUserInfoSkipGet, signature=(ILjava/lang/String;0x2c7b1
RegisterNatives dvmClass=com/ss/android/common/applog/UserInfo, name=getUserInfo, signature=(I0x2c7d1
RegisterNatives dvmClass=com/ss/android/common/applog/UserInfo, name=getPackage, signature=(Ljava/lang/String;)V, fnPtr=unicorn@0x4002e0dd0x2e0dd
很快就可以找到UserInfo的函数地址了,不管是Hook还是直接动态调试都是事半功倍。
(当初我找函数地址,找到想哭)
好了,开始Unicorn的入门
Unicorn 快速入门
多架构
Unicorn 是一款基于qemu模拟器的模拟执行框架,支持Arm, Arm64 (Armv8), M68K, Mips, Sparc, & X86 (include X86_64)等指令集。
多语言
Unicorn 为多种语言提供编程接口比如C/C++、Python、Java 等语言。Unicorn的DLL 可以被更多的语言调用,比如易语言、Delphi,前途无量。
多线程安全
Unicorn 设计之初就考虑到线程安全问题,能够同时并发模拟执行代码,极大的提高了实用性。
虚拟内存
Unicorn 采用虚拟内存机制,使得虚拟CPU的内存与真实CPU的内存隔离。Unicorn 使用如下API来操作内存:
uc_mem_map
uc_mem_read
uc_mem_write
使用uc_mem_map映射内存的时候,address 与 size 都需要与0x1000对齐,也就是0x1000的整数倍,否则会报UC_ERR_ARG 异常。如何动态分配管理内存并实现libc中的malloc功能将在后面的课程中讲解。
Hook 机制
Unicorn的Hook机制为编程控制虚拟CPU提供了便利。
Unicorn 支持多种不同类型的Hook。
大致可以分为(hook_add第一参数,Unicorn常量):
指令执行类
UC_HOOK_INTR
UC_HOOK_INSN
UC_HOOK_CODE
UC_HOOK_BLOCK
内存访问类
UC_HOOK_MEM_READ
UC_HOOK_MEM_WRITE
UC_HOOK_MEM_FETCH
UC_HOOK_MEM_READ_AFTER
UC_HOOK_MEM_PROT
UC_HOOK_MEM_FETCH_INVALID
UC_HOOK_MEM_INVALID
UC_HOOK_MEM_VALID
异常处理类
UC_HOOK_MEM_READ_UNMAPPED
UC_HOOK_MEM_WRITE_UNMAPPED
UC_HOOK_MEM_FETCH_UNMAPPED
调用hook_add函数可添加一个Hook。Unicorn的Hook是链式的,而不是传统Hook的覆盖式,也就是说,可以同时添加多个同类型的Hook,Unicorn会依次调用每一个handler。hook callback 是有作用范围的(见hook_add begin参数)。
我们来举一个简单的栗子:
先装一下Unicorn的导入包
pip install unicorn
然后新建一个py文件
from unicorn import *
from unicorn.arm_const import *
# Machine code used by the ARM emulation test below; per the listing it
# encodes two instructions, four bytes each:
#   mov r0, #0x37
#   sub r1, r2, r3
ARM_CODE = b"\x37\x00\xa0\xe3\x03\x10\x42\xe0"
def hook_code(uc, address, size, user_data):
    """Instruction-trace callback: print the address and size of an instruction.

    Args:
        uc: Emulator instance that invoked the hook (not used).
        address: Address of the instruction being traced.
        size: Size of that instruction in bytes.
        user_data: Extra value registered with the hook (not used).
    """
    report = ">>> Tracing instruction at 0x%x, instruction size = 0x%x" % (address, size)
    print(report)
def test_arm():
    """Emulate the two instructions in ARM_CODE and print the resulting R0 and R1.

    R0, R2 and R3 are preloaded before emulation starts. After
    ``mov r0, #0x37`` and ``sub r1, r2, r3`` the expected output is
    R0 = 0x37 and R1 = 0x6789 - 0x3333 = 0x3456.

    Any UcError raised by the emulator is caught and printed rather than
    propagated.
    """
    print("Emulate ARM code")
    try:
        # ARM_CODE contains 32-bit ARM encodings, so the engine has to be
        # created in ARM mode; Thumb mode would not match the byte stream.
        mu = Uc(UC_ARCH_ARM, UC_MODE_ARM)

        # Map 128 KiB (2 * 0x10000 bytes) for this emulation and copy the
        # machine code to the start of the mapping. mem_write takes bytes.
        ADDRESS = 0x10000
        mu.mem_map(ADDRESS, 2 * 0x10000)
        mu.mem_write(ADDRESS, ARM_CODE)

        # Initialise registers before the emulation starts.
        mu.reg_write(UC_ARM_REG_R0, 0x1234)
        mu.reg_write(UC_ARM_REG_R2, 0x6789)
        mu.reg_write(UC_ARM_REG_R3, 0x3333)

        # Optional instruction trace, left disabled as in the tutorial:
        # mu.hook_add(UC_HOOK_CODE, hook_code, begin=ADDRESS, end=ADDRESS)

        # Run from the first byte up to the end of the code, with no
        # timeout or instruction-count limit.
        mu.emu_start(ADDRESS, ADDRESS + len(ARM_CODE))
        print("已开机")

        # Read back the results.
        r0 = mu.reg_read(UC_ARM_REG_R0)
        r1 = mu.reg_read(UC_ARM_REG_R1)
        print(">>> R0 = 0x%x" % r0)
        print(">>> R1 = 0x%x" % r1)
    except UcError as e:
        print("ERROR: %s" % e)
test_arm()
我把核心的位置都写了备注,这样很明显了吧
我们看看运行结果
R0寄存器的值就变成了0x37,R1=0x3456。
上面我们明明没有对R1寄存器进行任何操作,为什么R1会有值呢?
于是我们引入第二个汇编神器Capstone
其实ARM_CODE = b"\x37\x00\xa0\xe3\x03\x10\x42\xe0"就是对寄存器的操作
我们用Capstone来翻译看看是什么指令
先插个件
pip install capstone
建个py文件
from capstone import *
from capstone.arm import *
# Same byte sequence as the emulation example, disassembled in ARM mode.
CODE = b"\x37\x00\xa0\xe3\x03\x10\x42\xe0"
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)

# 0x1000 is the address assigned to the first decoded instruction.
for i in md.disasm(CODE, 0x1000):
    address, mnemonic, operands = i.address, i.mnemonic, i.op_str
    print("%x:\t%s\t%s" % (address, mnemonic, operands))
查看运行结果
这个总该看得懂了吧,就是简单的arm指令:R1=R2-R3
接下来你们肯定关心怎么打印地址?怎么让Unicorn像普通调试器那样可以单步调试,对不对?
无名大佬写了一个调试器,我们来看看这个调试器的源码
(本菜是无名大佬的脑残粉)
from unicorn import *
from unicorn import arm_const
from unicorn.arm_const import *
import sys
import hexdump
import capstone as cp
# Breakpoint type identifiers.
BPT_EXECUTE = 1
BPT_MEMREAD = 2

# Debugger modes accepted by UnicornDebugger.
UDBG_MODE_ALL = 1
UDBG_MODE_FAST = 2

# Register id -> display name for ARM. R0..R15 are generated first, then the
# PC/SP/LR names are assigned.
# NOTE(review): if the library defines R13/R14/R15 as aliases of SP/LR/PC,
# these later assignments replace the "R13"/"R14"/"R15" display names.
REG_ARM = {
    getattr(arm_const, "UC_ARM_REG_R%d" % index): "R%d" % index
    for index in range(16)
}
REG_ARM[arm_const.UC_ARM_REG_PC] = "PC"
REG_ARM[arm_const.UC_ARM_REG_SP] = "SP"
REG_ARM[arm_const.UC_ARM_REG_LR] = "LR"

# Architecture id -> register table for that architecture.
REG_TABLE = {UC_ARCH_ARM: REG_ARM}
def str2int(s):
    """Convert a numeric string to an int.

    Strings beginning with ``0x`` or ``0X`` are parsed as hexadecimal;
    everything else is parsed as decimal.

    Raises:
        ValueError: If the text is not a valid number in the chosen base.
    """
    looks_hex = s[:2] in ('0x', '0X')
    return int(s, 16) if looks_hex else int(s)
def advance_dump(data, base):
    """Render ``data`` as a hex dump whose addresses start at ``base``.

    Each output line covers up to 16 bytes and has three parts: an 8-digit
    uppercase hexadecimal address followed by ``": "``, the bytes as
    two-digit uppercase hex values (one extra space separates the two
    8-byte halves), and the ASCII rendering in which bytes outside
    0x20..0x7E are shown as ``.``. Short final lines are padded so the
    ASCII column stays aligned.

    The second half of each line now contains only bytes 8..15; previously
    the full hex string was appended, so the first eight bytes appeared
    twice. The formatting uses only built-ins.

    Args:
        data: Bytes-like object to dump.
        base: Address shown for the first byte of ``data``.

    Returns:
        The dump as one string, every line terminated by a newline. Empty
        input yields an empty string.
    """
    lines = []
    for offset in range(0, len(data), 16):
        # bytearray() yields ints when iterated regardless of input type.
        chunk = bytearray(data[offset:offset + 16])
        cells = ['%02X' % byte for byte in chunk]

        line = '%08X: ' % (base + offset)
        line += ' '.join(cells[:8])
        if len(chunk) > 8:
            # Two spaces separate the first and second group of eight bytes.
            line += '  ' + ' '.join(cells[8:])

        # Pad up to the ASCII column; a short line lacks 3 characters per
        # missing byte, plus the half separator when only one half exists.
        pad = 2
        if len(chunk) < 16:
            pad += 3 * (16 - len(chunk))
        if len(chunk) <= 8:
            pad += 1
        line += ' ' * pad

        # Printable ASCII range is 0x20 to 0x7E.
        line += ''.join(chr(byte) if 0x20 <= byte <= 0x7E else '.' for byte in chunk)
        lines.append(line + '\n')
    return ''.join(lines)
def _dbg_trace(mu, address, size, self):
self._tracks.append(address)
if not self._is_step and self._tmp_bpt == 0:
if address not in self._list_bpt:
return
if self._tmp_bpt != address and self._tmp_bpt != 0:
return
return _dbg_trace_internal(mu, address, size, self)
def _dbg_memory(mu, access, address, length, value, self):
    """Memory-fault hook: report the fault, open the prompt, then stop emulation.

    Args:
        mu: Emulator instance that invoked the hook.
        access: Access-type code supplied by the emulator.
        address: Memory address involved in the fault.
        length: Size of the access in bytes.
        value: Value associated with the access.
        self: The debugger object registered as hook user data.

    Returns:
        Always True.
    """
    pc = mu.reg_read(arm_const.UC_ARM_REG_PC)
    details = (pc, access, address, length, value)
    print("memory error: pc: %x access: %x address: %x length: %x value: %x" % details)

    # Show the debugger prompt at the faulting PC, using a fixed size of 4.
    _dbg_trace_internal(mu, pc, 4, self)
    mu.emu_stop()
    return True
def _dbg_trace_internal(mu, address, size, self):
self._is_step = False
print("======================= Registers =======================")
self.dump_reg()
print("======================= Disassembly =====================")
self.dump_asm(address, size * self.dis_count)
while True:
raw_command = input(">")
if raw_command == '':
raw_command = self._last_command
self._last_command = raw_command
command = []
for c in raw_command.split(" "):
if c != "":
command.append(c)
try:
if command == 'set':
if command == 'reg':# set reg regname value
self.write_reg(command, str2int(command))
elif command == 'bpt':
self.add_bpt(str2int(command))
else:
print("command error see help.")
elif command == 's' or command == 'step':
# self._tmp_bpt = address + size
self._tmp_bpt = 0
self._is_step = True
break
elif command == 'n' or command == 'next':
self._tmp_bpt = address + size
self._is_step = False
break
elif command == 'r' or command == 'run':
self._tmp_bpt = 0
self._is_step = False
break
elif command == 'dump':
if len(command) >= 3:
nsize = str2int(command)
else:
nsize = 4 * 16
self.dump_mem(str2int(command), nsize)
elif command == 'list':
if command == 'bpt':
self.list_bpt()
elif command == 'del':
if command == 'bpt':
self.del_bpt(str2int(command))
elif command == 'stop':
exit(0)
elif command == 't':
self._castone = self._capstone_thumb
print("======================= Disassembly =====================")
self.dump_asm(address, size * self.dis_count)
elif command == 'a':
self._castone = self._capstone_arm
print("======================= Disassembly =====================")
self.dump_asm(address, size * self.dis_count)
elif command == 'f':
print(" == recent ==")
for i in self._tracks[-10:-1]:
print(self.sym_handler(i))
else:
print("Command Not Found!")
except:
print("command error see help.")
class UnicornDebugger:
    """Interactive console debugger attached to a Unicorn ARM emulator.

    Creating an instance installs hooks on the given emulator. The hooks
    call back into this object to decide when to stop and to print
    registers, memory and disassembly. Only the ARM architecture is
    supported.
    """

    def __init__(self, mu, mode=UDBG_MODE_ALL):
        """Attach the debugger to ``mu``.

        Args:
            mu: Emulator instance to debug. Its ``_arch`` and ``_mode``
                attributes are read to validate the configuration.
            mode: ``UDBG_MODE_ALL`` also installs the per-instruction trace
                hook; any other value installs only the memory-fault hooks.

        Raises:
            RuntimeError: If the emulator architecture is not ARM or its
                mode is not one of the accepted modes. Emulation is stopped
                before raising.
        """
        self._tracks = []          # addresses of every traced instruction
        self._mu = mu
        self._arch = mu._arch
        self._mode = mu._mode
        self._list_bpt = []        # user breakpoints
        self._tmp_bpt = 0          # one-shot breakpoint used by "next"; 0 = none
        self._error = ''
        self._last_command = ''    # repeated when an empty line is entered
        self.dis_count = 5         # maximum instructions per disassembly listing
        self._is_step = False
        self.sym_handler = self._default_sym_handler

        # Only an ARM register table and ARM/Thumb disassemblers exist.
        if self._arch != UC_ARCH_ARM:
            mu.emu_stop()
            raise RuntimeError("arch:%d is not supported! " % self._arch)

        if self._mode not in (UC_MODE_THUMB, UC_MODE_ARM, UC_MODE_32, UC_MODE_64):
            mu.emu_stop()
            raise RuntimeError("mode:%d is not supported! " % self._mode)

        self._capstone_thumb = cp.Cs(cp.CS_ARCH_ARM, cp.CS_MODE_THUMB)
        self._capstone_arm = cp.Cs(cp.CS_ARCH_ARM, cp.CS_MODE_ARM)
        # Start with the decoder that matches the emulator mode. Every mode
        # other than plain ARM keeps the former Thumb default.
        if self._mode == UC_MODE_ARM:
            self._capstone = self._capstone_arm
        else:
            self._capstone = self._capstone_thumb

        if mode == UDBG_MODE_ALL:
            mu.hook_add(UC_HOOK_CODE, _dbg_trace, self)

        mu.hook_add(UC_HOOK_MEM_UNMAPPED, _dbg_memory, self)
        mu.hook_add(UC_HOOK_MEM_FETCH_PROT, _dbg_memory, self)

        # Register table for this architecture, not the table of tables.
        self._regs = REG_TABLE[self._arch]

    def dump_mem(self, addr, size):
        """Print a hex dump of ``size`` bytes of emulator memory at ``addr``."""
        data = self._mu.mem_read(addr, size)
        print(advance_dump(data, addr))

    def dump_asm(self, addr, size):
        """Disassemble ``size`` bytes at ``addr`` and print the instructions.

        At most ``self.dis_count`` instructions are printed. Addresses are
        rendered through the symbol handler.
        """
        # Hand the disassembler an immutable bytes object.
        code = bytes(self._mu.mem_read(addr, size))
        for count, ins in enumerate(self._capstone.disasm(code, addr)):
            if count >= self.dis_count:
                break
            print("%s:\t%s\t%s" % (self.sym_handler(ins.address), ins.mnemonic, ins.op_str))

    def dump_reg(self):
        """Print every register in the table as ``NAME=0xVALUE``, four per line."""
        entries = [
            "%s=%s" % (rname, hex(self._mu.reg_read(rid)))
            for rid, rname in self._regs.items()
        ]
        rows = ["  ".join(entries[start:start + 4]) for start in range(0, len(entries), 4)]
        print("\n".join(rows))

    def write_reg(self, reg_name, value):
        """Write ``value`` to the register whose display name is ``reg_name``.

        The name comparison ignores case. A message is printed when no
        register matches.
        """
        wanted = reg_name.upper()
        for rid, rname in self._regs.items():
            if rname.upper() == wanted:
                self._mu.reg_write(rid, value)
                return
        print(" Reg not found:%s " % reg_name)

    def show_help(self):
        """Print the list of prompt commands."""
        help_info = "\n".join((
            "",
            "# commands",
            "# set reg <regname> <value>",
            "# set bpt <addr>",
            "# n",
            "# s",
            "# r",
            "# dump <addr> <size>",
            "# list bpt",
            "# del bpt <addr>",
            "# stop",
            "# a/t change arm/thumb",
            "# f show ins flow",
            "",
        ))
        print(help_info)

    def list_bpt(self):
        """Print every breakpoint with its index."""
        for idx, bpt in enumerate(self._list_bpt):
            print("[%d] %s" % (idx, self.sym_handler(bpt)))

    def add_bpt(self, addr):
        """Register a breakpoint at ``addr``; an existing one is not duplicated."""
        if addr not in self._list_bpt:
            self._list_bpt.append(addr)

    def del_bpt(self, addr):
        """Remove the breakpoint at ``addr``.

        Raises:
            ValueError: If no breakpoint is registered at ``addr``.
        """
        self._list_bpt.remove(addr)

    def get_tracks(self):
        """Return the list of traced instruction addresses (not a copy)."""
        return self._tracks

    def _default_sym_handler(self, address):
        """Default address formatter: the address in hexadecimal."""
        return hex(address)

    def set_symbol_name_handler(self, handler):
        """Install ``handler`` as the callable that turns an address into text."""
        self.sym_handler = handler
def test_arm():
    """Run the two-instruction ARM sample under UnicornDebugger.

    A breakpoint is placed on the first instruction, so the debugger prompt
    appears as soon as emulation starts. When emulation finishes, SP and R1
    are printed.

    The byte string holds 32-bit ARM encodings, so the engine is created in
    ARM mode and the names, comments and banner describe ARM code rather
    than Thumb.

    Any UcError raised by the emulator is caught and printed rather than
    propagated.
    """
    print("Emulate ARM code")
    # mov r0, #0x37
    # sub r1, r2, r3
    ARM = b"\x37\x00\xa0\xe3\x03\x10\x42\xe0"
    try:
        # Initialize emulator in ARM mode to match the instruction encoding.
        mu = Uc(UC_ARCH_ARM, UC_MODE_ARM)

        # Map 128 KiB (2 * 0x10000 bytes) and load the code at its start.
        ADDRESS = 0x10000
        mu.mem_map(ADDRESS, 2 * 0x10000)
        mu.mem_write(ADDRESS, ARM)

        mu.reg_write(UC_ARM_REG_SP, 0x1234)
        mu.reg_write(UC_ARM_REG_R2, 0x6789)

        # Attach the debugger and break on the first instruction.
        udbg = UnicornDebugger(mu)
        udbg.add_bpt(ADDRESS)

        # Emulate with no timeout or instruction-count limit.
        mu.emu_start(ADDRESS, ADDRESS + len(ARM))

        sp = mu.reg_read(UC_ARM_REG_SP)
        r1 = mu.reg_read(UC_ARM_REG_R1)
        print(">>> SP = 0x%x" % sp)
        print(">>> R1 = 0x%x" % r1)
    except UcError as e:
        print("ERROR: %s" % e)
test_arm()
我们看看运行结果
寄存器的值,和反编译后的指令都显示出来了
接下来就是输入指令了,step,run,next,这是不是跟F8,F9,F10,步入,步过,运行很像呢
这个大家可以自己去尝试一下,我就直接run了
值都打印出来啦。
这些都是Unicorn的基础,那些大佬已经基于Unicorn写出很多很强大的逆向工具,大家有兴趣可以自己找找
这个不是从看雪那边搬过来的嘛?
加了几个图片 75769837 发表于 2019-9-19 17:45
https://bbs.pediy.com/thread-253868.htm
和这个帖子 和 内容 以及其公布的调用抖音So的项目有啥大点 ...
没看清楚内容么?我说了调试器是无名写的,但是抖音so调用,你有本事倒是做个出来看看啊? 看起来很厉害的亚子 工具呢在哪呢? 75769837 发表于 2019-9-19 16:23
这个不是从看雪那边搬过来的嘛?
加了几个图片
来,你给我搬运一个某音so地址打印 {:1_893:}工具怎么下 大佬来一个汇编入门的自学链接呗 Richor 发表于 2019-9-19 16:26
来,你给我搬运一个某音so地址打印
https://bbs.pediy.com/thread-253868.htm
和这个帖子 和 内容 以及其公布的调用抖音So的项目有啥大点的区别嘛? 虽然看不懂,但感觉好厉害。收藏了,有需要再下载研究一下。