MISC系列(51)--010 Editor模板编写入门
标题: MISC系列(51)--010 Editor模板编写入门
```
目录:
☆ 背景介绍
☆ SerializeTest.py
1) 序列化输出结构
☆ SerializeTest.bt
☆ 参考资源
```
☆ 背景介绍
上周在看Zend VM OPcache,它的bin文件格式是版本强相关的,随PHP版本不同需要不同的解析方式。010 Editor提供的模板不适用于我当时看的版本。起初我在原模板基础上小修小改对付着用,后来发现需要修改的地方比较多,也不太适应原作者的解析思路,就打算自己写一个匹配版本的解析模板。
从未完整写过010 Editor模板,直接解析Zend VM OPcache的bin,会碰上很多与目标不直接相关的低级问题,比如基础语法、功能函数等等。决定先写一个Python脚本,对一组稍显复杂的自定义数据结构进行自定义序列化输出,然后写010 Editor模板解析前述序列化数据,以此掌握010 Editor模板基础语法、功能函数。
☆ SerializeTest.py
```python
#! /usr/bin/env python
# -*- encoding: utf-8 -*-
import struct
import hexdump
#
#########################################################################
#
#
# little-endian
#
def i2b_8_l(n):
    """Pack the low 8 bits of integer *n* into one byte.

    Bits above the lowest eight are masked off, so negative or oversized
    values never make struct.pack raise.
    """
    return struct.pack('<B', n & 0xff)
#
# end of i2b_8_l
#
def i2b_32_l(n):
    """Pack the low 32 bits of integer *n* as 4 little-endian bytes.

    Higher bits are masked off instead of raising an overflow error.
    """
    return struct.pack('<I', n & 0xffffffff)
#
# end of i2b_32_l
#
def i2b_64_l(n):
    """Pack the low 64 bits of integer *n* as 8 little-endian bytes.

    Higher bits are masked off instead of raising an overflow error.
    """
    return struct.pack('<Q', n & 0xffffffffffffffff)
#
# end of i2b_64_l
#
def b2i_8_l(buf):
    """Decode a 1-byte buffer into an unsigned integer.

    *buf* must be exactly one byte long; struct.error is raised otherwise.
    """
    # struct.unpack always returns a tuple, even for a single field.
    return struct.unpack('<B', buf)[0]
#
# end of b2i_8_l
#
def b2i_32_l(buf):
    """Decode a 4-byte little-endian buffer into an unsigned integer.

    *buf* must be exactly four bytes long; struct.error is raised otherwise.
    """
    # struct.unpack always returns a tuple, even for a single field.
    return struct.unpack('<I', buf)[0]
#
# end of b2i_32_l
#
def b2i_64_l(buf):
    """Decode an 8-byte little-endian buffer into an unsigned integer.

    *buf* must be exactly eight bytes long; struct.error is raised otherwise.
    """
    # struct.unpack always returns a tuple, even for a single field.
    return struct.unpack('<Q', buf)[0]
#
# end of b2i_64_l
#
#
# str转bytes
#
def s2b(s, e="utf-8"):
    """Encode str *s* to bytes using encoding *e* (UTF-8 by default)."""
    return s.encode(encoding=e)
#
# end of s2b
#
#
# bytes转str
#
def b2s(b, e="utf-8"):
    """Decode bytes *b* to str using encoding *e* (UTF-8 by default)."""
    return str(b, encoding=e)
#
# end of b2s
#
#
# 替换bytes中指定偏移、指定长度的内容
#
def PatchBytes(buf, off, size, sth):
    """Return a copy of *buf* with *size* bytes at offset *off* replaced by *sth*.

    *buf* is not modified. *sth* need not be *size* bytes long; the result
    grows or shrinks accordingly.
    """
    return buf[:off] + sth + buf[off + size:]
#
# PatchBytes
#
def WriteBytes(filename, buf, mode='wb'):
    """Write the bytes *buf* to *filename*.

    *mode* is passed straight to open(); the default 'wb' truncates an
    existing file, 'ab' appends to it.
    """
    with open(filename, mode) as f:
        f.write(buf)
#
# end of WriteBytes
#
#
#########################################################################
#
#
# count + off_array + buf_array
#
def SerializeArray(array):
    """Serialize a list of objects as count + off_array + buf_array.

    Every element must provide a Serialize() method returning bytes.

    Layout (little-endian):
        count      uint32, number of elements
        off_array  count * uint32, offset of each element's data measured
                   from the first byte of count
        buf_array  the serialized elements, back to back
    """
    count = len(array)
    chunks = [item.Serialize() for item in array]
    offsets = []
    # The first element starts right after count and the offset table.
    pos = 4 + 4 * count
    for chunk in chunks:
        offsets.append(i2b_32_l(pos))
        pos += len(chunk)
    return i2b_32_l(count) + b''.join(offsets) + b''.join(chunks)
#
# end of SerializeArray
#
#
# count + off_array + buf_array
#
def DeserializeArray(buf, cls):
    """Parse a count + off_array + buf_array blob into a list of *cls* objects.

    *cls* must provide a static Deserialize(bytes) returning an instance, or
    None on failure. Offsets in off_array are relative to the first byte of
    *buf* (the count field).

    Returns the list of objects, or None if *buf* is too short for the
    count/offset table or any element fails to deserialize.
    """
    if len(buf) < 4:
        return None
    count = b2i_32_l(buf[0:4])
    if 4 + 4 * count > len(buf):
        return None
    array = []
    for i in range(count):
        entry = 4 + 4 * i
        off = b2i_32_l(buf[entry:entry + 4])
        # An element only knows its own length, so hand it everything from
        # its start to the end of the buffer.
        item = cls.Deserialize(buf[off:])
        if item is None:
            return None
        array.append(item)
    return array
#
# end of DeserializeArray
#
#
#########################################################################
#
#
# struct _zend_string {
# uint32_t id;
# size_t len;
# char val;
# }
#
class MyString:
    """Simplified stand-in for zend_string: a numeric id plus a text value.

    Serialized layout (little-endian):
        id   uint32
        len  uint32, byte length of val
        val  len bytes, the text encoded with s2b's default encoding
    """

    def __init__(self, id, sth):
        self.id = id
        self.sth = sth

    def __repr__(self):
        return "MyString [%u:%s]" % (self.id, self.sth)

    def show(self):
        """Print the same text that __repr__ returns."""
        print("MyString [%u:%s]" % (self.id, self.sth))

    def Serialize(self):
        """Return this string as id + len + val bytes."""
        val = s2b(self.sth)
        return i2b_32_l(self.id) + i2b_32_l(len(val)) + val

    @staticmethod
    def Deserialize(buf):
        """Parse a MyString from the start of *buf*.

        Bytes after the string's own data are ignored. Returns None when
        *buf* is shorter than the 8-byte header or than the declared length.
        """
        if len(buf) < 8:
            return None
        id = b2i_32_l(buf[0:4])
        size = b2i_32_l(buf[4:8])
        if 8 + size > len(buf):
            return None
        return MyString(id, b2s(buf[8:8 + size]))
# #
# # count + off_array + buf_array
# #
# @staticmethod
# def SerializeArray ( array ) :
# count = len( array )
# off = b''
# buf = b''
# j = 4 + 4 * count
# for i in range( count ) :
# off += i2b_32_l( j )
# tmp = array.Serialize()
# buf += tmp
# j += len( tmp )
# return( i2b_32_l( count ) + off + buf )
# #
# # end of SerializeArray
# #
# #
# # count + off_array + buf_array
# #
# @staticmethod
# def DeserializeArray ( buf ) :
# if ( len( buf ) < 4 ) :
# return( None )
# count = b2i_32_l( buf )
# k = 4 + 4 * count
# if ( k > len( buf ) ) :
# return( None )
# array = []
# for i in range( count ) :
# j = b2i_32_l( buf )
# if ( k != j ) :
# return( None )
# tmp = MyString.Deserialize( buf )
# if ( tmp is None ) ;
# return( None )
# array.append( tmp )
# k += len( tmp )
# return( array )
# #
# # end of DeserializeArray
# #
#
# end of MyString
#
#
#########################################################################
#
#
# struct _zval_struct {
# zend_value value;
# union {
# uint32_t type_info;
# } u1;
# union {
# uint32_t lineno;
# } u2;
# }
#
class MyZval:
    """Simplified stand-in for zval: a 64-bit value plus two 32-bit fields.

    Serialized layout (little-endian, 16 bytes):
        value      uint64
        type_info  uint32
        lineno     uint32
    """

    def __init__(self, value, type_info, lineno):
        self.value = value  # 64-bit integer
        self.type_info = type_info  # 32-bit integer
        self.lineno = lineno  # 32-bit integer

    def __repr__(self):
        return "MyZval [%#x:%u:%u]" % (self.value, self.type_info, self.lineno)

    def show(self):
        """Print the same text that __repr__ returns."""
        print("MyZval [%#x:%u:%u]" % (self.value, self.type_info, self.lineno))

    def Serialize(self):
        """Return the 16-byte serialized form."""
        return (
            i2b_64_l(self.value)
            + i2b_32_l(self.type_info)
            + i2b_32_l(self.lineno)
        )

    @staticmethod
    def Deserialize(buf):
        """Parse a MyZval from the first 16 bytes of *buf*.

        Extra trailing bytes are ignored. Returns None when *buf* holds
        fewer than 16 bytes.
        """
        if len(buf) < 16:
            return None
        value = b2i_64_l(buf[0:8])
        type_info = b2i_32_l(buf[8:12])
        lineno = b2i_32_l(buf[12:16])
        return MyZval(value, type_info, lineno)
#
# end of MyZval
#
#
#########################################################################
#
#
# struct _zend_op {
# znode_op op1;
# znode_op op2;
# znode_op result;
# uint32_t lineno;
# zend_uchar opcode;
# zend_uchar op1_type;
# zend_uchar op2_type;
# zend_uchar result_type;
# }
#
class MyZendOp:
    """Simplified stand-in for zend_op.

    Serialized layout (little-endian, 20 bytes):
        op1, op2, result, lineno                  4 * uint32
        opcode, op1_type, op2_type, result_type   4 * uint8
    """

    def __init__(self, op1, op2, result, lineno,
                 opcode, op1_type, op2_type, result_type):
        # 32-bit integers
        self.op1 = op1
        self.op2 = op2
        self.result = result
        self.lineno = lineno
        # unsigned chars
        self.opcode = opcode
        self.op1_type = op1_type
        self.op2_type = op2_type
        self.result_type = result_type

    def __repr__(self):
        return "MyZendOp [%#x:%#x:%#x (%u)]" % (
            self.opcode, self.op1, self.op2, self.lineno)

    def show(self):
        """Print the same text that __repr__ returns."""
        print("MyZendOp [%#x:%#x:%#x (%u)]" % (
            self.opcode, self.op1, self.op2, self.lineno))

    def Serialize(self):
        """Return the 20-byte serialized form."""
        return (
            i2b_32_l(self.op1)
            + i2b_32_l(self.op2)
            + i2b_32_l(self.result)
            + i2b_32_l(self.lineno)
            + i2b_8_l(self.opcode)
            + i2b_8_l(self.op1_type)
            + i2b_8_l(self.op2_type)
            + i2b_8_l(self.result_type)
        )

    @staticmethod
    def Deserialize(buf):
        """Parse a MyZendOp from the first 20 bytes of *buf*.

        Extra trailing bytes are ignored. Returns None when *buf* holds
        fewer than 20 bytes.
        """
        if len(buf) < 20:
            return None
        op1 = b2i_32_l(buf[0:4])
        op2 = b2i_32_l(buf[4:8])
        result = b2i_32_l(buf[8:12])
        lineno = b2i_32_l(buf[12:16])
        # Indexing a bytes object already yields an int, so the four
        # single-byte fields need no struct decoding.
        opcode = buf[16]
        op1_type = buf[17]
        op2_type = buf[18]
        result_type = buf[19]
        return MyZendOp(op1, op2, result, lineno,
                        opcode, op1_type, op2_type, result_type)
#
# end of MyZendOp
#
#
#########################################################################
#
#
# struct _zend_op_array {
# zend_uchar type;
# uint32_t last;
# zend_op *opcodes;
# int last_var;
# zend_string **vars;
# zend_string *filename;
# int last_literal;
# zval *literals;
# }
#
class MyZendOpArray:
    """Simplified stand-in for zend_op_array.

    Serialized layout (little-endian):
        head + opcodes[] + vars[] + filename + literals[]

    head is 29 bytes:
        type          uint8
        last          uint32, number of opcodes
        opcodes_off   uint32
        last_var      uint32, number of vars
        vars_off      uint32
        filename_off  uint32
        last_literal  uint32, number of literals
        literals_off  uint32

    Every *_off is measured from the first byte of head. Each some[] is
    the output of SerializeArray() with its leading count removed, because
    the count is stored in head; the offsets inside its offset table are
    still relative to where that count would have been, i.e. some_off - 4.
    """

    def __init__(self, type, opcodes, vars, filename, literals):
        self.type = type
        self.opcodes = opcodes  # list of MyZendOp
        self.vars = vars  # list of MyString
        self.filename = filename
        self.literals = literals  # list of MyZval

    def __repr__(self):
        return "MyZendOpArray [%u:%s]" % (self.type, self.filename)

    def show(self):
        """Print a summary line, then every opcode, var and literal."""
        print("MyZendOpArray [%u:%s]" % (self.type, self.filename))
        for group in (self.opcodes, self.vars, self.literals):
            for item in group:
                print(item)

    def Serialize(self):
        """Return the serialized bytes: head followed by the four payloads.

        This is only a demonstration scheme borrowed from Zend VM OPcache,
        not a real format; some of the field arrangement is arbitrary.
        """
        buf = i2b_8_l(self.type)

        buf += i2b_32_l(len(self.opcodes))
        opcodes_index = len(buf)
        buf += i2b_32_l(0)  # placeholder for opcodes_off
        # Drop the 4-byte count: it is already stored in head as `last`.
        opcodes_buf = SerializeArray(self.opcodes)[4:]

        buf += i2b_32_l(len(self.vars))
        vars_index = len(buf)
        buf += i2b_32_l(0)  # placeholder for vars_off
        vars_buf = SerializeArray(self.vars)[4:]

        filename_index = len(buf)
        buf += i2b_32_l(0)  # placeholder for filename_off
        filename_buf = self.filename.Serialize()

        buf += i2b_32_l(len(self.literals))
        literals_index = len(buf)
        buf += i2b_32_l(0)  # placeholder for literals_off
        literals_buf = SerializeArray(self.literals)[4:]

        # Head is complete. Append each payload in turn, patching its
        # placeholder with the payload's start offset (the current length).
        for index, payload in (
            (opcodes_index, opcodes_buf),
            (vars_index, vars_buf),
            (filename_index, filename_buf),
            (literals_index, literals_buf),
        ):
            buf = PatchBytes(buf, index, 4, i2b_32_l(len(buf)))
            buf += payload
        return buf

    @staticmethod
    def Deserialize(buf):
        """Parse a MyZendOpArray from *buf*.

        Returns None when *buf* is shorter than the 29-byte head or when
        any of the four payloads fails to deserialize.
        """
        if len(buf) < 29:
            return None
        type_ = b2i_8_l(buf[0:1])

        last = b2i_32_l(buf[1:5])
        opcodes_off = b2i_32_l(buf[5:9])
        # DeserializeArray expects the count in front of the offset table,
        # so put back the count that Serialize moved into head.
        opcodes = DeserializeArray(i2b_32_l(last) + buf[opcodes_off:], MyZendOp)
        if opcodes is None:
            return None

        last_var = b2i_32_l(buf[9:13])
        vars_off = b2i_32_l(buf[13:17])
        vars_ = DeserializeArray(i2b_32_l(last_var) + buf[vars_off:], MyString)
        if vars_ is None:
            return None

        filename_off = b2i_32_l(buf[17:21])
        filename = MyString.Deserialize(buf[filename_off:])
        if filename is None:
            return None

        last_literal = b2i_32_l(buf[21:25])
        literals_off = b2i_32_l(buf[25:29])
        literals = DeserializeArray(
            i2b_32_l(last_literal) + buf[literals_off:], MyZval)
        if literals is None:
            return None

        return MyZendOpArray(type_, opcodes, vars_, filename, literals)
#
# end of MyZendOpArray
#
#
#########################################################################
#
# mystr = MyString( 0, "Test" )
# mystr.show()
# buf = mystr.Serialize()
# hexdump.hexdump( buf )
# mystr = MyString.Deserialize( buf )
# mystr.show()
#
# myzval = MyZval( 0xffffffff00112233, 6, 12 )
# myzval.show()
# buf = myzval.Serialize()
# hexdump.hexdump( buf )
# myzval = MyZval.Deserialize( buf )
# myzval.show()
#
# myzendop = MyZendOp\
# (
# 1, 2, 3, 4, 5, 6, 7, 8
# )
# myzendop.show()
# buf = myzendop.Serialize()
# hexdump.hexdump( buf )
# myzendop = MyZendOp.Deserialize( buf )
# myzendop.show()
# Build a sample op array, dump it, write it to SerializeTest.bin, then
# parse the bytes back and dump again to check the round trip.
type = 0x41
opcodes = [
    MyZendOp( 1, 2, 3, 4, 5, 6, 7, 8 ),
    MyZendOp( 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38 ),
]
vars = [
    MyString( 0, "Test_0" ),
    MyString( 1, "Test_1" ),
]
filename = MyString( 2, "Test_2.php" )
literals = [
    MyZval( 0xffffffff00112233, 6, 12 ),
    MyZval( 0xfefefefe44556677, 7, 13 ),
]
myzendoparray = MyZendOpArray( type, opcodes, vars, filename, literals )
myzendoparray.show()
buf = myzendoparray.Serialize()
hexdump.hexdump( buf )
WriteBytes( 'SerializeTest.bin', buf )
myzendoparray = MyZendOpArray.Deserialize( buf )
# print( myzendoparray )
myzendoparray.show()
```
```
$ python3 SerializeTest.py
MyZendOpArray [65:MyString [2:Test_2.php]]
MyZendOp [0x5:0x1:0x2 (4)]
MyZendOp [0x35:0x31:0x32 (52)]
MyString [0:Test_0]
MyString [1:Test_1]
MyZval [0xffffffff00112233:6:12]
MyZval [0xfefefefe44556677:7:13]
00000000: 41 02 00 00 00 1D 00 00  00 02 00 00 00 4D 00 00  A............M..
00000010: 00 71 00 00 00 02 00 00  00 83 00 00 00 0C 00 00  .q..............
00000020: 00 20 00 00 00 01 00 00  00 02 00 00 00 03 00 00  . ..............
00000030: 00 04 00 00 00 05 06 07  08 31 00 00 00 32 00 00  .........1...2..
00000040: 00 33 00 00 00 34 00 00  00 35 36 37 38 0C 00 00  .3...4...5678...
00000050: 00 1A 00 00 00 00 00 00  00 06 00 00 00 54 65 73  .............Tes
00000060: 74 5F 30 01 00 00 00 06  00 00 00 54 65 73 74 5F  t_0........Test_
00000070: 31 02 00 00 00 0A 00 00  00 54 65 73 74 5F 32 2E  1........Test_2.
00000080: 70 68 70 0C 00 00 00 1C  00 00 00 33 22 11 00 FF  php........3"...
00000090: FF FF FF 06 00 00 00 0C  00 00 00 77 66 55 44 FE  ...........wfUD.
000000A0: FE FE FE 07 00 00 00 0D  00 00 00                 ...........
MyZendOpArray [65:MyString [2:Test_2.php]]
MyZendOp [0x5:0x1:0x2 (4)]
MyZendOp [0x35:0x31:0x32 (52)]
MyString [0:Test_0]
MyString [1:Test_1]
MyZval [0xffffffff00112233:6:12]
MyZval [0xfefefefe44556677:7:13]
```
1) 序列化输出结构
直接看SerializeTest.py的实现就能明白序列化方案,下面简介一二。
SerializeTest.py在序列化输出这个结构:
```cpp
struct _zend_op_array {
zend_uchar type;
uint32_t last;
zend_op *opcodes; // opcodes
int last_var;
zend_string **vars; // vars
zend_string *filename;
int last_literal;
zval *literals; // literals
}
```
这是个简化版结构,但足够复杂。其中vars[]本来是指针数组,但对序列化而言,当成zend_string[]序列化出去没啥不同,毕竟在序列化过程中指针都得换个方式处理。
下面这个结构将所有指针换成some_off的形式:
```cpp
struct _zend_op_array {
zend_uchar type;
uint32_t last;
uint32_t opcodes_off; // opcodes
int last_var;
uint32_t vars_off; // vars
uint32_t filename_off;// filename
int last_literal;
uint32_t literals_off;// literals
}
```
将来序列化输出的格式大致如下:
head + opcodes[] + vars[] + filename + literals[]
head就是上面那个结构,其中的some_off分别对应opcodes[]、vars[]、filename、literals[]的偏移,基址是head的首字节。
序列化输出中的some[]不是简单的结构数组,而是结构数组的序列化输出。本例中some[]的格式大致如下:
off[] + buf[]
off[]是一个偏移数组,靠off定位buf。buf是单个some结构的序列化输出。在SerializeTest.py的实现中,off的基址并不是head的首字节。
参看SerializeTest.py中SerializeArray()、DeserializeArray()的实现。SerializeArray()本来输出的是:
count + off[] + buf[]
off的基址是count的首字节。但some[]不包含count,count被放到head中去了。为啥会这样?本文参考了Zend VM OPcache的实现,在简化结构的同时想展示一些复杂之处,保留了某些原设定。
无论如何,这只是一种自定义序列化方案,仅作演示,没啥道理可言。
☆ SerializeTest.bt
```
$ xxd -g 1 SerializeTest.bin
00000000: 41 02 00 00 00 1d 00 00 00 02 00 00 00 4d 00 00  A............M..
00000010: 00 71 00 00 00 02 00 00 00 83 00 00 00 0c 00 00  .q..............
00000020: 00 20 00 00 00 01 00 00 00 02 00 00 00 03 00 00  . ..............
00000030: 00 04 00 00 00 05 06 07 08 31 00 00 00 32 00 00  .........1...2..
00000040: 00 33 00 00 00 34 00 00 00 35 36 37 38 0c 00 00  .3...4...5678...
00000050: 00 1a 00 00 00 00 00 00 00 06 00 00 00 54 65 73  .............Tes
00000060: 74 5f 30 01 00 00 00 06 00 00 00 54 65 73 74 5f  t_0........Test_
00000070: 31 02 00 00 00 0a 00 00 00 54 65 73 74 5f 32 2e  1........Test_2.
00000080: 70 68 70 0c 00 00 00 1c 00 00 00 33 22 11 00 ff  php........3"...
00000090: ff ff ff 06 00 00 00 0c 00 00 00 77 66 55 44 fe  ...........wfUD.
000000a0: fe fe fe 07 00 00 00 0d 00 00 00                 ...........
```
SerializeTest.bt是用于解析SerializeTest.bin的010 Editor模板。
```cpp
//
// struct _zend_string {
// uint32_t id;
// size_t len;
// char val;
// }
//
typedef struct _MyString
{
uint id;
uint len;
//
// 自动用前面的len成员定义此处的char[]
//
char val;
//
// Read functions can also be used to show information beside a struct
// without having to open the struct in the Template Results. When
// using read functions with a struct, the read function receives a
// reference to the struct and the '&' symbol should be used when
// declaring the parameter.
//
// read回调会影响GUI中Value列的显示,只是增强显示效果,可以不提供
//
} MyString <read=MyStringRepr>;
string MyStringRepr ( MyString &obj )
{
    //
    // Text for the Value column: the string payload, or "(null)" when the
    // string is empty.
    //
    if ( obj.len != 0 )
    {
        return( obj.val );
    }
    return( "(null)" );
}
//
//////////////////////////////////////////////////////////////////////////
//
//
// struct _zend_op {
// znode_op op1;
// znode_op op2;
// znode_op result;
// uint32_t lineno;
// zend_uchar opcode;
// zend_uchar op1_type;
// zend_uchar op2_type;
// zend_uchar result_type;
// }
//
//
// One serialized opcode: four 32-bit fields followed by four single-byte
// fields. MyZendOpRepr supplies the text shown for it.
//
typedef struct _MyZendOp
{
uint op1;
uint op2;
uint result;
uint lineno;
uchar opcode;
uchar op1_type;
uchar op2_type;
uchar result_type;
} MyZendOp <read=MyZendOpRepr>;
string MyZendOpRepr ( MyZendOp &obj )
{
    //
    // Text shown for one opcode: "opcode ( op1, op2 ) : lineno".
    //
    string  s;

    SPrintf( s, "%#x ( %#x, %#x ) : %u", obj.opcode, obj.op1, obj.op2, obj.lineno );
    return( s );
}
//
//////////////////////////////////////////////////////////////////////////
//
//
// struct _zval_struct {
// zend_value value;
// union {
// uint32_t type_info;
// } u1;
// union {
// uint32_t lineno;
// } u2;
// }
//
typedef struct _MyZval
{
uint64value <format=hex>;
uint type_info;
uint lineno;
} MyZval <read=MyZvalRepr>;
string MyZvalRepr ( MyZval &obj )
{
    //
    // Text shown for one literal: "value : type_info : lineno".
    //
    string  s;

    //
    // 010 Editor has its own format specifier for 64-bit integers (%Lx),
    // which is not quite the same as in C.
    //
    SPrintf( s, "0x%Lx : %u : %u", obj.value, obj.type_info, obj.lineno );
    return( s );
}
//
//////////////////////////////////////////////////////////////////////////
//
//
// struct _zend_op_array {
// zend_uchar type;
// uint32_t last;
// zend_op *opcodes;
// int last_var;
// zend_string **vars;
// zend_string *filename;
// int last_literal;
// zval *literals;
// }
//
//
// head of the serialized op array. Each some_off field is hidden and the
// data it points to is mapped right next to it, so the Template Results
// show the deserialized some[] instead of a raw offset.
//
// some_off points at an offset table of 4-byte entries; the serialized
// elements follow the table back to back, so skipping the table and
// declaring an array maps all of them.
//
typedef struct _MyZendOpArray
{
    local int   saved_pos;
    //
    // local int   i, j, k;
    //
    uchar       type;
    uint        last;
    //
    // An alternate way of specifying the format for a variable is to use
    // the syntax '<format=hex|decimal|octal|binary>' after a variable
    // declaration or a typedef.
    //
    uint        opcodes_off <format=hex,hidden=true>;
    //
    // Returns the current read position of the file. This read position
    // is used when defining variables in a Template. Every time a
    // variable is defined in a template, the read position moves ahead
    // the number of bytes used by the variable.
    //
    saved_pos   = FTell();
    //
    // Alternative: follow the offset table entry by entry. Table entries
    // are relative to the (absent) count field, i.e. to opcodes_off - 4.
    //
    // Sets the current read position to the address pos.
    //
    // FSeek( opcodes_off );
    // j   = opcodes_off - 4;
    // for ( i = 0; i < last; i++ )
    // {
    //     //
    //     // Returns data read from the file at address pos. If no pos is
    //     // given, pos defaults to the current read position as reported by
    //     // FTell. These functions can be used in a Template to read data
    //     // from a file without declaring a variable and note that these
    //     // functions do not affect the current read position.
    //     //
    //     k   = ReadUInt( opcodes_off + i * 4 );
    //     FSeek( j + k );
    //     MyZendOp    myzendop;
    // }
    //
    if ( last > 0 )
    {
        FSeek( opcodes_off + last * 4 );
        MyZendOp    opcodes[last];
    }
    FSeek( saved_pos );
    uint        last_var;
    //
    // The syntax '<hidden=true>' can be used to hide the display of
    // variables in the Template Results.
    //
    uint        vars_off <format=hex,hidden=true>;
    saved_pos   = FTell();
    //
    // FSeek( vars_off );
    // j   = vars_off - 4;
    // for ( i = 0; i < last_var; i++ )
    // {
    //     k   = ReadUInt( vars_off + i * 4 );
    //     FSeek( j + k );
    //     MyString    mystring;
    // }
    //
    if ( last_var > 0 )
    {
        FSeek( vars_off + last_var * 4 );
        //
        // MyString elements differ in size, so array optimization (which
        // assumes every element is as large as the first) must be off.
        //
        MyString    vars[last_var] <optimize=false>;
    }
    FSeek( saved_pos );
    uint        filename_off <format=hex,hidden=true>;
    saved_pos   = FTell();
    FSeek( filename_off );
    MyString    filename;
    FSeek( saved_pos );
    uint        last_literal;
    uint        literals_off <format=hex,hidden=true>;
    saved_pos   = FTell();
    //
    // FSeek( literals_off );
    // j   = literals_off - 4;
    // for ( i = 0; i < last_literal; i++ )
    // {
    //     k   = ReadUInt( literals_off + i * 4 );
    //     FSeek( j + k );
    //     MyZval      myzval;
    // }
    //
    if ( last_literal > 0 )
    {
        FSeek( literals_off + last_literal * 4 );
        MyZval      literals[last_literal];
    }
    FSeek( saved_pos );
} MyZendOpArray;
//
//////////////////////////////////////////////////////////////////////////
//
//
// Indicates that all subsequent reads and writes from the file should use
// little-endian byte order.
//
LittleEndian();
//
// Template entry point: declaring this variable applies MyZendOpArray at
// the current read position and parses all of its members.
// NOTE(review): presumably the read position is 0 here, since nothing has
// been declared before this line - confirm.
//
MyZendOpArray myzendoparray;
```
在010 Editor中打开SerializeTest.bin,Ctrl-F5打开SerializeTest.bt,F5运行之。
上述模板文件是自解释的,实际试试就知道怎么写、怎么读。从最后一行开始读,依次套用结构、解析成员、显示数据、调用回调等等。
SerializeTest.bin中包含有some_off,但SerializeTest.bt用<hidden=true>将之隐藏了,转而在附近显示反序列化之后的some[],尽可能接近人类可读状态。
MyString是个复杂结构,不是单纯的char[],模板用<read=MyStringRepr>回调函数获取其中的char[],使得GUI中Value列的显示更直观。
010 Editor自带的帮助还可以,我都没怎么看在线帮助,直接看二进制自带的帮助。
☆ 参考资源
```
010 Editor Online Manual
https://www.sweetscape.com/010editor/manual/
```
好东西!!!!!!!!!!!!!!!!! 好东西!!都是大佬 好东西!!谢谢大佬 论坛里大佬真多 谢谢大佬 好东西!!谢谢楼主 大佬,我知道一家植发特别棒的地方,要不给你推荐一下 说实话大佬,你这真的了不起! 一言不合直接上代码
页:
[1]
2