授人以鱼不如授人以渔,作为初学者,最重要的是学会查看官方文档,自主学习。
首先放上Google官方文档中对dex格式进行说明的链接,里面相当详细地介绍了dex格式的组成。
https://source.android.google.cn/devices/tech/dalvik/dex-format#header-item
map_list格式
size uint 列表的大小(以条目数表示)
list map_item[size] 列表的元素
map_item格式
type ushort 项的类型;见下表
unused ushort (未使用)
size uint 在指定偏移量处找到的项数量
offset uint 从文件开头到相关项的偏移量
类型格式
header_item TYPE_HEADER_ITEM 0x0000 0x70
string_id_item TYPE_STRING_ID_ITEM 0x0001 0x04
type_id_item TYPE_TYPE_ID_ITEM 0x0002 0x04
proto_id_item TYPE_PROTO_ID_ITEM 0x0003 0x0c
field_id_item TYPE_FIELD_ID_ITEM 0x0004 0x08
method_id_item TYPE_METHOD_ID_ITEM 0x0005 0x08
class_def_item TYPE_CLASS_DEF_ITEM 0x0006 0x20
call_site_id_item TYPE_CALL_SITE_ID_ITEM 0x0007 0x04
method_handle_item TYPE_METHOD_HANDLE_ITEM 0x0008 0x08
map_list TYPE_MAP_LIST 0x1000 4 + (item.size * 12)
type_list TYPE_TYPE_LIST 0x1001 4 + (item.size * 2)
annotation_set_ref_list TYPE_ANNOTATION_SET_REF_LIST 0x1002 4 + (item.size * 4)
annotation_set_item TYPE_ANNOTATION_SET_ITEM 0x1003 4 + (item.size * 4)
class_data_item TYPE_CLASS_DATA_ITEM 0x2000 隐式;必须解析
code_item TYPE_CODE_ITEM 0x2001 隐式;必须解析
string_data_item TYPE_STRING_DATA_ITEM 0x2002 隐式;必须解析
debug_info_item TYPE_DEBUG_INFO_ITEM 0x2003 隐式;必须解析
annotation_item TYPE_ANNOTATION_ITEM 0x2004 隐式;必须解析
encoded_array_item TYPE_ENCODED_ARRAY_ITEM 0x2005 隐式;必须解析
annotations_directory_item TYPE_ANNOTATIONS_DIRECTORY_ITEM 0x2006 隐式;必须解析
hiddenapi_class_data_item TYPE_HIDDENAPI_CLASS_DATA_ITEM 0xF000 隐式;必须解析
有了格式,接下来定义我们需要的结构体
header_item
/* Length in bytes of the magic array at the start of the DEX header. */
#define IMAGE_SIZEOF_DEX_FILE 8
/* Length in bytes of the signature array in the DEX header. */
#define IMAGE_SIZEOF_DEX_SIGNATURE 20

/*
 * Overlay for the DEX header_item located at offset 0 of a DEX file.
 *
 * With 1-byte BYTE and 4-byte DWORD the members add up to
 * 8 + 4 + 20 + 20 * 4 = 0x70 bytes, the header_item size listed in the
 * type table. Every "*_off" member is an offset measured from the start
 * of the file; every "*_size" member belongs to the "*_off" member that
 * follows it.
 */
typedef struct _IMAGE_DEX_HEADER
{
    BYTE  magic[IMAGE_SIZEOF_DEX_FILE];           /* file magic / format version   */
    DWORD checksum;                               /* file checksum                 */
    BYTE  signature[IMAGE_SIZEOF_DEX_SIGNATURE];  /* file signature (hash)         */
    DWORD file_size;                              /* size of the whole file        */
    DWORD header_size;                            /* size of this header           */
    DWORD endian_tag;                             /* byte-order marker             */
    DWORD link_size;                              /* size of the link section      */
    DWORD link_off;                               /* offset of the link section    */
    DWORD map_off;                                /* offset of the map_list        */
    DWORD string_ids_size;                        /* number of string_id_item      */
    DWORD string_ids_off;                         /* offset of the string id list  */
    DWORD type_ids_size;                          /* number of type_id_item        */
    DWORD type_ids_off;                           /* offset of the type id list    */
    DWORD proto_ids_size;                         /* number of proto_id_item       */
    DWORD proto_ids_off;                          /* offset of the proto id list   */
    DWORD field_ids_size;                         /* number of field_id_item       */
    DWORD field_ids_off;                          /* offset of the field id list   */
    DWORD method_ids_size;                        /* number of method_id_item      */
    DWORD method_ids_off;                         /* offset of the method id list  */
    DWORD class_defs_size;                        /* number of class_def_item      */
    DWORD class_defs_off;                         /* offset of the class def list  */
    DWORD data_size;                              /* size of the data section      */
    DWORD data_off;                               /* offset of the data section    */
}IMAGE_DEX_HEADER, * PIMAGE_DEX_HEADER;
map_item
/*
 * One entry of the DEX map_list: 2 + 2 + 4 + 4 = 12 bytes when WORD is
 * 2 bytes and DWORD is 4 bytes, matching the "item.size * 12" term given
 * for map_list in the type table.
 */
struct _IMAGE_MAP_ITEM
{
    WORD  type;    /* item type code (see the type table)            */
    WORD  unused;  /* unused                                         */
    DWORD size;    /* number of items found at the given offset      */
    DWORD offset;  /* offset of the items from the start of the file */
};
typedef struct _IMAGE_MAP_ITEM IMAGE_MAP_ITEM;
typedef struct _IMAGE_MAP_ITEM *PIMAGE_MAP_ITEM;
读取结构体
/**
 * Loads a DEX file into memory and walks the entries of its map_list.
 *
 * The whole file is read because the map_list sits in the later part of
 * the file; its position is taken from the header's map_off member.
 *
 * @param IpzFile  Path of the DEX file to open.
 * @return The size of the file in bytes on success. 0 on any failure:
 *         the file cannot be opened, its size cannot be determined, it is
 *         smaller than a DEX header, memory cannot be allocated, the read
 *         fails, or the map_list does not fit inside the file.
 *
 * The file buffer is private to this function and is released before it
 * returns.
 */
DWORD my_ReadFile(CString IpzFile)
{
    /* All locals are declared up front so the function also builds as C++. */
    FILE* pFile = NULL;
    long rawSize = 0;
    DWORD fileSize = 0;
    DWORD result = 0;
    DWORD mapRoom = 0;
    BYTE* pTempFileBuffer = NULL;
    PIMAGE_DEX_HEADER dex_header = NULL;
    LPDWORD map_size = NULL;
    PIMAGE_MAP_ITEM virtualAddress_map = NULL;

    /* Open the file in binary mode. */
    pFile = fopen(IpzFile, "rb");
    if (!pFile)
    {
        MessageBox(0, TEXT("打开文件失败"), TEXT("信息"), 0);
        return 0;
    }

    /* Seek to the end to learn the file size. */
    if (fseek(pFile, 0, SEEK_END) != 0)
    {
        fclose(pFile);
        return 0;
    }

    /*
     * ftell returns a signed long and reports failure as -1, so the value
     * is checked before it is narrowed to DWORD. A file shorter than the
     * header cannot be parsed, and a size that does not fit in 32 bits
     * cannot be described by the DWORD offsets used below.
     */
    rawSize = ftell(pFile);
    if (rawSize < (long)sizeof(IMAGE_DEX_HEADER) ||
        (unsigned long)rawSize > 0xFFFFFFFFUL)
    {
        fclose(pFile);
        return 0;
    }
    fileSize = (DWORD)rawSize;

    /* Back to the beginning of the file for the actual read. */
    rewind(pFile);

    pTempFileBuffer = (BYTE*)malloc(fileSize);
    if (!pTempFileBuffer)
    {
        fclose(pFile);
        return 0;
    }

    if (fread(pTempFileBuffer, fileSize, 1, pFile) != 1)
    {
        fclose(pFile);
        free(pTempFileBuffer);
        return 0;
    }
    fclose(pFile);

    /* The DEX header sits at offset 0 of the file image. */
    dex_header = (PIMAGE_DEX_HEADER)pTempFileBuffer;

    /*
     * map_off comes from the file, so it is validated before use: it has
     * to lie after the header, be 4-byte aligned (it is read through a
     * DWORD pointer) and leave room for the 4-byte entry count.
     */
    if (dex_header->map_off >= sizeof(IMAGE_DEX_HEADER) &&
        (dex_header->map_off & 3) == 0 &&
        dex_header->map_off <= fileSize - sizeof(DWORD))
    {
        /*
         * The first four bytes of the map_list hold the number of entries.
         * Byte-pointer arithmetic is used so the address is not truncated
         * on 64-bit builds.
         */
        map_size = (LPDWORD)(pTempFileBuffer + dex_header->map_off);

        /* Bytes left in the file after the entry count. */
        mapRoom = fileSize - dex_header->map_off - (DWORD)sizeof(DWORD);

        if (*map_size <= mapRoom / sizeof(IMAGE_MAP_ITEM))
        {
            /* Adding 1 to a DWORD pointer skips the count and lands on
               the first map_item. */
            virtualAddress_map = (PIMAGE_MAP_ITEM)(map_size + 1);

            /* Visit every map_item. */
            for (DWORD i = 0; i < *map_size; i++)
            {
                DWORD type = virtualAddress_map[i].type;
                DWORD size = virtualAddress_map[i].size;
                DWORD offset = virtualAddress_map[i].offset;

                /* The values are only read here; consume them (print,
                   store, ...) at this point. */
                (void)type;
                (void)size;
                (void)offset;
            }
            result = fileSize;
        }
    }

    free(pTempFileBuffer);
    return result;
}
根据type的值转换相对应的类型
定义数组
/*
 * Display names of the DEX map item types. The trailing comment on each
 * entry gives its array index and the type code it stands for in the
 * type table.
 *
 * This is the definition of the table: it carries an initializer, so it
 * is written without "extern" (the keyword adds nothing on an initialized
 * definition and makes compilers warn).
 */
LPSTR mType_Text[21] = {
    "header_item",                 /*  0: 0x0000 */
    "string_id_item",              /*  1: 0x0001 */
    "type_id_item",                /*  2: 0x0002 */
    "proto_id_item",               /*  3: 0x0003 */
    "field_id_item",               /*  4: 0x0004 */
    "method_id_item",              /*  5: 0x0005 */
    "class_def_item",              /*  6: 0x0006 */
    "call_site_id_item",           /*  7: 0x0007 */
    "method_handle_item",          /*  8: 0x0008 */
    "map_list",                    /*  9: 0x1000 */
    "type_list",                   /* 10: 0x1001 */
    "annotation_set_ref_list",     /* 11: 0x1002 */
    "annotation_set_item",         /* 12: 0x1003 */
    "class_data_item",             /* 13: 0x2000 */
    "code_item",                   /* 14: 0x2001 */
    "string_data_item",            /* 15: 0x2002 */
    "debug_info_item",             /* 16: 0x2003 */
    "annotation_item",             /* 17: 0x2004 */
    "encoded_array_item",          /* 18: 0x2005 */
    "annotations_directory_item",  /* 19: 0x2006 */
    "hiddenapi_class_data_item"    /* 20: 0xF000 */
};
转换函数
/**
 * Translates a map_item type code into its display name.
 *
 * @param index  Type code read from a map_item (e.g. 0x0000, 0x1000,
 *               0x2001, 0xF000).
 * @return The matching entry of mType_Text, or the literal "unknown" for
 *         a code that is not in the type table. The returned string must
 *         not be modified or freed.
 *
 * 0xF000 (hiddenapi_class_data_item) has its own case, so the default
 * branch no longer labels every unrecognised code as
 * "hiddenapi_class_data_item".
 */
char* getMapType(DWORD index)
{
    switch (index)
    {
    case 0x0000: return mType_Text[0];
    case 0x0001: return mType_Text[1];
    case 0x0002: return mType_Text[2];
    case 0x0003: return mType_Text[3];
    case 0x0004: return mType_Text[4];
    case 0x0005: return mType_Text[5];
    case 0x0006: return mType_Text[6];
    case 0x0007: return mType_Text[7];
    case 0x0008: return mType_Text[8];
    case 0x1000: return mType_Text[9];
    case 0x1001: return mType_Text[10];
    case 0x1002: return mType_Text[11];
    case 0x1003: return mType_Text[12];
    case 0x2000: return mType_Text[13];
    case 0x2001: return mType_Text[14];
    case 0x2002: return mType_Text[15];
    case 0x2003: return mType_Text[16];
    case 0x2004: return mType_Text[17];
    case 0x2005: return mType_Text[18];
    case 0x2006: return mType_Text[19];
    case 0xF000: return mType_Text[20];
    default:
        /* Not a code from the type table. */
        return (char*)"unknown";
    }
}