Android Linker详解
本文目的
Unidbg在对So进行模拟执行的时候,需要先将So文件加载到内存,配置So的进程映像,然后使用CPU模拟器(Unicorn、Dynarmic等)对So进行模拟执行。本文的目的是为了彻底搞懂So文件是如何加载到内存的,以及加载进内存之后做了什么,事无巨细,握住方向盘。
Linker入口
我们在Android程序中,往往会使用到JNI编程来加快某些算法的运行或增加APP的逆向难度。当然这不是Android的新特性,它是Java自带的本地编程接口,可以使我们的Java程序能够调用本地语言。
当我们在Android程序想使用本地编译的So库,第一步就是要将So加载进来对吧,Android Studio创建C/C++ Native模板的时候,它会在我们的MainActivity类中加这么一段代码
// Static initializer block: asks System.loadLibrary to load the native
// library named "native-lib".
static{
System.loadLibrary("native-lib");
}
这句代码的作用就是将So加载进来供Android程序来使用,所以以此为入口,开始分析
http://androidxref.com/4.4.4_r1/xref/libcore/luni/src/main/java/java/lang/System.java#525
/**
 * Loads the native library {@code libName} by delegating to
 * Runtime.loadLibrary, passing the ClassLoader returned by
 * VMStack.getCallingClassLoader() as the second argument.
 */
public static void loadLibrary(String libName) {
Runtime.getRuntime().loadLibrary(libName, VMStack.getCallingClassLoader());
}
又调用了Runtime类的loadLibrary,第二个参数为调用类的ClassLoader
http://androidxref.com/4.4.4_r1/xref/libcore/luni/src/main/java/java/lang/Runtime.java#354
/**
 * Locates and loads the native library {@code libraryName}.
 *
 * With a non-null {@code loader}, the loader resolves the file name and the
 * result is loaded with doLoad. With a null {@code loader}, every directory in
 * mLibPaths is tried in order.
 *
 * @throws UnsatisfiedLinkError if the library cannot be found or doLoad
 *         reports an error
 */
void loadLibrary(String libraryName, ClassLoader loader) {
// Path 1: a class loader is available, so let it resolve the library file.
if (loader != null) {
String filename = loader.findLibrary(libraryName);
if (filename == null) {
throw new UnsatisfiedLinkError("...");
}
// doLoad returns null on success, otherwise an error message.
String error = doLoad(filename, loader);
if (error != null) {
throw new UnsatisfiedLinkError(error);
}
return;
}
// Path 2: no class loader; probe each directory in mLibPaths.
String filename = System.mapLibraryName(libraryName);
List<String> candidates = new ArrayList<String>();
String lastError = null;
for (String directory : mLibPaths) {
// Plain concatenation: no path separator is inserted here.
String candidate = directory + filename;
candidates.add(candidate);
if (IoUtils.canOpenReadOnly(candidate)) {
String error = doLoad(candidate, loader);
if (error == null) {
return;
}
// Remember the most recent failure so it can be reported below.
lastError = error;
}
}
// A readable candidate was found but failed to load: report that error.
if (lastError != null) {
throw new UnsatisfiedLinkError(lastError);
}
// No readable candidate at all: list every path that was tried.
throw new UnsatisfiedLinkError("Library " + libraryName + " not found; tried " + candidates);
}
接着看doLoad方法
http://androidxref.com/4.4.4_r1/xref/libcore/luni/src/main/java/java/lang/Runtime.java#393
/**
 * Loads the library file {@code name} through the native method nativeLoad.
 *
 * If {@code loader} is a BaseDexClassLoader, its getLdLibraryPath() value is
 * forwarded; otherwise null is passed as the library path.
 *
 * @return whatever nativeLoad returns; callers in this file treat null as
 *         success and a non-null string as an error message
 */
private String doLoad(String name, ClassLoader loader) {
String ldLibraryPath = null;
if (loader != null && loader instanceof BaseDexClassLoader) {
ldLibraryPath = ((BaseDexClassLoader) loader).getLdLibraryPath();
}
// The native call is made while holding this Runtime instance's monitor.
synchronized (this) {
return nativeLoad(name, loader, ldLibraryPath);
}
}
http://androidxref.com/4.4.4_r1/xref/libcore/luni/src/main/java/java/lang/Runtime.java#426
// Native method declaration (no Java body). doLoad returns its result
// directly, and loadLibrary treats a non-null result as an error message.
private static native String nativeLoad(String filename, ClassLoader loader, String ldLibraryPath);
继续往下分析,找到nativeLoad对应的C层函数
http://androidxref.com/4.4.4_r1/xref/art/runtime/native/java_lang_Runtime.cc#43
// JNI-side function for nativeLoad: forwards the request to
// JavaVMExt::LoadNativeLibrary.
// Returns NULL on success, otherwise a new Java string holding `detail`.
// NOTE(review): `soa` and `filename` are used but not declared in this
// excerpt; their setup appears to have been elided from the quoted source.
static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader, jstring javaLdLibraryPath) {
mirror::ClassLoader* classLoader = soa.Decode<mirror::ClassLoader*>(javaLoader);
std::string detail;
JavaVMExt* vm = Runtime::Current()->GetJavaVM();
bool success = vm->LoadNativeLibrary(filename.c_str(), classLoader, detail);
if (success) {
return NULL;
}
// Clear any pending exception before building the error string for the caller.
env->ExceptionClear();
return env->NewStringUTF(detail.c_str());
}
最关键的函数是vm->LoadNativeLibrary,继续往下跟
http://androidxref.com/4.4.4_r1/xref/art/runtime/jni_internal.cc#3120
// Loads the native library at `path` with dlopen and, if the library exports
// JNI_OnLoad, invokes it while `class_loader` is installed as the calling
// thread's class-loader override.
//
// Parameters:
//   path         - library path; an empty path is passed to dlopen as NULL.
//   class_loader - class loader installed as the override during JNI_OnLoad.
//   detail       - receives the dlerror() text when dlopen fails.
//
// Returns false if dlopen fails; otherwise returns was_successful.
//
// NOTE: this is an abridged excerpt. The declarations of `self`, `library`
// and `was_successful`, and the validation of the version returned by
// JNI_OnLoad, are not shown here.
bool JavaVMExt::LoadNativeLibrary(const std::string& path, ClassLoader* class_loader,
                                  std::string& detail) {
  // The thread state is switched around the dlopen call.
  self->TransitionFromRunnableToSuspended(kWaitingForJniOnLoad);
  void* handle = dlopen(path.empty() ? NULL : path.c_str(), RTLD_LAZY);
  self->TransitionFromSuspendedToRunnable();
  VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_LAZY) returned " << handle << "]";

  // A failed dlopen must not fall through to dlsym: report the error instead
  // of treating "no JNI_OnLoad found" as a successful load.
  if (handle == NULL) {
    const char* error = dlerror();
    detail = (error != NULL) ? error : "dlopen failed";
    return false;
  }

  void* sym = dlsym(handle, "JNI_OnLoad");
  if (sym == NULL) {
    // A library without JNI_OnLoad is still considered loaded.
    VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]";
    was_successful = true;
  } else {
    // Only call JNI_OnLoad when the symbol actually exists.
    typedef int (*JNI_OnLoadFn)(JavaVM*, void*);
    JNI_OnLoadFn jni_on_load = reinterpret_cast<JNI_OnLoadFn>(sym);

    // Install the caller's class loader for the duration of JNI_OnLoad.
    ClassLoader* old_class_loader = self->GetClassLoaderOverride();
    self->SetClassLoaderOverride(class_loader);

    int version = 0;
    {
      ScopedThreadStateChange tsc(self, kNative);
      VLOG(jni) << "[Calling JNI_OnLoad in \"" << path << "\"]";
      version = (*jni_on_load)(this, NULL);
    }

    // Restore the previous override so it does not leak past this call.
    self->SetClassLoaderOverride(old_class_loader);

    // NOTE: the check of `version` that decides was_successful on this path
    // is elided in this excerpt.
    (void) version;
  }

  library->SetResult(was_successful);
  return was_successful;
}
我们分析上面的函数知道,此函数主要做了两件事
- 调用dlopen加载So
- 查找So中的JNI_OnLoad函数,并执行
继续往下分析dlopen
http://androidxref.com/4.4.4_r1/xref/bionic/linker/dlfcn.cpp#63
// Public dlopen entry point: a thin wrapper around do_dlopen.
// Returns the soinfo pointer produced by do_dlopen as an opaque handle, or
// NULL after recording a "dlopen failed" error message.
void* dlopen(const char* filename, int flags) {
// Scoped lock on gDlMutex for the duration of the call.
ScopedPthreadMutexLocker locker(&gDlMutex);
soinfo* result = do_dlopen(filename, flags);
if (result == NULL) {
__bionic_format_dlerror("dlopen failed", linker_get_error_buffer());
return NULL;
}
return result;
}
调用了do_dlopen
So的装载
http://androidxref.com/4.4.4_r1/xref/bionic/linker/linker.cpp#823
// Finds (loading it if necessary) the library `name` and runs its
// constructors.
// Returns the library's soinfo, or NULL if `flags` contains unsupported bits
// or find_library fails.
soinfo* do_dlopen(const char* name, int flags) {
// Reject any flag outside RTLD_NOW, RTLD_LAZY, RTLD_LOCAL and RTLD_GLOBAL.
if ((flags & ~(RTLD_NOW|RTLD_LAZY|RTLD_LOCAL|RTLD_GLOBAL)) != 0) {
DL_ERR("invalid flags to dlopen: %x", flags);
return NULL;
}
// The soinfo pool is switched to read/write while the library is set up...
set_soinfo_pool_protection(PROT_READ | PROT_WRITE);
soinfo* si = find_library(name);
if (si != NULL) {
si->CallConstructors();
}
// ...and back to read-only afterwards, whether or not the load succeeded.
set_soinfo_pool_protection(PROT_READ);
return si;
}
分析到这里,终于进入Linker部分了,上面的篇幅我们由System.loadLibrary()方法,找到了Linker的do_dlopen函数,这个函数就可以说是Linker开始加载的地方了。这个函数主要做了两件事
- 调用函数find_library,返回soinfo。soinfo就是so被加载到内存的一个代表,存放了内存中so的信息
- 调用soinfo的CallConstructors函数,做了一些初始化操作(.init、.init_array)
继续分析find_library
http://androidxref.com/4.4.4_r1/xref/bionic/linker/linker.cpp#785
// Looks up the library `name` via find_library_internal and, on success,
// increments its reference count.
// Returns the soinfo, or NULL if the lookup failed.
static soinfo* find_library(const char* name) {
soinfo* si = find_library_internal(name);
if (si != NULL) {
si->ref_count++;
}
return si;
}
这个函数的作用很简单
- 调用find_library_internal
- so的引用计数+1
继续分析find_library_internal函数
http://androidxref.com/4.4.4_r1/xref/bionic/linker/linker.cpp#751
// Returns the soinfo for `name`, loading and linking the library if it has
// not been loaded yet. Returns NULL on failure.
static soinfo* find_library_internal(const char* name) {
// A NULL name yields somain.
if (name == NULL) {
return somain;
}
// Already known to the linker?
soinfo* si = find_loaded_library(name);
if (si != NULL) {
if (si->flags & FLAG_LINKED) {
return si;
}
// Found but not yet linked: reported as a recursive link and rejected.
DL_ERR("OOPS: recursive link to \"%s\"", si->name);
return NULL;
}
TRACE("[ '%s' has not been loaded yet. Locating...]", name);
// Step 1: load the file into memory.
si = load_library(name);
if (si == NULL) {
return NULL;
}
TRACE("[ init_library base=0x%08x sz=0x%08x name='%s' ]",
si->base, si->size, si->name);
// Step 2: link the image; on failure unmap it and release the soinfo.
if (!soinfo_link_image(si)) {
munmap(reinterpret_cast<void*>(si->base), si->size);
soinfo_free(si);
return NULL;
}
return si;
}
这个函数主要做了3个事情:
- 判断想要加载的so是否已经被加载过
- 如果没有被加载过,调用load_library进行加载
- 加载完成后,调用soinfo_link_image函数进行链接
这也体现了So加载过程的两个主要步骤
- So的装载
- So的链接
在上面的do_dlopen中还调用了soinfo的CallConstructors函数,这个也可以作为第三个步骤
- So的初始化
那么我们假设我们的So是第一次进行加载,继续分析load_library函数,看看linker如何装载我们的So
http://androidxref.com/4.4.4_r1/xref/bionic/linker/linker.cpp#702
// Opens the library `name`, loads it into memory with ElfReader, and
// allocates a soinfo describing the loaded image.
// Returns the new soinfo, or NULL if the file cannot be opened, ElfReader::Load
// fails, or soinfo_alloc fails.
static soinfo* load_library(const char* name) {
int fd = open_library(name);
if (fd == -1) {
DL_ERR("library \"%s\" not found", name);
return NULL;
}
// Read the ELF headers and map the segments.
ElfReader elf_reader(name, fd);
if (!elf_reader.Load()) {
return NULL;
}
// The soinfo is named after the part following the last '/', or the whole
// name if it contains no '/'.
const char* bname = strrchr(name, '/');
soinfo* si = soinfo_alloc(bname ? bname + 1 : name);
if (si == NULL) {
return NULL;
}
// Copy the load results from the reader into the soinfo.
si->base = elf_reader.load_start();
si->size = elf_reader.load_size();
si->load_bias = elf_reader.load_bias();
si->flags = 0;
si->entry = 0;
si->dynamic = NULL;
si->phnum = elf_reader.phdr_count();
si->phdr = elf_reader.loaded_phdr();
return si;
}
那么我们主要来分析elf_reader.Load()函数
http://androidxref.com/4.4.4_r1/xref/bionic/linker/linker_phdr.cpp#134
// Runs the six load steps in order. The && chain short-circuits, so the first
// step that returns false stops the sequence and makes Load return false.
bool ElfReader::Load() {
return ReadElfHeader() &&
VerifyElfHeader() &&
ReadProgramHeader() &&
ReserveAddressSpace() &&
LoadSegments() &&
FindPhdr();
}
Load函数分别调用了6个函数
- ReadElfHeader 读取ElfHeader
- VerifyElfHeader 验证ElfHeader
- ReadProgramHeader 读取程序头表
- ReserveAddressSpace 预留地址空间
- LoadSegments 加载段
- FindPhdr 寻找Phdr段
从函数名直译,我们也能知道一个大概。下面我们来分析这6个函数
// Reads sizeof(header_) bytes from fd_ into header_.
// Returns false if the read fails or returns fewer bytes than requested.
bool ElfReader::ReadElfHeader() {
ssize_t rc = TEMP_FAILURE_RETRY(read(fd_, &header_, sizeof(header_)));
if (rc < 0) {
DL_ERR("can't read file \"%s\": %s", name_, strerror(errno));
return false;
}
// A short read means the file cannot contain a complete ELF header.
if (rc != sizeof(header_)) {
DL_ERR("\"%s\" is too small to be an ELF executable", name_);
return false;
}
return true;
}
// Validates the ELF header previously read into header_.
// Accepts only a 32-bit, little-endian, ET_DYN file with the current ELF
// version and an e_machine matching the linker's build target.
// Returns false, after logging the reason, on the first failed check.
bool ElfReader::VerifyElfHeader() {
// The four magic bytes at the start of e_ident.
if (header_.e_ident[EI_MAG0] != ELFMAG0 ||
header_.e_ident[EI_MAG1] != ELFMAG1 ||
header_.e_ident[EI_MAG2] != ELFMAG2 ||
header_.e_ident[EI_MAG3] != ELFMAG3) {
DL_ERR("\"%s\" has bad ELF magic", name_);
return false;
}
// Only 32-bit objects are accepted.
if (header_.e_ident[EI_CLASS] != ELFCLASS32) {
DL_ERR("\"%s\" not 32-bit: %d", name_, header_.e_ident[EI_CLASS]);
return false;
}
// Only little-endian data encoding is accepted.
if (header_.e_ident[EI_DATA] != ELFDATA2LSB) {
DL_ERR("\"%s\" not little-endian: %d", name_, header_.e_ident[EI_DATA]);
return false;
}
// Only ET_DYN files are accepted.
if (header_.e_type != ET_DYN) {
DL_ERR("\"%s\" has unexpected e_type: %d", name_, header_.e_type);
return false;
}
if (header_.e_version != EV_CURRENT) {
DL_ERR("\"%s\" has unexpected e_version: %d", name_, header_.e_version);
return false;
}
// The expected machine constant is selected at compile time from the
// ANDROID_*_LINKER macro that is defined.
if (header_.e_machine !=
#ifdef ANDROID_ARM_LINKER
EM_ARM
#elif defined(ANDROID_MIPS_LINKER)
EM_MIPS
#elif defined(ANDROID_X86_LINKER)
EM_386
#endif
) {
DL_ERR("\"%s\" has unexpected e_machine: %d", name_, header_.e_machine);
return false;
}
return true;
}
// Maps the program header table of the file read-only into memory and points
// phdr_table_ at its first entry.
// Returns false if e_phnum is out of range or the mmap fails.
bool ElfReader::ReadProgramHeader() {
phdr_num_ = header_.e_phnum;
// Require at least one entry and cap the table at 65536 bytes in total.
if (phdr_num_ < 1 || phdr_num_ > 65536/sizeof(Elf32_Phdr)) {
DL_ERR("\"%s\" has invalid e_phnum: %d", name_, phdr_num_);
return false;
}
// File range covering the table, expressed via the PAGE_* macros
// (presumably page-granular rounding; the macros are defined elsewhere).
Elf32_Addr page_min = PAGE_START(header_.e_phoff);
Elf32_Addr page_max = PAGE_END(header_.e_phoff + (phdr_num_ * sizeof(Elf32_Phdr)));
Elf32_Addr page_offset = PAGE_OFFSET(header_.e_phoff);
phdr_size_ = page_max - page_min;
void* mmap_result = mmap(NULL, phdr_size_, PROT_READ, MAP_PRIVATE, fd_, page_min);
if (mmap_result == MAP_FAILED) {
DL_ERR("\"%s\" phdr mmap failed: %s", name_, strerror(errno));
return false;
}
// Keep the raw mapping address, and locate the table inside it by adding
// the offset of e_phoff within its first page.
phdr_mmap_ = mmap_result;
phdr_table_ = reinterpret_cast<Elf32_Phdr*>(reinterpret_cast<char*>(mmap_result) + page_offset);
return true;
}
// Reserves one address range large enough for all loadable segments and
// records where it landed (load_start_) and its offset from the file's
// lowest virtual address (load_bias_).
// Returns false if there are no loadable segments or the mmap fails.
bool ElfReader::ReserveAddressSpace() {
Elf32_Addr min_vaddr;
load_size_ = phdr_table_get_load_size(phdr_table_, phdr_num_, &min_vaddr);
if (load_size_ == 0) {
DL_ERR("\"%s\" has no loadable segments", name_);
return false;
}
// min_vaddr is passed only as a placement hint: MAP_FIXED is not set, so
// the mapping may end up at a different address.
uint8_t* addr = reinterpret_cast<uint8_t*>(min_vaddr);
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
// PROT_NONE anonymous mapping: the range is reserved but not accessible.
void* start = mmap(addr, load_size_, PROT_NONE, mmap_flags, -1, 0);
if (start == MAP_FAILED) {
DL_ERR("couldn't reserve %d bytes of address space for \"%s\"", load_size_, name_);
return false;
}
load_start_ = start;
// Difference between the actual mapping address and the requested one;
// LoadSegments adds it to each segment's p_vaddr.
load_bias_ = reinterpret_cast<uint8_t*>(start) - addr;
return true;
}
// Computes the extent of the address range spanned by all PT_LOAD entries of
// a program header table.
//
// Parameters:
//   phdr_table    - first entry of the program header table.
//   phdr_count    - number of entries in the table.
//   out_min_vaddr - if non-NULL, receives PAGE_START of the lowest p_vaddr.
//   out_max_vaddr - if non-NULL, receives PAGE_END of the highest
//                   p_vaddr + p_memsz.
//
// Returns max_vaddr - min_vaddr after applying the PAGE_* macros. When no
// PT_LOAD entry exists both bounds start from 0; ReserveAddressSpace treats a
// 0 result as "no loadable segments".
size_t phdr_table_get_load_size(const Elf32_Phdr* phdr_table,
size_t phdr_count,
Elf32_Addr* out_min_vaddr,
Elf32_Addr* out_max_vaddr)
{
// Start with an inverted range so the first PT_LOAD entry sets both bounds.
Elf32_Addr min_vaddr = 0xFFFFFFFFU;
Elf32_Addr max_vaddr = 0x00000000U;
bool found_pt_load = false;
for (size_t i = 0; i < phdr_count; ++i) {
const Elf32_Phdr* phdr = &phdr_table[i];
// Only loadable segments contribute to the range.
if (phdr->p_type != PT_LOAD) {
continue;
}
found_pt_load = true;
if (phdr->p_vaddr < min_vaddr) {
min_vaddr = phdr->p_vaddr;
}
if (phdr->p_vaddr + phdr->p_memsz > max_vaddr) {
max_vaddr = phdr->p_vaddr + phdr->p_memsz;
}
}
// Without any PT_LOAD entry, reset the lower bound from its 0xFFFFFFFF
// sentinel to 0.
if (!found_pt_load) {
min_vaddr = 0x00000000U;
}
min_vaddr = PAGE_START(min_vaddr);
max_vaddr = PAGE_END(max_vaddr);
// Both output pointers are optional.
if (out_min_vaddr != NULL) {
*out_min_vaddr = min_vaddr;
}
if (out_max_vaddr != NULL) {
*out_max_vaddr = max_vaddr;
}
return max_vaddr - min_vaddr;
}
// Maps every PT_LOAD segment of the file at p_vaddr + load_bias_.
// For each segment: map the file-backed part, zero the tail of the last
// file-backed page if the segment is writable, then map anonymous pages for
// whatever part of p_memsz extends beyond the file-backed pages.
// Returns false if any mmap fails.
bool ElfReader::LoadSegments() {
for (size_t i = 0; i < phdr_num_; ++i) {
const Elf32_Phdr* phdr = &phdr_table_[i];
if (phdr->p_type != PT_LOAD) {
continue;
}
// Segment extent in memory (p_memsz bytes starting at the biased p_vaddr).
Elf32_Addr seg_start = phdr->p_vaddr + load_bias_;
Elf32_Addr seg_end = seg_start + phdr->p_memsz;
Elf32_Addr seg_page_start = PAGE_START(seg_start);
Elf32_Addr seg_page_end = PAGE_END(seg_end);
// Memory address where the file-backed bytes (p_filesz) end.
Elf32_Addr seg_file_end = seg_start + phdr->p_filesz;
// Segment extent in the file.
Elf32_Addr file_start = phdr->p_offset;
Elf32_Addr file_end = file_start + phdr->p_filesz;
Elf32_Addr file_page_start = PAGE_START(file_start);
Elf32_Addr file_length = file_end - file_page_start;
// Map the file-backed portion at its fixed address with the protection
// derived from p_flags.
if (file_length != 0) {
void* seg_addr = mmap((void*)seg_page_start,
file_length,
PFLAGS_TO_PROT(phdr->p_flags),
MAP_FIXED|MAP_PRIVATE,
fd_,
file_page_start);
if (seg_addr == MAP_FAILED) {
DL_ERR("couldn't map \"%s\" segment %d: %s", name_, i, strerror(errno));
return false;
}
}
// Writable segment whose file data ends mid-page: zero from the end of the
// file data up to the end of that page.
if ((phdr->p_flags & PF_W) != 0 && PAGE_OFFSET(seg_file_end) > 0) {
memset((void*)seg_file_end, 0, PAGE_SIZE - PAGE_OFFSET(seg_file_end));
}
seg_file_end = PAGE_END(seg_file_end);
// If p_memsz reaches past the file-backed pages, back the remaining pages
// with an anonymous mapping.
if (seg_page_end > seg_file_end) {
void* zeromap = mmap((void*)seg_file_end,
seg_page_end - seg_file_end,
PFLAGS_TO_PROT(phdr->p_flags),
MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE,
-1,
0);
if (zeromap == MAP_FAILED) {
DL_ERR("couldn't zero fill \"%s\" gap: %s", name_, strerror(errno));
return false;
}
}
}
return true;
}
// Determines the address of the program header table inside the loaded image
// and hands it to CheckPhdr.
// Returns CheckPhdr's result, or false if no candidate address is found.
bool ElfReader::FindPhdr() {
const Elf32_Phdr* phdr_limit = phdr_table_ + phdr_num_;
// First choice: a PT_PHDR entry gives the table's virtual address directly.
for (const Elf32_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {
if (phdr->p_type == PT_PHDR) {
return CheckPhdr(load_bias_ + phdr->p_vaddr);
}
}
// Fallback: only the first PT_LOAD entry is examined. If it starts at file
// offset 0, the loaded segment begins with the ELF header, whose e_phoff
// locates the table.
for (const Elf32_Phdr* phdr = phdr_table_; phdr < phdr_limit; ++phdr) {
if (phdr->p_type == PT_LOAD) {
if (phdr->p_offset == 0) {
Elf32_Addr elf_addr = load_bias_ + phdr->p_vaddr;
const Elf32_Ehdr* ehdr = (const Elf32_Ehdr*)(void*)elf_addr;
Elf32_Addr offset = ehdr->e_phoff;
return CheckPhdr((Elf32_Addr)ehdr + offset);
}
// Stop after the first PT_LOAD entry regardless of its offset.
break;
}
}
DL_ERR("can't find loaded phdr for \"%s\"", name_);
return false;
}
至此 So的装载部分就分析完了
总结
总结一下So的装载就是根据So的文件信息,先读入So的头部信息,并进行验证。然后找到程序头表的位置,遍历程序头表的每一项,根据PT_LOAD段指定的信息将So进行装载,如果我们要模拟这个过程,只需要注意一下细节就可以了。相对于So的装载,更难的部分是So的动态链接,我们另起一篇文章来讲解So的动态链接。