Tags: rev 

Rating:

> Santa's nerdy cousin has decorated a chrismas tree...

The first thing that we notice is that the challenge binary is huge compared to the usual ctf binaries.
It takes an input of up to 10 characters and prints some statement about a specific branch.

```
$ ls -alht xmastree
-rwxr-xr-x 1 koyaan koyaan 12M Dec 6 12:19 xmastree
$ ./xmastree 1234567890
This branch has a red ball
```

Opening up `xmastree` in `radare2` we see an enormous main function with a couple hundred thousand basic blocks (im still not exactly sure how many).
Just doing `aa` to analyze basically never finishes. Opening `xmastree` in IDA, lead to the initial analysis running for about 40 minutes on my machine.
But so far so good, increasing the basic block limit for graphing to 1000000 then hitting space and waiting for another couple of minutes we get this highly
informative control flow graph:

![xmastree control-flow graph](https://www.sigflag.at/assets/posts/xmastree/xmas-callgraph.png "Figure 1: xmastree main() control flow graph")

At first i was pretty lost how to even approach this challenge, so i decided to just go ahead and reverse what i could.
Following the flow of the program we can see that there are 3 different hash functions that get called with three characters selected from the input, and the program branches if these hashes
match to some magic values.

```
.text:00000000004006CC loc_4006CC: ; CODE XREF: main+65↑j
.text:00000000004006CC mov eax, 3
.text:00000000004006D1 mov esi, eax ; hash input length (rsi)
.text:00000000004006D3 lea rdx, [rbp+crunch_output] ; hash output char* (rdx)
.text:00000000004006D7 lea rdi, [rbp+hash_input_1] ; hash input char* (rdi)
.text:00000000004006DB mov rcx, ds:input
.text:00000000004006E3 mov r8b, [rcx+7] ; save char 8
.text:00000000004006E7 mov [rbp+hash_input_1], r8b
.text:00000000004006EB mov rcx, ds:input
.text:00000000004006F3 mov r8b, [rcx+1] ; save char 2
.text:00000000004006F7 mov [rbp+hash_input_1+1], r8b
.text:00000000004006FB mov rcx, ds:input
.text:0000000000400703 mov r8b, [rcx+4] ; save char 5
.text:0000000000400707 mov [rbp+hash_input_1+2], r8b
.text:000000000040070B call hash_one_init_struct ; check loads 7,1,4
.text:0000000000400710 lea rdi, [rbp+crunch_output] ; s1
.text:0000000000400714 mov eax, offset hash_1_1 ; 87ef01ed50860be503eee2e15f171df2
.text:0000000000400719 mov esi, eax ; s2
.text:000000000040071B call _strcmp
.text:0000000000400720 cmp eax, 0
.text:0000000000400723 jnz check_1_2
```

If this initial check fails the computed hash gets compared against 2 more values if both of these fail aswell we get the "This branch has a red ball" message and the program exits.

```
.text:000000000067155B check_1_2: ; CODE XREF: main+E3↑j
.text:000000000067155B lea rdi, [rbp+crunch_output] ; s1
.text:000000000067155F mov eax, offset hash_1_2 ; "f954e5e2f33ca65b671360d80df7bd1d"
.text:0000000000671564 mov esi, eax ; s2
.text:0000000000671566 call _strcmp
.text:000000000067156B cmp eax, 0
.text:000000000067156E jnz check_1_3

...

.text:00000000008E23A6 check_1_3: ; CODE XREF: main+270F2E↑j
.text:00000000008E23A6 lea rdi, [rbp+crunch_output] ; s1
.text:00000000008E23AA mov eax, offset hash_1_3 ; "cb2877f6063e950be80bdd9cf63cccad"
.text:00000000008E23AF mov esi, eax ; s2
.text:00000000008E23B1 call _strcmp
.text:00000000008E23B6 cmp eax, 0
.text:00000000008E23B9 jnz print_redball

...

.text:0000000000B531F1 print_redball: ; CODE XREF: main+4E1D79↑j
.text:0000000000B531F1 mov rdi, offset aThisBranchHasA ; "This branch has a red ball\n"
.text:0000000000B531FB mov al, 0
.text:0000000000B531FD call _printf
.text:0000000000B53202 mov [rbp+var_37DCCC], eax
```

If a check passes a new hash is generated with different characters selected from the input and then compared again etc...
Because i missed a crucial bit of information in the strings of the binary, i decided to do a top-down approach.

## Rebuilding the hash functions

I used IDA Pro to decompile the three hash functions, this just entailed creating the structures used by the hash functions,
as example here the one used by the first one:

```
00000000 sphexsmall struc ; (sizeof=0x60, mappedto_3)
00000000 ; XREF: hash_one_init_struct/r
00000000 data db 64 dup(?) ; string(C)
00000040 length dd ?
00000044 db ? ; undefined
00000045 db ? ; undefined
00000046 db ? ; undefined
00000047 db ? ; undefined
00000048 length_o dq ?
00000050 const1 dd ?
00000054 const2 dd ?
00000058 const3 dd ?
0000005C const4 dd ?
00000060 sphexsmall ends
```

And besides that a lot of retyping and renaming to get them all to recompile. I then wrapped them all in a small bruteforcing routine so i could easily generate the 3 characters input needed
to generate each hash value.

```
#include<stdio.h>
#include<string.h>
#include<inttypes.h>
#include "xmastree.h"
#include "crunch_one.h"
#include "crunch_one.c"
#include "crunch_two.h"
#include "crunch_two.c"
#include "crunch_three.h"
#include "crunch_three.c"

int brute();

int main (int argc, char *argv[]) {
int found;

if (argc != 2) {
printf("Usage: ./chash <hash>");
return 1;
}
char *hash = argv[1];

if(strlen(hash) == 32) {
found = brute(hash, hash_one);
} else if(strlen(hash) == 40) {
found = brute(hash, hash_two);
} else if(strlen(hash) == 64) {
found = brute(hash, hash_three);
}
printf("Found %d\n", found);
return 0;
}

int brute(char *target, char*(*hash_func)(char* input, int length, char* out)) {
char input[4];
int lower_bound = 0x20;
int upper_bound = 0x7f;
int x,y,z;
char output[65];
input[3] = 0x00;
int found = 0;

for(x=lower_bound; x<=upper_bound; x++) {
for(y=lower_bound; y<=upper_bound; y++) {
for(z=lower_bound; z<=upper_bound; z++) {
input[0] = x;
input[1] = y;
input[2] = z;
hash_func(input, 3, output);
if(strcmp(output, target) == 0) {
printf("%X%X%X\n%s\n", input[0], input[1], input[2], input);
found++;
return 1;
}
}
}
}
return found;
}
```

## Walk the binary with radare2

Next up i wrote a script using `r2pipe` with bascially walks along the program branches and parses the different types of basic blocks we can encounter
- hash block which generates a new hash value from input characters
- cmp block which compares the previously generated hash to another value
- print block which prints a message
- jump block which usually just jumps to program exit

If we hit a hash block we bruteforce the characters needed to generate the expected hash value and compare this to the constraints already imposed on our input by
previous calculations. Doing this we could quickly see that a lot of branches are impossible to reach because they needed characters at the same position to be different which is obviously impossible.
If we encounter such an impossible condition we just drop this whole path and follow a feasible one.

```python
import r2pipe
import json
from binascii import unhexlify as unhex
import subprocess

r2 = r2pipe.open('./xmastree')

hash_funcs = {
'0xb53220': 'hash_one',
'0xb532e0': 'hash_two',
'0xb533a0': 'hash_three'
}

class State:
empty = '░'

def __init__(self, addr, parent=None, constraint=bytearray(b'\x00'*10)):
self.constraint = constraint
self.addr = addr
self.type = classify(addr)
self.trueconstraint = None
self.offsets = None
self.parent = parent

if self.type == 'hash':
self.block = parsecheck(addr)
self.offsets = self.block["input_offsets"]
elif self.type == 'cmp':
self.block = parsecmp(addr)
self.offsets = parent.offsets
self.constraint = parent.constraint[:]
elif self.type == 'print':
self.block = parseprint(addr)
elif self.type == 'jmp':
self.block = parsejump(addr)
else:
raise Exception("[state] Unknown block type %s" % self.type)

def true(self):
if self.type == "print":
return None
return self.block["true_addr"]

def false(self):
if self.type == "print":
return None
return self.block["false_addr"]

def trueconstrain(self):
input = crack(self.block["hash"])
self.trueconstraint = self.constraint[:]
for k,v in enumerate(self.offsets):
if self.constraint[v] is not 0x00 and self.constraint[v] is not input[k]:
return False
else:
self.trueconstraint[v] = input[k]
return True

def printconstraint(self):
p = [self.empty if c == 0x00 else chr(c) for c in self.constraint]
print("".join(p))

def getconstraint(self):
p = [self.empty if c == 0x00 else chr(c) for c in self.constraint]
return "".join(p)

def printtrueconstraint(self):
p = [self.empty if c == 0x00 else chr(c) for c in self.trueconstraint]
print("".join(p))

def gettrueconstraint(self):
p = [self.empty if c == 0x00 else chr(c) for c in self.trueconstraint]
return "".join(p)

def crack(hash):
try:
output = subprocess.check_output(['./chash', hash]).split(b"\n")
assert len(output) == 4
assert output[2] == b'Found 1'
return output[1]
except subprocess.CalledProcessError:
raise Exception("Unable to crack %s" % hash)

def classify(addr):
check = json.loads(r2.cmd("pdj 4 @ %s" % addr))
if check[0]['disasm'] == 'mov eax, 3':
return "hash"
elif check[3]['disasm'] == 'call sym.imp.strcmp':
return "cmp"
elif check[2]['disasm'] == 'call sym.imp.printf':
return "print"
elif check[0]['type'] == 'jmp':
return "jmp"
else:
raise Exception("unknown block @ %s" % addr)

def parsecheck(check_addr):
# print("Parsing check @ %s" % check_addr)

check = json.loads(r2.cmd("pdj 21 @ %s" % check_addr))
input_offsets = []

# hash input length
assert check[0]['disasm'] == 'mov eax, 3'
checko = {
"addr": check_addr,
"type": "hash",
"input_length": 3
}

# input offsets
for pos in [5,8,11]:
assert check[pos]['disasm'].startswith('mov r8b, byte [rcx')
if check[pos]['size'] == 3: # 448a01 < > 448a4103
offset = 0
else:
offset = ord(unhex(check[pos]['bytes'][-2:]))
assert 0 <= offset < 10
input_offsets.append(offset)
checko["input_offsets"] = input_offsets

# hash func call
assert check[13]['type'] == 'call'
hash_addr = check[13]['disasm'].split(" ")[1]
checko['hash_func'] = hash_funcs[hash_addr]

# load target hash
assert check[15]['disasm'].startswith('mov eax, str.')
str_addr = check[15]['opcode'].split(" ")[2]
hash_target = r2.cmd("ps @ %s" % str_addr)[:-1]
checko['hash'] = hash_target

# fail check address
assert check[19]['disasm'].startswith('jne')
checko['false_addr'] = check[19]['disasm'].split(" ")[1]

# following bb address
next_bb = hex(check[20]['offset'])
checko['true_addr'] = next_bb

return checko

def parsecmp(addr):
# print("Parsing cmp @ %s" % addr)
block = json.loads(r2.cmd("pdj 7 @ %s" % addr))

assert block[3]['disasm'] == 'call sym.imp.strcmp'
assert block[1]['disasm'].startswith('mov eax, str.')
str_addr = block[1]['opcode'].split(" ")[2]
hash_target = r2.cmd("ps @ %s" % str_addr)[:-1]

blocko = {
"addr": addr,
"type": "cmp",
"hash": hash_target,
}

# fail check address
assert block[5]['disasm'].startswith('jne')
blocko['false_addr'] = block[5]['disasm'].split(" ")[1]

# following bb address
next_bb = hex(block[6]['offset'])
blocko['true_addr'] = next_bb
return blocko

def parseprint(addr):
# print("Parsing print @ %s" % addr)
block = json.loads(r2.cmd("pdj 3 @ %s" % addr))
assert block[2]['disasm'] == 'call sym.imp.printf'
assert block[0]['disasm'].startswith('movabs rdi, str.')
outs = r2.cmd("ps @ %s" % block[0]['opcode'].split(" ")[2])
blocko = {
"addr": addr,
"type": "print",
"string": outs
}
return blocko

def parsejump(addr):
# print("Parsing jump @ %s" % addr)
block = json.loads(r2.cmd("pdj 1 @ %s" % addr))
assert block['type'] == 'jmp'
blocko = {
"addr": addr,
"type": "jump",
"dest": hex(block['jump'])
}
return blocko

def walk(s):
stack = []
stack.append(s)
while len(stack) > 0:
w = stack.pop()
print("%5s @ %s - %s %s %s %s" % (w.type, w.addr, w.getconstraint(), w.offsets, w.true(), w.false()))
if w.type == "print":
print(w.block["string"])
if w.type == "hash" or w.type == "cmp":
stack.append(State(w.false(), constraint=w.constraint[:],parent=w))
if w.trueconstrain() is True:
stack.append(State(w.true(), constraint=w.trueconstraint[:]))

start = '0x4006cc'
walk(State(start))
```

Running this we quickly get the flag, output is block type, block address, constraints on the input, selected characters for hashing and the adresses for the true and false branches respectively.

```bash
$ python solvexmastree.py
hash @ 0x4006cc - ░░░░░░░░░░ [7, 1, 4] 0x400729 0x67155b
hash @ 0x400729 - ░5░░m░░q░░ [7, 5, 6] 0x400798 0x4d0c0f
cmp @ 0x4d0c0f - ░5░░m░░q░░ [7, 5, 6] 0x4d0c2b 0x5a10a2
cmp @ 0x5a10a2 - ░5░░m░░q░░ [7, 5, 6] 0x5a10be 0x671535
print @ 0x671535 - ░5░░m░░q░░ None None None
This branch is empty

cmp @ 0x67155b - ░░░░░░░░░░ [7, 1, 4] 0x671574 0x8e23a6
hash @ 0x671574 - ░g░░7░░<░░ [7, 5, 6] 0x6715e3 0x741a5a
cmp @ 0x741a5a - ░g░░7░░<░░ [7, 5, 6] 0x741a76 0x811eed
cmp @ 0x811eed - ░g░░7░░<░░ [7, 5, 6] 0x811f09 0x8e2380
print @ 0x8e2380 - ░g░░7░░<░░ None None None
This branch is empty

cmp @ 0x8e23a6 - ░░░░░░░░░░ [7, 1, 4] 0x8e23bf 0xb531f1
hash @ 0x8e23bf - ░4░░y░░M░░ [7, 5, 6] 0x8e242e 0x9b28a5
cmp @ 0x9b28a5 - ░4░░y░░M░░ [7, 5, 6] 0x9b28c1 0xa82d38
hash @ 0x9b28c1 - ░4░░y_XM░░ [2, 1, 2] 0x9b2930 0x9f7fbe
hash @ 0x9b2930 - ░4$░y_XM░░ [9, 8, 6] 0x9b299f 0x9c9b8a
cmp @ 0x9c9b8a - ░4$░y_XM░░ [9, 8, 6] 0x9c9ba6 0x9e0d91
hash @ 0x9c9ba6 - ░4$░y_XMA5 [1, 9, 4] 0x9c9c15 0x9d171f
hash @ 0x9c9c15 - ░4$░y_XMA5 [3, 5, 5] 0x9c9c84 0x9cc543
cmp @ 0x9cc543 - ░4$░y_XMA5 [3, 5, 5] 0x9cc55f 0x9cee1e
cmp @ 0x9cee1e - ░4$░y_XMA5 [3, 5, 5] 0x9cee3a 0x9d16f9
hash @ 0x9cee3a - ░4$Hy_XMA5 [0, 3, 6] 0x9ceea8 0x9cfbf9
cmp @ 0x9cfbf9 - ░4$Hy_XMA5 [0, 3, 6] 0x9cfc15 0x9d0966
cmp @ 0x9d0966 - ░4$Hy_XMA5 [0, 3, 6] 0x9d0982 0x9d16d3
hash @ 0x9d0982 - H4$Hy_XMA5 [2, 3, 9] 0x9d09f1 0x9d0e1d
hash @ 0x9d09f1 - H4$Hy_XMA5 [4, 0, 8] 0x9d0a5f 0x9d0b7f
cmp @ 0x9d0b7f - H4$Hy_XMA5 [4, 0, 8] 0x9d0b9b 0x9d0cbb
hash @ 0x9d0b9b - H4$Hy_XMA5 [8, 7, 0] 0x9d0c09 0x9d0c25
print @ 0x9d0c09 - H4$Hy_XMA5 None None None
This is the trunk, it has a tiny flag pinned to it!

```

And now you see what i mean by the crucial bit of information i missed, if i had noticed the "This is the trunk, it has a tiny flag pinned to it!" in the strings output i could have gone with a
bottom-up approach and just collected the constraints starting from that point upwards!

All in all a great challenge by Steven which forced me to develop new techniques because a lot of approaches just did not work with a function of this magnitude!

Original writeup (https://www.sigflag.at/blog/2018/writeup-otw-xmastree/).