Tags: python developing ocr 

Rating: 5.0

```python
from PIL import Image
import pytesseract
import requests
import zipfile
import shutil
import time
import re

class Picture(object):
    """One piece of the shredded image, loaded from ``chall/<index>.png``.

    On construction the piece is turned upright (two heuristics, see
    ``__init__``) and the ink pattern of its first and last scanned pixel
    columns is recorded so that neighbouring pieces can be matched.

    Attributes:
        index: Number of the piece; also the stem of its file name.
        img: The RGB image, possibly rotated by 180 degrees.
        trace_first: Integer bitmask of column 0; bit ``h`` is set when the
            pixel in row ``h`` is not pure white.
        trace_last: Same bitmask for column 9.
    """

    # Area scanned by the heuristics below.
    # NOTE(review): the mirroring done after a rotation assumes every piece is
    # exactly SCAN_WIDTH x SCAN_HEIGHT pixels -- confirm against the data.
    SCAN_WIDTH = 10
    SCAN_HEIGHT = 1400
    # Channel sum of a pure white RGB pixel (255 * 3).
    WHITE = 765

    # Row bands compared by the second orientation check: the rows just above
    # and just below rows 100, 400 and 600.
    TOP_BANDS = ((91, 100), (391, 400), (591, 600))
    BOTTOM_BANDS = ((100, 109), (400, 409), (600, 609))

    def __init__(self, index):
        """Load piece ``index``, fix its orientation and compute its traces.

        Args:
            index: Integer used to build the path ``chall/<index>.png``.
        """
        super(Picture, self).__init__()
        self.index = index
        self.img = Image.open('chall/%d.png' % index).convert('RGB')

        # Rotation using the token position: find the topmost non-white pixel
        # within rows 650..749; if it sits above row 688 the piece is flipped.
        token_height = 750
        for w in range(0, self.SCAN_WIDTH):
            for h in range(650, 750):
                if sum(self.img.getpixel((w, h))) < self.WHITE:
                    token_height = min(h, token_height)
                    break
        if token_height < 688:
            self.img = self.img.rotate(180)

        # Traces of the outer columns, taken from the (possibly rotated) image.
        self.trace_first = self._column_trace(0)
        self.trace_last = self._column_trace(self.SCAN_WIDTH - 1)

        # Rotation adjustment: compare the amount of ink just above and just
        # below the three reference rows.
        sentence_top = self._ink_count(self.TOP_BANDS)
        sentence_bottom = self._ink_count(self.BOTTOM_BANDS)
        if sentence_top < sentence_bottom:
            self.img = self.img.rotate(180)
            # A 180-degree turn moves column 0 / row h to column 9 / row
            # (SCAN_HEIGHT - 1 - h), so the two traces swap AND flip
            # vertically.  Both new values must be computed from the old ones
            # in a single assignment; assigning them one after the other would
            # derive trace_last from the already-updated trace_first.
            self.trace_first, self.trace_last = (
                self._reverse_bits(self.trace_last, self.SCAN_HEIGHT),
                self._reverse_bits(self.trace_first, self.SCAN_HEIGHT),
            )

    def _is_ink(self, w, h):
        """Return 1 if pixel ``(w, h)`` is anything but pure white, else 0."""
        return min(1, self.WHITE - sum(self.img.getpixel((w, h))))

    def _column_trace(self, w):
        """Return the bitmask of column ``w``: bit ``h`` is set for an inked row ``h``."""
        trace = 0
        for h in range(0, self.SCAN_HEIGHT):
            trace += pow(2, h) * self._is_ink(w, h)
        return trace

    def _ink_count(self, bands):
        """Count non-white pixels over all scanned columns within ``bands``.

        Args:
            bands: Iterable of ``(start, stop)`` row ranges, ``stop`` exclusive.
        """
        total = 0
        for w in range(0, self.SCAN_WIDTH):
            for start, stop in bands:
                for h in range(start, stop):
                    total += self._is_ink(w, h)
        return total

    @staticmethod
    def _reverse_bits(value, width):
        """Mirror the lowest ``width`` bits of ``value``.

        Bit ``h`` of the input becomes bit ``width - 1 - h`` of the result.
        The value is zero-padded to ``width`` digits first so that unset high
        bits (blank rows) still take part in the mirroring.

        Args:
            value: Non-negative integer smaller than ``2 ** width``.
            width: Number of bits to mirror.

        Returns:
            The mirrored integer.
        """
        return int(format(value, '0%db' % width)[::-1], 2)

# Main
# Downloads the challenge archive, rebuilds the picture from its 100 pieces,
# reads the token with OCR and submits it.  Every artefact written to
# 'output/' is prefixed with the start timestamp of the run.
sess = requests.session()
start = time.time()

print('[*] Downloading ZIP ...')

response = sess.get('http://shreddinger.challs.malice.fr/challenge_accepted', stream=True)
# Stop here on a failed download: continuing would only crash later while
# opening an archive that was never written.
if response.status_code != 200:
    raise RuntimeError('Downloading ZIP failed: HTTP %d' % response.status_code)
with open('output/%s_chall.zip' % start, 'wb') as f:
    # Let urllib3 undo any Content-Encoding before the raw copy.
    response.raw.decode_content = True
    shutil.copyfileobj(response.raw, f)

print('[*] Downloading complete: %.4f' % (time.time() - start))

print('[*] Extracting ZIP ...')

# Context manager so the archive is closed even if extraction raises.
with zipfile.ZipFile('output/%s_chall.zip' % start, 'r') as zip_ref:
    zip_ref.extractall('chall')

print('[*] Reassembling image ...')

pictures_list = [Picture(i) for i in range(0, 100)]
background = Image.new('RGBA', (1000, 1400), (255, 255, 255, 255))
used_pictures = []

# Leftmost piece: its first column is blank while its last column is not.
# If no piece qualifies, the loop simply ends on the last piece, which is
# then used as the starting point.
for pic in pictures_list:
    if pic.trace_first == 0 and pic.trace_last != 0:
        break
background.paste(pic.img, (0, 0))
used_pictures.append(pic.index)
previous_pic = pic

# Greedy chaining: for every following slot pick the unused piece whose first
# column differs least from the last column of the piece placed before it.
for col in range(1, 100):
    lighter_img = (1400, None)
    for i in range(0, 100):
        pic = pictures_list[i]
        if pic.index not in used_pictures:
            # Number of differing rows, averaged over the trace as-is and
            # shifted by one row in either direction.
            diff = bin(pic.trace_first ^ previous_pic.trace_last).count('1')
            diff += bin((pic.trace_first >> 1) ^ previous_pic.trace_last).count('1')
            diff += bin((pic.trace_first << 1) ^ previous_pic.trace_last).count('1')
            diff = diff // 3
            if diff < lighter_img[0]:
                lighter_img = (diff, pic)
    if lighter_img[1] is None:
        # No remaining candidate scored below the initial threshold.
        break
    background.paste(lighter_img[1].img, (10*col, 0))
    used_pictures.append(lighter_img[1].index)
    previous_pic = lighter_img[1]

background.save('output/%s_out4.png' % start)
print('[*] Reassembling complete: %.4f' % (time.time() - start))

print('[*] Processing OCR ...')

# Only the band of rows 650..749 is passed to the OCR engine.
token = background.crop((0, 650, 1000, 750))
text = pytesseract.image_to_string(token)
text = re.sub(r'[^0-9A-Z]', '', text.upper())
# Map letters the OCR tends to return onto the look-alike character expected
# in the token.
for wrong, right in (('O', '0'), ('P', 'F'), ('I', '1'), ('Z', '2'),
                     ('Q', '0'), ('G', '6'), ('S', '5')):
    text = text.replace(wrong, right)

print('[*] Processing OCR complete: %.4f' % (time.time() - start))

if len(text) >= 40:
    # Long enough to be worth submitting; keep the server answer for review.
    print('[*] Sending token ...')
    response = sess.post('http://shreddinger.challs.malice.fr/', data={'shredded_token': text})
    with open('output/%s_response.html' % start, 'a') as logfile:
        logfile.write(response.text)
    print('[+] Capturing Token: %s' % text)
    token.save('output/%s_%s.png' % (start, text))
elif len(text) > 35:
    # Near miss: do not submit, but keep the crop for inspection.
    print('[-] Capturing Token: %s' % text)
    token.save('output/%s_%s.png' % (start, text))
else:
    print('[!] Capturing Token: %s' % text)
```

Original writeup (https://gist.github.com/vonKrafft/5a918fb30bcb53e7f82ff44e2f0864ab).