In [21]:
%matplotlib inline
from matplotlib.pyplot import imshow  # Use matplotlib to show the image.
import numpy as np
from PIL import Image
import cv2

# for OCR.
import pyocr
import pyocr.builders
# Select the OCR backend: query pyocr once and take the first tool it reports.
available_tools = pyocr.get_available_tools()
print(available_tools)
if not available_tools:
    # Fail with an actionable message instead of a bare IndexError from [0].
    raise RuntimeError('pyocr found no OCR tool; install one (e.g. tesseract) first.')
tool = available_tools[0]
[<module 'pyocr.tesseract' from '/root/anaconda2/lib/python2.7/site-packages/pyocr/tesseract.pyc'>]

Use CV2

In [87]:
# Work in grayscale only. A .png is used here because cv2 cannot read .gif files.
im_bgr = cv2.imread('checkcode.png')
if im_bgr is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; check here so the failure is not an opaque cvtColor error.
    raise IOError("cv2.imread could not read 'checkcode.png'")
im_gray = cv2.cvtColor(im_bgr, cv2.COLOR_BGR2GRAY)
# Bare expression: echo the array as the cell output.
im_gray
Out[87]:
array([[209, 184, 209, ..., 209, 184, 209],
       [184, 255, 255, ..., 255, 255, 184],
       [209,  27, 255, ..., 255, 255, 209],
       ..., 
       [184, 255, 255, ..., 169, 178, 184],
       [194, 255, 255, ..., 255, 255, 209],
       [184, 209, 178, ..., 184, 188, 184]], dtype=uint8)
In [88]:
# Preview the grayscale image, rendered with bicubic interpolation.
imshow(im_gray, interpolation='bicubic', cmap='gray')
Out[88]:
<matplotlib.image.AxesImage at 0x7f1eea601c90>
In [90]:
# Filter the background with a fixed global threshold (cv2.THRESH_BINARY),
# using 45 as the threshold and 255 as the max value.
retval, im_at_fixed = cv2.threshold(im_gray, 45, 255, cv2.THRESH_BINARY)
imshow(im_at_fixed, cmap='gray')
print(tool.image_to_string(Image.fromarray(im_at_fixed)))
# The best result I got by manually tuning the params.
# Hand-tuning per image is not possible for crawling purposes.
264HZ
In [101]:
# Filter the background with adaptive mean thresholding
# (cv2.ADAPTIVE_THRESH_MEAN_C), block size 5 and constant 70.
im_at_mean = cv2.adaptiveThreshold(im_gray, 255,
                                   cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, 5, 70)
# Use the imported `imshow`: the import cell brings in only `imshow`, not
# `plt`, so `plt.imshow` raises NameError on a fresh kernel.
imshow(im_at_mean, cmap='gray')
print(tool.image_to_string(Image.fromarray(im_at_mean)))
# Not good.
2541;:
In [102]:
# Filter the background with adaptive Gaussian thresholding
# (cv2.ADAPTIVE_THRESH_GAUSSIAN_C), block size 5 and constant 45.
im_at_gau = cv2.adaptiveThreshold(im_gray, 255,
                                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY, 5, 45)
# Use the imported `imshow`: the import cell brings in only `imshow`, not
# `plt`, so `plt.imshow` raises NameError on a fresh kernel.
imshow(im_at_gau, cmap='gray')
print(tool.image_to_string(Image.fromarray(im_at_gau)))
# Not even recognized.

Try PIL's own built-in image operations instead.

In [115]:
# Read the image with PIL. This section opens the .gif file directly.
im = Image.open('checkcode (2).gif')
# Bare expression: echo the image as the cell output.
im
Out[115]:
In [116]:
# Convert to gray: mode 'L' is PIL's grayscale mode.
imgry = im.convert('L')
# Bare expression: echo the converted image as the cell output.
imgry
Out[116]:
In [129]:
# Binarize with a hand-picked cut-off. The 256-entry lookup table maps gray
# levels below the threshold to 0 and all other levels to 1; `point` applies
# it while producing a '1'-mode image.
threshold = 65
table = [0 if i < threshold else 1 for i in range(256)]
out = imgry.point(table, '1')
out
Out[129]:
In [130]:
# Run OCR on the binarized PIL image.
print(tool.image_to_string(out))
# Good, but still not usable.
484457

Overall, the recognition success rate is too low to be usable.