%matplotlib inline
from matplotlib.pyplot import imshow #Use matplotlib to show the image.
import numpy as np
from PIL import Image
import cv2
# for OCR.
import pyocr
import pyocr.builders
# Select the OCR backend. Query pyocr once, show what it found, and take the
# first tool reported. Fail with a clear message when nothing is installed
# instead of a bare IndexError from indexing an empty list.
available_tools = pyocr.get_available_tools()
print(available_tools)
if not available_tools:
    raise RuntimeError('No OCR tool available to pyocr; install one it '
                       'supports (e.g. Tesseract) and retry.')
tool = available_tools[0]
# Load the captcha and keep only the gray color space. cv2 cannot read .gif
# files, hence the .png input.
# cv2.imread signals a missing/unreadable file by returning None rather than
# raising, so check the result before handing it to cvtColor.
im_bgr = cv2.imread('checkcode.png')
if im_bgr is None:
    raise IOError("cv2.imread could not read 'checkcode.png'")
im_gray = cv2.cvtColor(im_bgr, cv2.COLOR_BGR2GRAY)
im_gray
imshow(im_gray, cmap='gray', interpolation='bicubic')
# Strip the background with one global cut-off (cv2.THRESH_BINARY):
# threshold value 45, maximum value 255.
retval, im_at_fixed = cv2.threshold(
    im_gray, 45, 255, cv2.THRESH_BINARY)
imshow(im_at_fixed, cmap='gray')
# Convert the thresholded array to a PIL image and run OCR on it.
print(tool.image_to_string(Image.fromarray(im_at_fixed)))
'''
The best result I got by manually tuning the params.
Not possible for crawling purpose.
'''
# Filter the background with a locally adaptive threshold
# (cv2.ADAPTIVE_THRESH_MEAN_C): block size 5, constant 70.
im_at_mean = cv2.adaptiveThreshold(im_gray, 255,
                                   cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, 5, 70)
# Call the imported `imshow` directly; the name `plt` is never bound here.
imshow(im_at_mean, cmap='gray')
# Convert the thresholded array to a PIL image and run OCR on it.
print(tool.image_to_string(Image.fromarray(im_at_mean)))
# Not good.
# Filter the background with a locally adaptive threshold
# (cv2.ADAPTIVE_THRESH_GAUSSIAN_C): block size 5, constant 45.
im_at_gau = cv2.adaptiveThreshold(im_gray, 255,
                                  cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY, 5, 45)
# Call the imported `imshow` directly; the name `plt` is never bound here.
imshow(im_at_gau, cmap='gray')
# Convert the thresholded array to a PIL image and run OCR on it.
print(tool.image_to_string(Image.fromarray(im_at_gau)))
# Not even recognized.
# Open the .gif captcha with PIL.
im = Image.open('checkcode (2).gif')
im
# Switch to grayscale ('L' mode).
imgry = im.convert('L')
imgry
# Hand-picked cut-off: gray levels below it map to 0, all others to 1.
threshold = 65
table = [0 if level < threshold else 1 for level in range(256)]
# Apply the lookup table, producing a mode '1' image.
out = imgry.point(table, '1')
out
print(tool.image_to_string(out))
# Good, but still not usable.