Issue
I am trying to use Pytesseract to read the digits from the following image:
Unfortunately, the program does not return any result, even after applying grayscale conversion, thresholding, noise removal, or Canny edge detection. When using a config that whitelists only digits, `$` and `,`, the program stops detecting text even in the high-resolution image. (here)
The code is as follows:
class NumberAnalyser:
    """Image pre-processing helpers and a Tesseract-based digit reader.

    The methods reference ``cv2``, ``np`` and ``pt`` as module-level names.
    NOTE(review): presumably OpenCV, NumPy and pytesseract -- confirm the
    file's imports.
    """

    def get_grayscale(self, image):
        """Return ``image`` converted from BGR to grayscale."""
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    def remove_noise(self, image):
        """Return ``image`` smoothed by a median blur with aperture size 5."""
        return cv2.medianBlur(image, 5)

    def thresholding(self, image):
        """Return the grayscale version of ``image`` after a to-zero threshold.

        ``cv2.THRESH_TOZERO`` with a threshold of 127 keeps pixels brighter
        than 127 unchanged and sets every other pixel to 0.
        """
        gray = self.get_grayscale(image)
        # Threshold the untouched grayscale image: nothing may write into
        # `gray` before this call, so the result depends on the input alone.
        _, to_zero = cv2.threshold(gray, 127, 255, cv2.THRESH_TOZERO)
        return to_zero

    def dilate(self, image):
        """Return ``image`` dilated once with a 5x5 all-ones kernel."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.dilate(image, kernel, iterations=1)

    def erode(self, image):
        """Return ``image`` eroded once with a 5x5 all-ones kernel."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.erode(image, kernel, iterations=1)

    def opening(self, image):
        """Return the morphological opening (erosion, then dilation) of ``image``."""
        kernel = np.ones((5, 5), np.uint8)
        return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

    def canny(self, image):
        """Return the Canny edge map of ``image`` (thresholds 100 and 200)."""
        return cv2.Canny(image, 100, 200)

    def deskew(self, image):
        """Return ``image`` rotated about its centre to undo the detected skew.

        The skew angle is taken from the minimum-area rectangle around all
        non-zero pixels.
        NOTE(review): the angle range reported by ``cv2.minAreaRect`` differs
        between OpenCV releases -- confirm the ``-45`` branch against the
        installed version.
        """
        # Coordinates of every non-zero pixel, as (row, col) pairs.
        coords = np.column_stack(np.where(image > 0))
        angle = cv2.minAreaRect(coords)[-1]
        if angle < -45:
            angle = -(90 + angle)
        else:
            angle = -angle
        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        # Replicate border pixels so the rotation does not add black corners.
        rotated = cv2.warpAffine(
            image, M, (w, h),
            flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE,
        )
        return rotated

    def match_template(self, image, template):
        """Return the normalised correlation-coefficient map of ``template`` over ``image``."""
        return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)

    def numbers(self, img_path):
        """Threshold the image at ``img_path``, OCR it, and print and return the text.

        The thresholded image is shown in a window first; execution blocks
        until a key is pressed.

        Raises:
            FileNotFoundError: if ``cv2.imread`` cannot load ``img_path``.
        """
        reader = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if reader is None:
            raise FileNotFoundError(f"could not read image: {img_path!r}")

        thresh = self.thresholding(reader)

        cv2.imshow('threshold', thresh)
        cv2.waitKey(0)
        print('yes')

        # Options are separated by spaces only; the comma inside the whitelist
        # is one of the characters allowed in the output.
        text = pt.image_to_string(
            thresh,
            config='--psm 11 -c tessedit_char_whitelist=$,0123456789',
        )
        print(text)
        return text
Adding or removing the --psm 11 option does not change anything.
Any help would be super appreciated!
Solution
You apply several simple thresholding operations one after another, but you should also try other types of thresholding, such as adaptive thresholding and inRange.
For example, if you use inRange thresholding for the given example:
The result for the high resolution image will be:
The output for the 0.38 version:
20000
4.000
100
The result for the low resolution image will be:
The output for the 0.38 version:
44.900
16.000
34
Unfortunately, only the middle number is recognized correctly. If you tune the range values passed to inRange, the resulting image may give a better result.
For more, read "Improving the quality of the output" in the Tesseract documentation.
Code:
import cv2
import pytesseract
from numpy import array
# Image to read: high-res sample is l9Zbt.png, low-res sample is eO1XG.png.
img_path = "eO1XG.png"
img = cv2.imread(img_path)
# cv2.imread returns None (it does not raise) when the file cannot be loaded.
if img is None:
    raise FileNotFoundError(f"could not read image: {img_path!r}")

# Convert to HSV so the digits can be selected by a colour range.
img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
msk = cv2.inRange(img, array([94, 0, 196]), array([179, 84, 255]))  # for low resolution
# msk = cv2.inRange(img, array([0, 0, 0]), array([179, 26, 255]))  # for high resolution

# Dilate the mask, AND it with the original mask, then invert the result
# before passing it to Tesseract.
krn = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
dlt = cv2.dilate(msk, krn, iterations=1)
thr = 255 - cv2.bitwise_and(dlt, msk)

txt = pytesseract.image_to_string(thr, config='--psm 6 digits')
print(txt)

# Show the image that was passed to Tesseract; blocks until a key is pressed.
cv2.imshow("", thr)
cv2.waitKey(0)
Answered By - Ahx
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.