Monday, February 28, 2022

[FIXED] Tesseract not detecting any text on RGB images on Python

February 28, 2022 image-processing, python, python-tesseract, tesseract No comments

Issue

Hey, I started working with Tesseract OCR, but I'm having problems getting the text from really simple RGB images. It works just fine with text2image images. Here is my code:

from PIL import Image
import pytesseract
import argparse
import cv2
import os
import sys


class wordExtractor():
    """Extract text from an image file with Tesseract via pytesseract."""

    def __init__(self, image_path):
        """Remember the image to process and point pytesseract at Tesseract.

        Args:
            image_path: Path of the image file to run OCR on.
        """
        self.image_path = image_path
        # Machine-specific location of the tesseract executable; adjust it
        # for your own installation.
        pytesseract.pytesseract.tesseract_cmd = r'/home/yarin/tesseract/bin/debug/tesseract'
        # Optional pre-processing step, currently disabled:
        # self.resize_image()

    def resize_image(self):
        """Rescale the image to a width of 800 px and store it as a PNG.

        The aspect ratio is preserved. The resized copy is written next to
        the original with a ``.png`` extension, the original file is removed
        (unless it already is that PNG, in which case it is overwritten) and
        ``self.image_path`` is updated to the new file.
        """
        basewidth = 800
        # splitext copes with extensions of any length (".jpeg", ".tiff"),
        # unlike slicing off the last four characters.
        root, _ext = os.path.splitext(self.image_path)
        target_path = root + '.png'

        with Image.open(self.image_path) as img:
            wpercent = basewidth / float(img.size[0])
            hsize = int(float(img.size[1]) * wpercent)
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            resized = img.resize((basewidth, hsize), Image.LANCZOS)

        # Save first and delete afterwards so a failed save cannot lose the
        # only copy of the image.
        resized.save(target_path)
        if target_path != self.image_path:
            os.remove(self.image_path)
        self.image_path = target_path

    def get_text(self, lang):
        """Run OCR on the image and return the recognised text.

        The image is converted to grayscale and binarised with Otsu's
        threshold before it is handed to Tesseract.

        Args:
            lang: Tesseract language code, e.g. ``'eng'``.

        Returns:
            The string produced by ``pytesseract.image_to_string``.

        Raises:
            IOError: If OpenCV cannot read ``self.image_path``.
        """
        image = cv2.imread(self.image_path)
        if image is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None; fail here with a readable message instead of a cryptic
            # error from cvtColor.
            raise IOError("Could not read image: {}".format(self.image_path))

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.threshold(gray, 0, 255,
                             cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

        # Hand the in-memory image straight to pytesseract: no temporary file
        # to create, leak on error, or clean up.
        return pytesseract.image_to_string(Image.fromarray(gray), lang=lang)

# Run OCR on the sample image and print whatever Tesseract recognises.
w = wordExtractor(image_path='6.png')
print(w.get_text(lang='eng'))

Tesseract returns an empty string for the following images:

Please show me how I can solve this. Thanks in advance!

Solution

After thresholding, you can use findContours to find the contour of each shape. Then you can filter the contours and copy every contour you are interested in onto a blank white image. At that point you have the letters isolated and ready to be processed by Tesseract. You can see the details in the code below.

import cv2
import numpy as np
import pytesseract

# Sample input from the question; the other two samples are "dwLFQ.png" and
# "xTH6s.png".
image_path = "NfwY4.png"
img = cv2.imread(image_path, cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread reports a missing or undecodable file by returning None, so
    # stop here with a readable message instead of failing inside cvtColor.
    raise IOError("Could not read image: " + image_path)

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# findContours returns either 2 or 3 values; pick the contour list in both cases.
items = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours = items[0] if len(items) == 2 else items[1]

# Blank white canvas onto which the letter-like segments are copied.
base = np.full(thresh.shape, 255, dtype=np.uint8)

max_area = 0
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    ratio = h / w
    area = cv2.contourArea(contour)
    # Outline every contour on the colour image for visual debugging.
    cv2.drawContours(img, [contour], 0, (255, 0, 0), 2)

    # Keep only shapes that are taller than wide, like upright letters.
    if not 1 < ratio < 3:
        continue

    max_area = max(area, max_area)
    print("area: " + str(area) + ", max area: " + str(max_area) + ", ratio: " + str(ratio))

    # Discard specks and very large regions.
    if not 1000 < area < 40000:
        continue

    # Mean of the thresholded image inside the contour tells whether the
    # shape itself is white or black.
    mask = np.zeros(thresh.shape, dtype=np.uint8)
    cv2.drawContours(mask, [contour], -1, color=255, thickness=-1)
    mean = cv2.mean(thresh, mask=mask)

    segment = thresh[y:y + h, x:x + w].copy()
    if mean[0] > 150:
        # White shape: invert so it ends up dark on the white canvas.
        segment = cv2.bitwise_not(segment)

    base[y:y + h, x:x + w] = segment

    # Show the canvas after each accepted segment; press a key to continue.
    cv2.imshow("base", base)
    cv2.waitKey(0)

custom_config = r'-l eng --oem 3 --psm 6 -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZ " '
text = pytesseract.image_to_string(base, config=custom_config)
print("detected: " + text)

cv2.imshow("img", img)
cv2.imshow("base", base)

cv2.waitKey(0)
cv2.destroyAllWindows()

Result

detected: NO
ENTRY

Answered By - us2018

This answer was collected from Stack Overflow and tested by PythonFixing community admins. It is licensed under CC BY-SA 2.5, CC BY-SA 3.0 and CC BY-SA 4.0.

Monday, February 28, 2022

[FIXED] Tesseract not detecting any text on RGB images on Python

Issue

Solution

0 comments:

Post a Comment

Popular Posts

Labels