Saturday, November 18, 2023

[FIXED] Tesseract OCR not being able to read two rows of text on red background

November 18, 2023 contrast, ocr, python, python-tesseract, tesseract No comments

Issue

I'm encountering issues with pytesseract in my code. I've tried to make the code as easy to read as possible, but please let me know if there's anything that doesn't make sense. The problem seems to be with a small fragment, specifically the extract_gravnumbers_from_red function.

Surprisingly, the extract_gravnumbers_from_yellow function works perfectly fine and can read the text correctly. Additional information on what I've already tried to address the problem is in the next section.

Here are the test images being used:
kvarter_image1.png

kvarter_image2.png

Here is the full code:

import cv2
import pytesseract

# Enter the path to the Tesseract OCR executable.
# NOTE(review): the value below looks like a placeholder -- point it at the
# real location of tesseract.exe on the machine running this script.
pytesseract.pytesseract.tesseract_cmd = r'path_to_teseract\Tesseract\tesseract.exe'

# Function to extract the grave numbers from a selected region
def extract_gravnumbers_from_yellow(region):
    """Return the smallest grave number read from a yellow-marked region.

    The region is passed to Tesseract with ``--psm 6``; the recognised text
    is split on whitespace and every purely numeric token is converted to
    an integer.

    Args:
        region: Image crop handed to ``pytesseract.image_to_string``.

    Returns:
        The minimum of the recognised numbers when more than one number was
        found, otherwise ``None``.
    """
    grave_numbers_text = pytesseract.image_to_string(region, config='--psm 6')

    # str.isdigit() also accepts characters such as superscript digits that
    # int() rejects; str.isdecimal() only accepts what int() can parse, so
    # stray OCR glyphs can no longer raise ValueError here.
    grave_numbers_list = [
        int(token)
        for token in grave_numbers_text.split()
        if token.isdecimal()
    ]

    # Only regions holding more than one number are reported.
    if len(grave_numbers_list) <= 1:
        print("No Value")
        return None

    min_grave_number = min(grave_numbers_list)
    print(min_grave_number)
    return min_grave_number

def extract_gravnumbers_from_red(region):
    """Return the smallest grave number read from a red-marked region.

    The region is passed to Tesseract with ``--psm 6``; the recognised text
    is split on whitespace and every purely numeric token is converted to
    an integer.

    Args:
        region: Image crop handed to ``pytesseract.image_to_string``.

    Returns:
        The minimum of the recognised numbers when more than one number was
        found, otherwise ``None``.
    """
    grave_numbers_text = pytesseract.image_to_string(region, config='--psm 6')

    # str.isdigit() also accepts characters such as superscript digits that
    # int() rejects; str.isdecimal() only accepts what int() can parse, so
    # stray OCR glyphs can no longer raise ValueError here.
    grave_numbers_list = [
        int(token)
        for token in grave_numbers_text.split()
        if token.isdecimal()
    ]

    # Only regions holding more than one number are reported.
    if len(grave_numbers_list) <= 1:
        print("No Value")
        return None

    min_grave_number = min(grave_numbers_list)
    print(min_grave_number)
    return min_grave_number

def get_red_grave_numbers(red_color, hsv_image, image, grav_numbers_list):
    """Collect grave numbers from every red marking found in the image.

    Args:
        red_color: Pair ``(lower, upper)`` of HSV bounds for ``cv2.inRange``.
        hsv_image: The image in HSV colour space, used to build the mask.
        image: The image the crops are taken from; a rectangle is drawn on
            it around every marking that is found.
        grav_numbers_list: List that receives the numbers; it is appended
            to in place.

    Returns:
        ``grav_numbers_list``, with one entry added for every marking for
        which ``extract_gravnumbers_from_red`` returned a number.
    """
    lower_red, upper_red = red_color
    mask_red = cv2.inRange(hsv_image, lower_red, upper_red)

    # Find the outer contours of the red markings.
    red_contours, _ = cv2.findContours(mask_red, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Crops are taken from a snapshot made before this function draws
    # anything: a slice of `image` is a view, so the highlight rectangles
    # would otherwise show up inside the pixels handed to the OCR step.
    undrawn_image = image.copy()

    for red_contour in red_contours:
        x, y, w, h = cv2.boundingRect(red_contour)

        # Read the number from the clean crop of this marking.
        red_region = undrawn_image[y:y + h, x:x + w]
        min_grav_number = extract_gravnumbers_from_red(red_region)
        if min_grav_number is not None:
            grav_numbers_list.append(min_grav_number)

        # Draw a rectangle around the red mark (only to show the result).
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
    return grav_numbers_list

def get_yellow_grave_numbers(yellow_color, hsv_image, image, grav_numbers_list):
    """Collect grave numbers from every yellow marking found in the image.

    Args:
        yellow_color: Pair ``(lower, upper)`` of HSV bounds for ``cv2.inRange``.
        hsv_image: The image in HSV colour space, used to build the mask.
        image: The image the crops are taken from; a rectangle is drawn on
            it around every marking that is found.
        grav_numbers_list: List that receives the numbers; it is appended
            to in place.

    Returns:
        ``grav_numbers_list``, with one entry added for every marking for
        which ``extract_gravnumbers_from_yellow`` returned a number.
    """
    lower_yellow, upper_yellow = yellow_color
    mask_yellow = cv2.inRange(hsv_image, lower_yellow, upper_yellow)

    # Find the outer contours of the yellow markings.
    yellow_contours, _ = cv2.findContours(mask_yellow, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Crops are taken from a snapshot made before this function draws
    # anything: a slice of `image` is a view, so the highlight rectangles
    # would otherwise show up inside the pixels handed to the OCR step.
    undrawn_image = image.copy()

    for yellow_contour in yellow_contours:
        x, y, w, h = cv2.boundingRect(yellow_contour)

        # Read the number from the clean crop of this marking.
        yellow_region = undrawn_image[y:y + h, x:x + w]
        min_grav_number = extract_gravnumbers_from_yellow(yellow_region)
        if min_grav_number is not None:
            grav_numbers_list.append(min_grav_number)

        # Draw a rectangle around the yellow mark (only to show the result).
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 255), 2)
    return grav_numbers_list

def check_double_number(filename, kvarter):
    """Scan one kvarter image for yellow and red markings and read their numbers.

    The image is loaded, converted to HSV, searched first for yellow and then
    for red markings, and finally shown in a window that stays open until a
    key is pressed.

    Args:
        filename: Path of the image file to load.
        kvarter: Identifier of the kvarter, used only in the progress output.

    Returns:
        List of the numbers collected from the yellow markings followed by
        those collected from the red markings.

    Raises:
        FileNotFoundError: If the image cannot be read from ``filename``.
    """
    # Load the image
    image = cv2.imread(filename)
    # cv2.imread reports an unreadable file by returning None instead of
    # raising, which would otherwise surface as an AttributeError on .shape.
    if image is None:
        raise FileNotFoundError(f"Could not read image file: {filename!r}")

    # Print image dimensions to check if the image is loaded correctly
    print("Image shape:", image.shape)
    print(f"Kvarter: {kvarter}")

    # Yellow color HSV Color Code (lower and upper bound are the same value)
    yellow_color = ((35, 249, 245), (35, 249, 245))

    # Red color HSV Color Code (lower and upper bound are the same value)
    red_color = ((7, 208, 190), (7, 208, 190))

    # Convert the image to the correct color space (BGR to HSV)
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Create a list to store the grave numbers
    grav_numbers_list = []

    # Quick Debug print
    print("\nYellow mark is being processed:\n")
    grav_numbers_list = get_yellow_grave_numbers(yellow_color, hsv_image, image, grav_numbers_list)

    # Quick Debug print
    print("\nRed mark is being processed:\n")
    grav_numbers_list = get_red_grave_numbers(red_color, hsv_image, image, grav_numbers_list)
    print("\n")

    # Display Result Window
    cv2.imshow('Result', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return grav_numbers_list

def create_doublegrave_list(kvarter=2):
    """Run ``check_double_number`` over the numbered kvarter images.

    Image ``kvarter_image<N>.png`` is processed for every N from 1 up to and
    including ``kvarter``.

    Args:
        kvarter: Number of kvarter images to process. Defaults to 2, the
            value that used to be hard-coded.

    Returns:
        A list with one entry per image, each entry being the list returned
        by ``check_double_number`` for that image. Empty when ``kvarter`` is
        less than 1.
    """
    return [
        check_double_number(f"kvarter_image{kv}.png", kv)
        for kv in range(1, kvarter + 1)
    ]

# Process the kvarter images and print the per-image lists of collected numbers.
dubbelgravLista = create_doublegrave_list()
print(dubbelgravLista)

I will not use blockquotes as quotes but rather to improve the structure of the question

It appears that the line responsible for the text not being recognized in the extract_gravnumbers_from_red function is likely this one:

grave_numbers_text = pytesseract.image_to_string(region, config='--psm 6')

When attempting:

min_grave_number = min(grave_numbers_list)
print(min_grave_number)

In the else statement, a ValueError is raised because it returns an empty result, since no integer was found. This issue seems to be specific to reading two "rows" of text on the red regions, since it works fine for the yellow regions.

When printing grave_numbers_text in the else statement on the first image I get this output:

Red mark is being processed:

No Value
ize

No Value
ro

No Value
nz

So it is obviously reading and getting something, but it isn't right. Although on the second image it does get it right:

Red mark is being processed:

No Value
146

That region that it does get right only contains one row of text. So my theory is that it can't read two "rows" of text, due to some sort of setting that I am missing combined with a different contrast since it does get it right on the yellow regions. I know it is not about the quality of the image because I tried to run it with a much larger kvarter_image1.png image.

I've experimented with the config argument and tried various options such as --psm 11, --psm 12, --psm 7 and --psm 4, but unfortunately, it did not make any difference. This is probably because the config argument does not affect the contrast, which is likely the primary challenge.

I've displayed the red region and this is the result:

Displayed Red Region

Which looks like it should.

An alternative approach was attempted by using color filtering and histogram equalization as follows:

# NOTE(review): cv2.inRange keeps a pixel only when lower <= value <= upper in
# every channel. The first lower bound (63) is greater than the first upper
# bound (7), so no pixel can satisfy it and the mask comes out empty. The
# bounds are also described as HSV values while `region` is presumably a BGR
# crop -- confirm before reusing this snippet.
red_text_mask = cv2.inRange(region, (63, 161, 76), (7, 208, 190))
# Keep only the pixels selected by the mask.
red_text = cv2.bitwise_and(region, region, mask=red_text_mask)

# Convert the masked result to a single-channel grayscale image.
gray_text = cv2.cvtColor(red_text, cv2.COLOR_BGR2GRAY)

# Apply histogram equalization to the grayscale image.
equalized_text = cv2.equalizeHist(gray_text)

# Run Tesseract on the equalized image.
grave_numbers_text = pytesseract.image_to_string(equalized_text, config='--psm 6')

The color code used is HSV and is the same as the red_color variable. The black color code I got from selecting it in this program:

import cv2
import numpy as np

def get_hsv_color(event, x, y, flags, param):
    """Mouse callback that prints the HSV value of the clicked pixel.

    Reads the pixel from the module-level ``image`` and does nothing for
    events other than a left-button press.

    Args:
        event: OpenCV mouse event code.
        x: Column of the mouse position.
        y: Row of the mouse position.
        flags: Unused; part of the callback signature.
        param: Unused; part of the callback signature.
    """
    if event == cv2.EVENT_LBUTTONDOWN:
        pixel_value = image[y, x]
        # cvtColor works on images, so wrap the single pixel as a 1x1 image.
        hsv_pixel_value = cv2.cvtColor(np.uint8([[pixel_value]]), cv2.COLOR_BGR2HSV)
        print("HSV Value:", hsv_pixel_value)

# Read the image
image = cv2.imread('kvarter_image1.png')

# Create a window to display the image
cv2.namedWindow('Image')
cv2.imshow('Image', image)

# Set the mouse callback function
cv2.setMouseCallback('Image', get_hsv_color)

# Wait for a key event to exit
cv2.waitKey(0)
cv2.destroyAllWindows()

However, even with this modification, the result remains the same, and the ValueError persists when trying to print it in the else statement as the grave_numbers_text variable remains empty.

It seems that reading black text on a red background poses a challenge for the OCR process. The lack of contrast might be hindering the accurate recognition of the text. Or there might be something wrong with my configuration. Any insights or suggestions to improve the OCR performance on red regions with black text would be highly appreciated. Thank you for your assistance!

Solution

Thanks to Mark Ransom, who came up with a great suggestion, the problem is solved! Very clever, I didn't think of that.

Thanks Yunus as well! That might be useful later! Edit: I looked at your results, very impressive indeed. Having the coordinate data could be valuable for future use. Thanks again!

This is the updated function with splitting the RGB channels, if anyone is interested:

def extract_gravnumbers_from_red(region):
    """Return the smallest grave number read from a red-marked region.

    Only the third channel of the region is used: it is histogram-equalized
    and passed to Tesseract with ``--psm 6``. The recognised text is split on
    whitespace and every purely numeric token is converted to an integer.

    Args:
        region: Three-channel image crop; presumably in OpenCV's BGR order,
            which makes the third channel the red one -- confirm with the
            caller.

    Returns:
        The minimum of the recognised numbers when more than one number was
        found, otherwise ``None``.
    """
    # Split the image into its channels and keep only the last one.
    _, _, red_channel = cv2.split(region)

    # Enhance contrast with histogram equalization
    equalized_red = cv2.equalizeHist(red_channel)

    # Use Tesseract OCR to get text from the enhanced image
    grave_numbers_text = pytesseract.image_to_string(equalized_red, config='--psm 6')

    # str.isdigit() also accepts characters such as superscript digits that
    # int() rejects; str.isdecimal() only accepts what int() can parse, so
    # stray OCR glyphs can no longer raise ValueError here.
    grave_numbers_list = [
        int(token)
        for token in grave_numbers_text.split()
        if token.isdecimal()
    ]

    # Only regions holding more than one number are reported; otherwise the
    # raw OCR text is printed to help with debugging.
    if len(grave_numbers_list) <= 1:
        print("No Value")
        print(grave_numbers_text)
        return None

    min_grave_number = min(grave_numbers_list)
    print(min_grave_number)
    return min_grave_number

Answered By - Daniel Sjöberg

This Answer collected from stackoverflow and tested by PythonFixing community admins, is licensed under cc by-sa 2.5 , cc by-sa 3.0 and cc by-sa 4.0

Saturday, November 18, 2023

[FIXED] Tesseract OCR not being able to read two rows of text on red background

Issue

Solution

0 comments:

Post a Comment

Popular Posts

Labels