Issue
I am trying to build an OCR system to extract a serial number from hundreds of labels. I run each image through OpenCV and pytesseract to get the full text, but I am having trouble cleaning up the background well enough for pytesseract to work properly.
The region of interest I am trying to extract information from looks like the following (I blocked out two characters for privacy).
In an attempt to improve performance, I have split the 3-line serial number into 3 separate ROIs.
The following code is what I use to process the first line.
For that line, pytesseract returns '7IP29 AGH2TR:\n'.
import cv2 as cv
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
#Tesseract Library
import pytesseract
import re
from PIL import Image
pytesseract.pytesseract.tesseract_cmd = r'/usr/local/Cellar/tesseract/5.3.0_1/bin/tesseract'
# In[Img Load]
# Load the label photo and shrink it to scale_percent of its original size.
image_path = '/Users/cfr/Desktop/20230308_111250.jpg'
# Flag 0 loads the image as single-channel grayscale, so img.shape is (rows, cols).
img = cv.imread(image_path, 0)
if img is None:
    # cv.imread reports an unreadable/missing file by returning None, not by raising.
    raise FileNotFoundError(f'Could not read image: {image_path}')
print('Original Dimensions : ', img.shape)
scale_percent = 25  # percent of original size
# shape[1] is the column count (width); shape[0] is the row count (height).
width = int(img.shape[1] * scale_percent / 100)
height = int(img.shape[0] * scale_percent / 100)
dim = (width, height)  # passed to cv.resize as (width, height)
# resize image
resized = cv.resize(img, dim, interpolation=cv.INTER_AREA)
print('Resized Dimensions : ', resized.shape)
# In[ROI]
# One rectangle per line of the serial number; they are applied to the resized image.
# NOTE(review): values look like (x, y, width, height) in pixels — confirm against roi_cropper.
roi1 = (263, 252, 226, 43)
roi2 = (265, 288, 224, 32)
roi3 = (274, 320, 106, 32)
# In[Cropped ROI]
def roi_cropper(_image, _roi):
    """Return the rectangular sub-image of ``_image`` described by ``_roi``.

    Args:
        _image: 2-D (or higher) array indexed as ``[row, column, ...]``.
        _roi: Sequence ``(x, y, width, height)`` where ``x``/``y`` are the
            column/row of the top-left corner.

    Returns:
        The slice ``_image[y:y + height, x:x + width]``.
    """
    x, y, w, h = _roi[:4]
    # Rows are selected by y/height and columns by x/width.
    roi_cropped = _image[int(y):int(y + h), int(x):int(x + w)]
    return roi_cropped
# Crop the three serial-number regions out of the resized image in one pass.
roi_img1, roi_img2, roi_img3 = (
    roi_cropper(resized, region) for region in (roi1, roi2, roi3)
)
# In[BlackHat]
# initialize a rectangular structuring kernel (x wide, y tall)
x = 5
y = 2
kernel = cv.getStructuringElement(cv.MORPH_RECT, (x, y))
# Smooth the first-line crop before the morphological step.
gray = cv.GaussianBlur(roi_img1, (5, 5), 0)
# Black-hat is closing(image) minus image, so it responds to dark details
# smaller than the kernel on a brighter background.
blackhat = cv.morphologyEx(gray, cv.MORPH_BLACKHAT, kernel)
# kernel=None makes cv.dilate use its default 3x3 rectangular element.
blackhat_dilated = cv.dilate(blackhat, None, iterations=1)
plt.imshow(blackhat_dilated)
# In[Tesseract]
# The crop holds a single line of text, so use --psm 7 ("treat the image as a
# single text line"). --psm 2 is segmentation-only ("no OSD, or OCR") in the
# Tesseract manual and is not a recognition mode.
text = pytesseract.image_to_string(blackhat_dilated, config='--psm 7')
print(text)
Solution
You may never get a perfect result. I experimented a little with the image parameters; here is my code, which may be of some help. Could you improve the quality of your image? Also, you should use black-and-white images only:
import subprocess
import cv2
import pytesseract
# Image manipulation
# Commands https://imagemagick.org/script/convert.php
mag_img = r'D:\Programme\ImageMagic\magick.exe'
con_bw = r"D:\Programme\ImageMagic\convert.exe"
in_file = r'D:\Daten\..\stackoverflow\ID.jpg'
out_file = r'D:\Daten\..\stackoverflow\ID_bw.jpg'
# Play with black and white and contrast for better results
# check=True raises CalledProcessError if convert exits non-zero, instead of
# silently continuing with a missing or stale out_file.
process = subprocess.run(
    [con_bw, in_file, "-threshold", "18%", "-brightness-contrast", "-10x30", out_file],
    check=True,
)
# Text processing
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
img = cv2.imread(out_file)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None, not by raising.
    raise FileNotFoundError(f'Could not read image: {out_file}')
# Parameters see tesseract doc
# Whitelist restricts recognition to digits and upper-case letters; 'G' is
# included because the serial number contains it.
custom_config = r'--psm 3 --oem 3 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
tex = pytesseract.image_to_string(img, config=custom_config)
print(tex)
# Show the preprocessed image for one second, then close the window.
cv2.imshow('image', img)
cv2.waitKey(1000)
cv2.destroyAllWindows()
Output, not perfect:
75229CN2TR
TDETC1D72
HM7 COAR
Answered By - Hermann12
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.