Issue
I am searching the d['text'] list returned by pytesseract.image_to_data for each phrase in a list of 'terms'. I have written a function check() which returns the right coordinates when there is a match, but for every other search, where there is no match, it returns a default coordinate (['left', 'top', 'width', 'height']).
The reason I used split is that the search terms are multi-word strings like this: ["Print Name of Subscriber", "Investor's Name"], whereas in the result of pytesseract.image_to_data, d['text'] holds one word per entry: ['Print', 'Name', 'of', 'Subscriber'].
pytesseract.image_to_data returns a dictionary describing the image, with data.keys(): ['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', 'width', 'height', 'conf', 'text']. I am only concerned with 'text' and the coordinates: 'left', 'top', 'width', 'height'. The function should return None when there isn't a match; instead, it is returning a default coordinate.
Code:
import pytesseract
from pytesseract import Output
import cv2
# Image file that is run through OCR.
image_path='image/out.jpg'
# Multi-word phrases to look for in the OCR output.
terms=["Print Name of Subscriber", "Investor's Name", "Name of Investor"]
image = cv2.imread(image_path)
# With Output.DICT the result is a dict of parallel lists
# ('text', 'left', 'top', 'width', 'height', ...), one entry per detected element.
d = pytesseract.image_to_data(image, output_type=Output.DICT)
def check(word, d):
    """Return [left, top, width, height] for the first place `word` is found.

    Args:
        word: Phrase to look for; it is split on whitespace into single words.
        d: Dict of parallel lists with the keys 'text', 'left', 'top',
            'width' and 'height'.

    Returns:
        The four values read at index ``i``, where the compared words are
        ``d['text'][i+1 : i+1+len(phrases)]``. Falls through and returns
        None if the loop finishes without accepting any position.
    """
    phrases = word.split()
    for i, _ in enumerate(d['text']):
        # Words that follow position i, as many as the phrase has.
        to_match = d['text'][i+1: i+1+len(phrases)]
        # zip() stops at the shorter sequence, so a short or empty slice
        # near the end of d['text'] also passes this test.
        if all(p == m for p, m in zip(phrases, to_match)):
            return [d[key][i] for key in ('left', 'top', 'width', 'height')]
for term in terms:
    # Keep the result under its own name: assigning it to `check` would
    # replace the function with its return value, and the next iteration
    # could no longer call it.
    result = check(term, d)
    print(result)
When there's a match for the last item, it returns [93, 975, 148, 20], which is correct. For all the other entries in terms, which have no match in the image, it returns [151, 2078, 170, 8], the default. I have tried returning None in an else branch inside the function, but then it returns None for the matching terms as well.
the value of d:
{'level': [1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 2, 3, 4, 5, 5, 5, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 3, 4, 5, 5, 5, 5, 2, 3, 4, 5, 2, 3, 4, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 2, 3, 4, 5, 2, 3, 4, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 2, 3, 4, 5, 5, 5, 2, 3, 4, 5, 5, 5, 4, 5], 'page_num': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'block_num': [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14], 'par_num': [0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1], 'line_num': [0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2], 'word_num': [0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 1, 2, 3, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 0, 1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 0, 1, 0, 0, 1, 2, 3, 4, 0, 0, 0, 1, 0, 0, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 1, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 0, 0, 0, 1, 2, 3, 0, 0, 0, 1, 2, 3, 0, 1], 'left': [0, 93, 93, 93, 93, 145, 308, 513, 616, 694, 767, 93, 93, 93, 93, 348, 653, 94, 94, 94, 94, 155, 207, 243, 355, 489, 518, 570, 94, 94, 94, 94, 167, 199, 300, 384, 441, 472, 94, 94, 204, 269, 372, 471, 500, 94, 94, 237, 306, 411, 516, 94, 94, 95, 95, 173, 248, 294, 362, 394, 476, 507, 553, 665, 694, 765, 816, 875, 907, 974, 1009, 1127, 1270, 1306, 
1403, 94, 94, 228, 275, 323, 348, 461, 478, 508, 554, 675, 818, 885, 966, 1062, 1106, 1276, 1300, 1346, 1415, 95, 95, 177, 229, 288, 320, 397, 424, 451, 497, 568, 595, 713, 93, 93, 95, 95, 172, 251, 299, 360, 404, 528, 586, 621, 662, 692, 758, 790, 832, 994, 1068, 1123, 1159, 1200, 1281, 1316, 1371, 1402, 93, 93, 249, 282, 345, 409, 477, 496, 544, 600, 629, 688, 730, 852, 908, 937, 978, 1135, 1168, 1237, 1315, 1393, 93, 93, 139, 194, 241, 325, 379, 422, 494, 519, 642, 670, 754, 791, 924, 1000, 1029, 1069, 1226, 1263, 1331, 1400, 95, 95, 129, 198, 223, 264, 396, 462, 538, 581, 635, 660, 791, 845, 886, 1016, 1058, 1102, 1134, 1253, 1300, 1352, 1420, 94, 94, 172, 200, 256, 389, 408, 456, 538, 602, 649, 739, 818, 897, 944, 1001, 1024, 1123, 1157, 1198, 1355, 1389, 95, 95, 152, 198, 265, 357, 416, 476, 531, 601, 693, 762, 825, 860, 961, 996, 1052, 1114, 1178, 1214, 1258, 1307, 1382, 95, 95, 195, 255, 315, 358, 446, 501, 546, 650, 724, 804, 849, 883, 952, 977, 1088, 1108, 1137, 1183, 1303, 95, 95, 94, 94, 95, 95, 169, 257, 296, 451, 471, 512, 602, 681, 716, 839, 865, 1007, 1060, 1079, 1201, 1330, 94, 94, 135, 235, 292, 358, 462, 488, 529, 661, 93, 95, 95, 95, 168, 225, 277, 320, 386, 428, 472, 506, 614, 669, 712, 769, 793, 891, 939, 1034, 1064, 1098, 1123, 1186, 94, 94, 94, 207, 266, 298, 451, 479, 524, 595, 673, 717, 751, 827, 861, 904, 991, 1095, 1184, 352, 352, 93, 93, 93, 191, 221, 268, 94, 94, 94, 94, 94, 94, 94, 94, 246, 274, 889, 889, 985, 1014, 1091, 1122, 1157, 1276, 1301, 889, 889, 985, 1014, 1091, 1122, 1157, 1277, 1301, 93, 93, 93, 93, 94, 94, 94, 94, 246, 282, 376, 404, 503, 889, 889, 986, 1010, 889, 889, 889, 889, 986, 1010, 98, 93, 98, 98, 150, 1431, 227, 151], 'top': [0, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 155, 155, 155, 155, 155, 158, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273, 272, 407, 407, 407, 409, 407, 409, 408, 409, 415, 408, 474, 474, 476, 476, 474, 474, 474, 542, 544, 542, 543, 546, 542, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 
610, 612, 613, 610, 618, 610, 614, 610, 618, 610, 610, 610, 610, 610, 648, 651, 648, 648, 651, 648, 650, 648, 648, 648, 648, 650, 649, 652, 648, 648, 648, 648, 648, 648, 687, 687, 694, 688, 691, 689, 689, 689, 691, 687, 689, 687, 689, 754, 754, 754, 754, 762, 762, 754, 754, 754, 754, 754, 754, 762, 754, 758, 754, 754, 756, 758, 754, 754, 754, 754, 758, 758, 754, 793, 793, 801, 795, 795, 795, 793, 801, 793, 797, 793, 793, 793, 793, 797, 793, 793, 793, 793, 793, 801, 793, 831, 839, 832, 831, 832, 832, 832, 833, 833, 832, 831, 833, 831, 834, 832, 835, 832, 832, 834, 832, 832, 839, 870, 870, 870, 873, 870, 871, 872, 870, 875, 874, 873, 870, 870, 870, 870, 870, 874, 871, 870, 870, 878, 871, 873, 908, 908, 912, 916, 911, 908, 916, 911, 916, 908, 908, 909, 916, 909, 908, 911, 908, 908, 908, 908, 916, 908, 947, 947, 948, 947, 947, 949, 948, 947, 948, 947, 947, 952, 948, 948, 947, 949, 947, 948, 955, 947, 947, 947, 947, 986, 988, 988, 986, 986, 986, 988, 986, 989, 986, 986, 994, 986, 986, 989, 986, 988, 986, 986, 986, 986, 1026, 1026, 1092, 1092, 1092, 1092, 1096, 1092, 1093, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1130, 1138, 1130, 1130, 1130, 1130, 1133, 1130, 1130, 1132, 1198, 1198, 1198, 1198, 1202, 1199, 1198, 1198, 1198, 1202, 1198, 1198, 1199, 1199, 1198, 1201, 1198, 1199, 1198, 1202, 1199, 1201, 1199, 1198, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1274, 1266, 1266, 1266, 1266, 1267, 1333, 1333, 1401, 1401, 1403, 1401, 1401, 1401, 1469, 1469, 1469, 1469, 1522, 1522, 1522, 1522, 1524, 1522, 1590, 1591, 1590, 1590, 1594, 1590, 1590, 1593, 1590, 1657, 1658, 1657, 1658, 1661, 1658, 1657, 1660, 1657, 1727, 1727, 1727, 1727, 1778, 1778, 1778, 1778, 1778, 1782, 1780, 1778, 1785, 1847, 1847, 1849, 1847, 1913, 1913, 1913, 1914, 1916, 1913, 2043, 2043, 2043, 2043, 2043, 2058, 2078, 2078], 'width': [1530, 716, 716, 716, 43, 153, 197, 95, 69, 64, 42, 669, 669, 669, 234, 284, 109, 599, 599, 599, 52, 42, 
27, 102, 123, 21, 43, 123, 489, 489, 433, 65, 23, 92, 75, 50, 22, 55, 489, 101, 57, 94, 90, 23, 83, 479, 134, 59, 97, 96, 57, 1346, 1346, 1342, 68, 65, 35, 57, 24, 70, 23, 35, 102, 17, 61, 40, 47, 21, 56, 24, 109, 132, 27, 86, 34, 1346, 125, 39, 40, 17, 104, 8, 24, 38, 113, 133, 57, 72, 87, 36, 162, 18, 37, 60, 25, 632, 75, 43, 50, 24, 69, 18, 18, 38, 64, 19, 109, 14, 1346, 1346, 1342, 68, 71, 39, 52, 35, 115, 48, 27, 32, 23, 58, 22, 34, 151, 64, 47, 27, 32, 72, 26, 47, 22, 35, 1345, 150, 25, 55, 57, 58, 16, 40, 47, 21, 52, 34, 115, 48, 21, 35, 151, 27, 61, 70, 70, 45, 1344, 39, 49, 40, 78, 45, 34, 65, 16, 115, 23, 77, 31, 125, 69, 21, 34, 151, 28, 62, 62, 37, 1342, 25, 61, 17, 35, 123, 56, 68, 36, 47, 17, 123, 46, 35, 121, 36, 36, 26, 112, 40, 45, 60, 17, 1345, 69, 21, 49, 122, 16, 40, 74, 56, 40, 82, 71, 71, 39, 48, 15, 92, 27, 35, 150, 25, 50, 1342, 46, 35, 57, 84, 49, 49, 44, 58, 83, 59, 54, 26, 91, 26, 45, 52, 54, 25, 34, 40, 65, 55, 1341, 91, 52, 53, 35, 80, 44, 37, 96, 68, 72, 36, 26, 62, 18, 104, 13, 24, 39, 113, 133, 56, 56, 1343, 1343, 1342, 68, 82, 34, 151, 16, 35, 84, 72, 30, 117, 24, 135, 47, 16, 114, 124, 107, 624, 33, 92, 47, 59, 96, 17, 35, 123, 57, 1152, 1150, 1150, 66, 48, 44, 35, 56, 35, 36, 26, 100, 47, 35, 48, 15, 91, 40, 87, 21, 25, 17, 56, 59, 1144, 1144, 104, 51, 24, 142, 23, 37, 61, 69, 36, 26, 69, 25, 35, 80, 95, 80, 54, 157, 157, 324, 324, 89, 23, 38, 149, 63, 63, 63, 63, 1286, 1286, 259, 142, 18, 79, 484, 88, 22, 69, 22, 26, 109, 16, 72, 491, 88, 22, 69, 22, 26, 109, 15, 79, 27, 27, 27, 27, 988, 988, 513, 142, 27, 85, 18, 91, 104, 193, 88, 16, 72, 200, 200, 200, 88, 16, 79, 1337, 1342, 1337, 29, 175, 4, 94, 170], 'height': [2164, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 51, 51, 51, 40, 51, 36, 23, 23, 23, 23, 18, 18, 18, 18, 18, 18, 23, 161, 161, 26, 18, 20, 18, 23, 18, 12, 25, 24, 20, 18, 18, 20, 24, 24, 26, 24, 20, 23, 16, 26, 103, 103, 26, 20, 20, 20, 20, 20, 20, 20, 20, 24, 17, 20, 18, 20, 16, 20, 12, 20, 26, 26, 26, 20, 26, 23, 20, 
20, 17, 26, 18, 20, 20, 20, 26, 18, 19, 16, 20, 20, 20, 20, 20, 20, 26, 20, 19, 19, 16, 18, 18, 18, 16, 20, 18, 26, 18, 290, 290, 26, 20, 12, 18, 20, 20, 26, 20, 26, 20, 12, 20, 16, 20, 20, 18, 16, 26, 20, 20, 20, 16, 16, 20, 26, 20, 12, 18, 18, 18, 20, 18, 20, 16, 20, 20, 26, 20, 16, 20, 20, 26, 24, 26, 12, 20, 26, 18, 19, 20, 25, 19, 19, 18, 18, 19, 20, 18, 20, 17, 19, 16, 19, 19, 17, 19, 19, 12, 26, 20, 20, 17, 20, 25, 18, 20, 15, 16, 17, 20, 20, 20, 26, 20, 16, 19, 26, 20, 18, 19, 17, 26, 26, 16, 18, 17, 20, 18, 23, 12, 26, 26, 25, 12, 19, 20, 17, 20, 26, 20, 20, 12, 20, 26, 20, 19, 20, 20, 18, 25, 20, 23, 20, 20, 15, 19, 19, 26, 24, 24, 23, 12, 20, 26, 26, 20, 26, 18, 24, 20, 20, 26, 24, 20, 23, 20, 20, 12, 20, 20, 17, 26, 18, 20, 20, 20, 26, 18, 18, 64, 64, 26, 20, 16, 20, 19, 20, 20, 26, 20, 20, 20, 20, 26, 24, 20, 26, 20, 26, 26, 12, 20, 20, 20, 26, 17, 20, 26, 18, 229, 26, 26, 26, 16, 19, 20, 20, 20, 16, 20, 20, 19, 19, 20, 17, 20, 19, 20, 16, 19, 17, 25, 20, 93, 26, 20, 20, 20, 26, 24, 20, 24, 20, 20, 20, 20, 12, 20, 20, 20, 26, 25, 26, 26, 26, 26, 18, 20, 20, 26, 20, 20, 20, 20, 161, 161, 26, 26, 18, 20, 26, 23, 20, 20, 16, 20, 26, 17, 24, 26, 23, 20, 19, 16, 19, 26, 17, 26, 18, 18, 18, 18, 92, 92, 26, 26, 26, 16, 18, 20, 19, 23, 23, 17, 23, 26, 26, 26, 23, 17, 26, 57, 57, 57, 57, 40, 12, 8, 8], 'conf': ['-1', '-1', '-1', '-1', 96, 93, 90, 95, 95, 96, 96, '-1', '-1', '-1', 96, 96, 96, '-1', '-1', '-1', 95, 95, 96, 96, 96, 95, 96, 96, '-1', '-1', '-1', 96, 96, 96, 96, 96, 96, 96, '-1', 96, 96, 96, 96, 96, 96, '-1', 96, 96, 96, 96, 96, '-1', '-1', '-1', 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, '-1', 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, '-1', 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, '-1', '-1', '-1', 96, 96, 96, 96, 96, 96, 95, 95, 96, 96, 96, 96, 97, 95, 96, 96, 96, 96, 96, 96, 96, 96, 96, '-1', 95, 96, 96, 80, 96, 95, 96, 96, 96, 96, 97, 96, 95, 95, 96, 96, 96, 96, 
96, 96, 96, '-1', 96, 96, 96, 96, 96, 96, 34, 92, 96, 96, 96, 96, 96, 96, 95, 95, 96, 93, 90, 96, 96, '-1', 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 97, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, '-1', 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, '-1', 96, 96, 96, 96, 96, 92, 94, 96, 96, 96, 96, 96, 96, 96, 84, 81, 96, 96, 96, 96, 96, 96, '-1', 96, 96, 93, 96, 96, 96, 96, 93, 88, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, '-1', 96, '-1', '-1', '-1', 96, 96, 96, 95, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 95, 96, 96, '-1', 96, 94, 96, 96, 95, 95, 96, 96, 96, '-1', '-1', '-1', 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, '-1', '-1', 96, 96, 96, 96, 90, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 97, 96, '-1', 87, '-1', '-1', 96, 96, 97, 96, '-1', '-1', '-1', 95, '-1', '-1', '-1', 96, 96, 96, '-1', 96, 96, 96, 96, 97, 96, 96, 96, '-1', 96, 96, 96, 96, 96, 96, 96, 96, '-1', '-1', '-1', 96, '-1', '-1', '-1', 96, 96, 96, 96, 96, 96, '-1', 96, 96, 96, '-1', '-1', '-1', 96, 95, 95, '-1', '-1', '-1', 55, 86, 95, '-1', 1], 'text': ['', '', '', '', 'THE', 'COLCHESTER', 'MULTI-STRATEGY', 'GLOBAL', 'BOND', 'FUND', 'PLC', '', '', '', 'Additional', 'Subscription', 'Form', '', '', '', '(FOR', 'USE', 'BY', 'EXISTING', 'INVESTORS', 'IN', 'THE', 'COMPANY)', '', '', '', 'Name', 'of', 'Investor', '(Please', 'Print', 'or', 'Type)', '', 'Northern', 'Trust', 'Account', 'Number', '(if', 'known):', '', 'Designation', 'Code', '(nominee', 'accounts', 'only):', '', '', '', 'Please', 'select', 'the', 'Class', 'of', 'Shares', 'of', 'the', 'Company', 'in', 'which', 'you', 'wish', 'to', 'make', 'an', 'additional', 'subscription', 'by', 'marking', 'the', '', 'appropriate', 'tick', 'box', 'in', 'Appendix', '1', 'of', 'this', 'Additional', 'Subscription', 'Form.', 'Please', 'contact', 'the', 'Administrator', 'if', 'the', 'Class', 'of', '', 'Shares', 'you', 'wish', 'to', 'invest', 'in', 'is', 'not', 'listed', 'in', 'Appendix', 
'1.', '', '', '', 'Please', 'ensure', 'you', 'send', 'the', 'completed', 'form', 'by', 'fax', 'or', 'email', 'to', 'the', 'Administrator.', 'Forms', 'sent', 'by', 'fax', 'should', 'be', 'sent', 'to', 'the', '', 'Administrator', 'on', '+353', '1542', '2902.', 'If', 'you', 'wish', 'to', 'send', 'the', 'completed', 'form', 'to', 'the', 'Administrator', 'by', 'email,', 'please', 'ensure', 'that', '', 'you', 'read', 'and', 'comply', 'with', 'the', '‘Terms', '&', 'Conditions', 'of', 'Service', 'for', 'instructions', 'issued', 'to', 'the', 'Administrator', 'via', "Email'", 'which', 'can', '', 'be', 'found', 'in', 'the', 'Application', 'Form.', 'Emails', 'not', 'sent', 'in', 'accordance', 'with', 'the', 'procedures', 'will', 'not', 'be', 'processed', 'and', 'may', 'result', 'in', '', 'delays', 'to', 'your', 'investment.', 'If', 'you', 'require', 'same', 'day', 'dealing,', 'please', 'ensure', 'this', 'form', 'is', 'received', 'by', 'the', 'Administrator', 'no', 'later', '', 'than', 'the', 'Trade', 'Cut-Off', 'Time', '(1pm', 'Irish', 'time).', 'Cleared', 'funds', 'must', 'be', 'received', 'by', '6pm', '(Irish', 'time)', 'on', 'the', 'day', 'falling', 'three', '', 'Business', 'Days', 'after', 'the', 'Dealing', 'Day.', 'The', 'receiving', "banks'", 'details', 'can', 'be', 'found', 'in', 'Appendix', '2', 'of', 'this', 'Additional', 'Subscription', '', 'Form.', '', '', '', 'Please', 'contact', 'the', 'Administrator', 'if', 'the', 'banking', 'details', 'for', 'settlement', 'of', 'redemptions', '(and', 'if', 'applicable,', 'distribution', 'proceeds)', '', 'are', 'different', 'from', 'those', 'provided', 'in', 'the', 'Application', 'Form.', '', '', '', 'Kindly', 'note', 'that', 'the', 'trade', 'will', 'not', 'be', 'executed', 'until', 'the', 'form', 'is', 'received', 'and', 'deemed', 'to', 'be', 'in', 'good', 'order.', '', '', 'Preferred', 'date', 'of', 'Subscription', '(if', 'left', 'blank,', 'shares', 'will', 'be', 'issued', 'on', 'the', 'earliest', 'available', 'Dealing', 
'Day):', '', '(dd/mm/yyyy)', '', '', 'Amount', 'of', 'the', 'Subscription:', '', '', '', 'Either', '', '', '', 'Subscription', 'in', 'shares:', '', '(amount', 'of', 'shares', 'to', 'be', 'purchased', 'in', 'words)', '', '(amount', 'of', 'shares', 'to', 'be', 'purchased', 'in', 'figures)', '', '', '', 'Or', '', '', '', 'Subscription', 'by', 'amount', 'in', 'relevant', 'currency:', '', '(amount', 'in', 'words)', '', '', '', '(amount', 'in', 'figures)', '', '', '', 'GS', 'COLCHESTER*', '1', '', 'Genero']}
Solution
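You need to guard your return statement so that it does not return a list when to_match is empty. That happens at the end of d['text'], where the slice runs out of words: zip() then yields nothing, and all() of nothing is True: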
d = {
# removed irrelevant stuff
'level': [], 'page_num': [], 'block_num': [], 'par_num': [], 'line_num': [], 'word_num': [], 'conf': [],
# keep important data
'left': [0, 93, 93, 93, 93, 145, 308, 513, 616, 694, 767, 93, 93, 93, 93, 348, 653, 94, 94, 94, 94, 155, 207, 243, 355, 489, 518, 570, 94, 94, 94, 94, 167, 199, 300, 384, 441, 472, 94, 94, 204, 269, 372, 471, 500, 94, 94, 237, 306, 411, 516, 94, 94, 95, 95, 173, 248, 294, 362, 394, 476, 507, 553, 665, 694, 765, 816, 875, 907, 974, 1009, 1127, 1270, 1306, 1403, 94, 94, 228, 275, 323, 348, 461, 478, 508, 554, 675, 818, 885, 966, 1062, 1106, 1276, 1300, 1346, 1415, 95, 95, 177, 229, 288, 320, 397, 424, 451, 497, 568, 595, 713, 93, 93, 95, 95, 172, 251, 299, 360, 404, 528, 586, 621, 662, 692, 758, 790, 832, 994, 1068, 1123, 1159, 1200, 1281, 1316, 1371, 1402, 93, 93, 249, 282, 345, 409, 477, 496, 544, 600, 629, 688, 730, 852, 908, 937, 978, 1135, 1168, 1237, 1315, 1393, 93, 93, 139, 194, 241, 325, 379, 422, 494, 519, 642, 670, 754, 791, 924, 1000, 1029, 1069, 1226, 1263, 1331, 1400, 95, 95, 129, 198, 223, 264, 396, 462, 538, 581, 635, 660, 791, 845, 886, 1016, 1058, 1102, 1134, 1253, 1300, 1352, 1420, 94, 94, 172, 200, 256, 389, 408, 456, 538, 602, 649, 739, 818, 897, 944, 1001, 1024, 1123, 1157, 1198, 1355, 1389, 95, 95, 152, 198, 265, 357, 416, 476, 531, 601, 693, 762, 825, 860, 961, 996, 1052, 1114, 1178, 1214, 1258, 1307, 1382, 95, 95, 195, 255, 315, 358, 446, 501, 546, 650, 724, 804, 849, 883, 952, 977, 1088, 1108, 1137, 1183, 1303, 95, 95, 94, 94, 95, 95, 169, 257, 296, 451, 471, 512, 602, 681, 716, 839, 865, 1007, 1060, 1079, 1201, 1330, 94, 94, 135, 235, 292, 358, 462, 488, 529, 661, 93, 95, 95, 95, 168, 225, 277, 320, 386, 428, 472, 506, 614, 669, 712, 769, 793, 891, 939, 1034, 1064, 1098, 1123, 1186, 94, 94, 94, 207, 266, 298, 451, 479, 524, 595, 673, 717, 751, 827, 861, 904, 991, 1095, 1184, 352, 352, 93, 93, 93, 191, 221, 268, 94, 94, 94, 94, 94, 94, 94, 94, 246, 274, 889, 889, 985, 1014, 1091, 1122, 1157, 1276, 1301, 889, 889, 985, 1014, 1091, 1122, 1157, 1277, 1301, 93, 93, 93, 93, 94, 94, 94, 94, 246, 282, 376, 404, 503, 889, 889, 986, 1010, 889, 889, 
889, 889, 986, 1010, 98, 93, 98, 98, 150, 1431, 227, 151],
'top': [0, 94, 94, 94, 94, 94, 94, 94, 94, 94, 94, 155, 155, 155, 155, 155, 158, 272, 272, 272, 272, 273, 273, 273, 273, 273, 273, 272, 407, 407, 407, 409, 407, 409, 408, 409, 415, 408, 474, 474, 476, 476, 474, 474, 474, 542, 544, 542, 543, 546, 542, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 610, 612, 613, 610, 618, 610, 614, 610, 618, 610, 610, 610, 610, 610, 648, 651, 648, 648, 651, 648, 650, 648, 648, 648, 648, 650, 649, 652, 648, 648, 648, 648, 648, 648, 687, 687, 694, 688, 691, 689, 689, 689, 691, 687, 689, 687, 689, 754, 754, 754, 754, 762, 762, 754, 754, 754, 754, 754, 754, 762, 754, 758, 754, 754, 756, 758, 754, 754, 754, 754, 758, 758, 754, 793, 793, 801, 795, 795, 795, 793, 801, 793, 797, 793, 793, 793, 793, 797, 793, 793, 793, 793, 793, 801, 793, 831, 839, 832, 831, 832, 832, 832, 833, 833, 832, 831, 833, 831, 834, 832, 835, 832, 832, 834, 832, 832, 839, 870, 870, 870, 873, 870, 871, 872, 870, 875, 874, 873, 870, 870, 870, 870, 870, 874, 871, 870, 870, 878, 871, 873, 908, 908, 912, 916, 911, 908, 916, 911, 916, 908, 908, 909, 916, 909, 908, 911, 908, 908, 908, 908, 916, 908, 947, 947, 948, 947, 947, 949, 948, 947, 948, 947, 947, 952, 948, 948, 947, 949, 947, 948, 955, 947, 947, 947, 947, 986, 988, 988, 986, 986, 986, 988, 986, 989, 986, 986, 994, 986, 986, 989, 986, 988, 986, 986, 986, 986, 1026, 1026, 1092, 1092, 1092, 1092, 1096, 1092, 1093, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1092, 1130, 1138, 1130, 1130, 1130, 1130, 1133, 1130, 1130, 1132, 1198, 1198, 1198, 1198, 1202, 1199, 1198, 1198, 1198, 1202, 1198, 1198, 1199, 1199, 1198, 1201, 1198, 1199, 1198, 1202, 1199, 1201, 1199, 1198, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1266, 1274, 1266, 1266, 1266, 1266, 1267, 1333, 1333, 1401, 1401, 1403, 1401, 1401, 1401, 1469, 1469, 1469, 1469, 1522, 1522, 1522, 1522, 1524, 1522, 1590, 1591, 1590, 1590, 1594, 1590, 1590, 1593, 1590, 1657, 1658, 1657, 1658, 1661, 1658, 1657, 1660, 1657, 
1727, 1727, 1727, 1727, 1778, 1778, 1778, 1778, 1778, 1782, 1780, 1778, 1785, 1847, 1847, 1849, 1847, 1913, 1913, 1913, 1914, 1916, 1913, 2043, 2043, 2043, 2043, 2043, 2058, 2078, 2078],
'width': [1530, 716, 716, 716, 43, 153, 197, 95, 69, 64, 42, 669, 669, 669, 234, 284, 109, 599, 599, 599, 52, 42, 27, 102, 123, 21, 43, 123, 489, 489, 433, 65, 23, 92, 75, 50, 22, 55, 489, 101, 57, 94, 90, 23, 83, 479, 134, 59, 97, 96, 57, 1346, 1346, 1342, 68, 65, 35, 57, 24, 70, 23, 35, 102, 17, 61, 40, 47, 21, 56, 24, 109, 132, 27, 86, 34, 1346, 125, 39, 40, 17, 104, 8, 24, 38, 113, 133, 57, 72, 87, 36, 162, 18, 37, 60, 25, 632, 75, 43, 50, 24, 69, 18, 18, 38, 64, 19, 109, 14, 1346, 1346, 1342, 68, 71, 39, 52, 35, 115, 48, 27, 32, 23, 58, 22, 34, 151, 64, 47, 27, 32, 72, 26, 47, 22, 35, 1345, 150, 25, 55, 57, 58, 16, 40, 47, 21, 52, 34, 115, 48, 21, 35, 151, 27, 61, 70, 70, 45, 1344, 39, 49, 40, 78, 45, 34, 65, 16, 115, 23, 77, 31, 125, 69, 21, 34, 151, 28, 62, 62, 37, 1342, 25, 61, 17, 35, 123, 56, 68, 36, 47, 17, 123, 46, 35, 121, 36, 36, 26, 112, 40, 45, 60, 17, 1345, 69, 21, 49, 122, 16, 40, 74, 56, 40, 82, 71, 71, 39, 48, 15, 92, 27, 35, 150, 25, 50, 1342, 46, 35, 57, 84, 49, 49, 44, 58, 83, 59, 54, 26, 91, 26, 45, 52, 54, 25, 34, 40, 65, 55, 1341, 91, 52, 53, 35, 80, 44, 37, 96, 68, 72, 36, 26, 62, 18, 104, 13, 24, 39, 113, 133, 56, 56, 1343, 1343, 1342, 68, 82, 34, 151, 16, 35, 84, 72, 30, 117, 24, 135, 47, 16, 114, 124, 107, 624, 33, 92, 47, 59, 96, 17, 35, 123, 57, 1152, 1150, 1150, 66, 48, 44, 35, 56, 35, 36, 26, 100, 47, 35, 48, 15, 91, 40, 87, 21, 25, 17, 56, 59, 1144, 1144, 104, 51, 24, 142, 23, 37, 61, 69, 36, 26, 69, 25, 35, 80, 95, 80, 54, 157, 157, 324, 324, 89, 23, 38, 149, 63, 63, 63, 63, 1286, 1286, 259, 142, 18, 79, 484, 88, 22, 69, 22, 26, 109, 16, 72, 491, 88, 22, 69, 22, 26, 109, 15, 79, 27, 27, 27, 27, 988, 988, 513, 142, 27, 85, 18, 91, 104, 193, 88, 16, 72, 200, 200, 200, 88, 16, 79, 1337, 1342, 1337, 29, 175, 4, 94, 170],
'height': [2164, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 51, 51, 51, 40, 51, 36, 23, 23, 23, 23, 18, 18, 18, 18, 18, 18, 23, 161, 161, 26, 18, 20, 18, 23, 18, 12, 25, 24, 20, 18, 18, 20, 24, 24, 26, 24, 20, 23, 16, 26, 103, 103, 26, 20, 20, 20, 20, 20, 20, 20, 20, 24, 17, 20, 18, 20, 16, 20, 12, 20, 26, 26, 26, 20, 26, 23, 20, 20, 17, 26, 18, 20, 20, 20, 26, 18, 19, 16, 20, 20, 20, 20, 20, 20, 26, 20, 19, 19, 16, 18, 18, 18, 16, 20, 18, 26, 18, 290, 290, 26, 20, 12, 18, 20, 20, 26, 20, 26, 20, 12, 20, 16, 20, 20, 18, 16, 26, 20, 20, 20, 16, 16, 20, 26, 20, 12, 18, 18, 18, 20, 18, 20, 16, 20, 20, 26, 20, 16, 20, 20, 26, 24, 26, 12, 20, 26, 18, 19, 20, 25, 19, 19, 18, 18, 19, 20, 18, 20, 17, 19, 16, 19, 19, 17, 19, 19, 12, 26, 20, 20, 17, 20, 25, 18, 20, 15, 16, 17, 20, 20, 20, 26, 20, 16, 19, 26, 20, 18, 19, 17, 26, 26, 16, 18, 17, 20, 18, 23, 12, 26, 26, 25, 12, 19, 20, 17, 20, 26, 20, 20, 12, 20, 26, 20, 19, 20, 20, 18, 25, 20, 23, 20, 20, 15, 19, 19, 26, 24, 24, 23, 12, 20, 26, 26, 20, 26, 18, 24, 20, 20, 26, 24, 20, 23, 20, 20, 12, 20, 20, 17, 26, 18, 20, 20, 20, 26, 18, 18, 64, 64, 26, 20, 16, 20, 19, 20, 20, 26, 20, 20, 20, 20, 26, 24, 20, 26, 20, 26, 26, 12, 20, 20, 20, 26, 17, 20, 26, 18, 229, 26, 26, 26, 16, 19, 20, 20, 20, 16, 20, 20, 19, 19, 20, 17, 20, 19, 20, 16, 19, 17, 25, 20, 93, 26, 20, 20, 20, 26, 24, 20, 24, 20, 20, 20, 20, 12, 20, 20, 20, 26, 25, 26, 26, 26, 26, 18, 20, 20, 26, 20, 20, 20, 20, 161, 161, 26, 26, 18, 20, 26, 23, 20, 20, 16, 20, 26, 17, 24, 26, 23, 20, 19, 16, 19, 26, 17, 26, 18, 18, 18, 18, 92, 92, 26, 26, 26, 16, 18, 20, 19, 23, 23, 17, 23, 26, 26, 26, 23, 17, 26, 57, 57, 57, 57, 40, 12, 8, 8],
'text': ['', '', '', '', 'THE', 'COLCHESTER', 'MULTI-STRATEGY', 'GLOBAL', 'BOND', 'FUND', 'PLC', '', '', '', 'Additional', 'Subscription', 'Form', '', '', '', '(FOR', 'USE', 'BY', 'EXISTING', 'INVESTORS', 'IN', 'THE', 'COMPANY)', '', '', '', 'Name', 'of', 'Investor', '(Please', 'Print', 'or', 'Type)', '', 'Northern', 'Trust', 'Account', 'Number', '(if', 'known):', '', 'Designation', 'Code', '(nominee', 'accounts', 'only):', '', '', '', 'Please', 'select', 'the', 'Class', 'of', 'Shares', 'of', 'the', 'Company', 'in', 'which', 'you', 'wish', 'to', 'make', 'an', 'additional', 'subscription', 'by', 'marking', 'the', '', 'appropriate', 'tick', 'box', 'in', 'Appendix', '1', 'of', 'this', 'Additional', 'Subscription', 'Form.', 'Please', 'contact', 'the', 'Administrator', 'if', 'the', 'Class', 'of', '', 'Shares', 'you', 'wish', 'to', 'invest', 'in', 'is', 'not', 'listed', 'in', 'Appendix', '1.', '', '', '', 'Please', 'ensure', 'you', 'send', 'the', 'completed', 'form', 'by', 'fax', 'or', 'email', 'to', 'the', 'Administrator.', 'Forms', 'sent', 'by', 'fax', 'should', 'be', 'sent', 'to', 'the', '', 'Administrator', 'on', '+353', '1542', '2902.', 'If', 'you', 'wish', 'to', 'send', 'the', 'completed', 'form', 'to', 'the', 'Administrator', 'by', 'email,', 'please', 'ensure', 'that', '', 'you', 'read', 'and', 'comply', 'with', 'the', '‘Terms', '&', 'Conditions', 'of', 'Service', 'for', 'instructions', 'issued', 'to', 'the', 'Administrator', 'via', "Email'", 'which', 'can', '', 'be', 'found', 'in', 'the', 'Application', 'Form.', 'Emails', 'not', 'sent', 'in', 'accordance', 'with', 'the', 'procedures', 'will', 'not', 'be', 'processed', 'and', 'may', 'result', 'in', '', 'delays', 'to', 'your', 'investment.', 'If', 'you', 'require', 'same', 'day', 'dealing,', 'please', 'ensure', 'this', 'form', 'is', 'received', 'by', 'the', 'Administrator', 'no', 'later', '', 'than', 'the', 'Trade', 'Cut-Off', 'Time', '(1pm', 'Irish', 'time).', 'Cleared', 'funds', 'must', 'be', 'received', 'by', 
'6pm', '(Irish', 'time)', 'on', 'the', 'day', 'falling', 'three', '', 'Business', 'Days', 'after', 'the', 'Dealing', 'Day.', 'The', 'receiving', "banks'", 'details', 'can', 'be', 'found', 'in', 'Appendix', '2', 'of', 'this', 'Additional', 'Subscription', '', 'Form.', '', '', '', 'Please', 'contact', 'the', 'Administrator', 'if', 'the', 'banking', 'details', 'for', 'settlement', 'of', 'redemptions', '(and', 'if', 'applicable,', 'distribution', 'proceeds)', '', 'are', 'different', 'from', 'those', 'provided', 'in', 'the', 'Application', 'Form.', '', '', '', 'Kindly', 'note', 'that', 'the', 'trade', 'will', 'not', 'be', 'executed', 'until', 'the', 'form', 'is', 'received', 'and', 'deemed', 'to', 'be', 'in', 'good', 'order.', '', '', 'Preferred', 'date', 'of', 'Subscription', '(if', 'left', 'blank,', 'shares', 'will', 'be', 'issued', 'on', 'the', 'earliest', 'available', 'Dealing', 'Day):', '', '(dd/mm/yyyy)', '', '', 'Amount', 'of', 'the', 'Subscription:', '', '', '', 'Either', '', '', '', 'Subscription', 'in', 'shares:', '', '(amount', 'of', 'shares', 'to', 'be', 'purchased', 'in', 'words)', '', '(amount', 'of', 'shares', 'to', 'be', 'purchased', 'in', 'figures)', '', '', '', 'Or', '', '', '', 'Subscription', 'by', 'amount', 'in', 'relevant', 'currency:', '', '(amount', 'in', 'words)', '', '', '', '(amount', 'in', 'figures)', '', '', '', 'GS', 'COLCHESTER*', '1', '', 'Genero']}
def check(word, d):
    """Debug version: print the slice that gets accepted, then return its box.

    Args:
        word: Phrase to look for; it is split on whitespace into single words.
        d: Dict of parallel lists with the keys 'text', 'left', 'top',
            'width' and 'height'.

    Returns:
        [left, top, width, height] read at index ``i`` of the first accepted
        position, or None if the loop ends without accepting one.
    """
    phrases = [w for w in word.split()]
    for i, t in enumerate(d['text']):
        to_match = d['text'][i+1: i+1+len(phrases)]
        if all(p == m for p, m in zip(phrases, to_match)):
            print(to_match)  # analyze what's wrong
            return [*map(lambda x: d[x][i], ['left', 'top', 'width', 'height'])]


terms = ["Print Name of Subscriber", "Investor's Name", "Name of Investor"]
for term in terms:
    ch = check(term, d)  # rename, don't overwrite your function
    print(ch)
Debug output:
[] # print(to_match)
[151, 2078, 170, 8]
[] # print(to_match)
[151, 2078, 170, 8]
['Name', 'of', 'Investor'] # print(to_match)
[94, 407, 433, 26]
and fix it:
def check(word, d):
    """Locate a multi-word phrase in OCR output and return its box.

    Args:
        word: Phrase to look for; it is split on whitespace into single words.
        d: Dict of parallel lists with the keys 'text', 'left', 'top',
            'width' and 'height'.

    Returns:
        [left, top, width, height] read at index ``i``, where the phrase's
        words are exactly ``d['text'][i+1 : i+1+len(phrases)]``; None when
        the phrase does not occur (or `word` contains no words).
    """
    phrases = word.split()
    if not phrases:
        # An empty phrase would "match" an empty slice; treat it as no match.
        return None
    texts = d['text']
    for i, _ in enumerate(texts):
        to_match = texts[i + 1: i + 1 + len(phrases)]
        # Don't accept an empty or shortened slice: near the end of the list
        # the slice runs out of words, and all(... zip(...)) would still be
        # True for it. Comparing the whole slice requires every word to be
        # present and equal.
        if to_match == phrases:
            return [d[key][i] for key in ('left', 'top', 'width', 'height')]
    return None
New output:
None
None
[94, 407, 433, 26]
Answered By - Patrick Artner
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.