Hello,
I am encountering an issue with Tesseract OCR when trying to detect white text on a blue background. Despite various preprocessing techniques, the OCR is not accurately recognizing the text on this specific background.
Details: Tesseract version: tesseract v5.0.0-alpha.20210506
Language Pack: English
Image characteristics: Background color: Blue
Text color: White
Image resolution: 1920×1080
Image format: PNG
Preprocessing techniques applied: 1. Grayscale conversion
2. Contrast adjustment
3. Binary thresholding
4. Inversion of the image
5. Morphological operations
6. Increase Contrast
7. ROI
8. Convert the image to the HSV color space, create a mask to isolate the blue regions, invert the mask to focus on the text, and use the mask to extract the white text
Script/Code Used:
import cv2
import pytesseract
import pyautogui
import time
import numpy as np
# Point pytesseract at the Tesseract binary (Windows default install path).
# Only needed when tesseract.exe is not already on the system PATH.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
def preprocess_image_gray(image):
    """Convert a BGR image to single-channel grayscale and preview it.

    The imshow/waitKey calls are blocking debug previews: each window
    stays open until a key is pressed.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cv2.imshow("Gray Scale Image", grayscale)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return grayscale
def preprocess_image_increase_contrast(image):
    """Scale pixel intensities by 1.5x (no offset) to boost contrast, with a debug preview."""
    boosted = cv2.convertScaleAbs(image, alpha=1.5, beta=0)
    cv2.imshow("Increase contrast", boosted)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return boosted
def preprocess_image_gaussian_blur(image):
    """Smooth the image with a 5x5 Gaussian kernel (sigma auto-derived), with a debug preview."""
    smoothed = cv2.GaussianBlur(image, (5, 5), 0)
    cv2.imshow("GaussianBlur", smoothed)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return smoothed
def preprocess_image_edge_detection(image):
    """Run Canny edge detection with hysteresis thresholds 50/150, with a debug preview."""
    edges = cv2.Canny(image, 50, 150)
    cv2.imshow("edge detection", edges)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return edges
def preprocess_image_inverted(image):
    """Invert every pixel (255 - value), turning light text dark and vice versa, with a debug preview."""
    negated = cv2.bitwise_not(image)
    cv2.imshow("Inverted Image", negated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return negated
def preprocess_image_dialte_edges(image):
    """Dilate the image, mask the original with the dilation, and invert the result.

    BUGFIX: cv2.bitwise_and requires a single-channel 8-bit mask, but the
    original code passed the 3-channel dilated BGR image directly as the
    mask, which raises cv2.error for the color input perform_ocr supplies.
    The mask is now collapsed to one channel first.

    (Function name keeps the original "dialte" spelling so existing
    callers are unaffected.)
    """
    # Thicken strokes / merge nearby features.
    dilated = cv2.dilate(image, None, iterations=2)
    cv2.imshow("dilate", dilated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Collapse a color dilation to grayscale so it is a valid mask.
    mask = dilated if dilated.ndim == 2 else cv2.cvtColor(dilated, cv2.COLOR_BGR2GRAY)
    # Keep only the original pixels where the mask is non-zero.
    result = cv2.bitwise_and(image, image, mask=mask)
    cv2.imshow("Bitwise-AND mask and original image", result)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Invert so dark text becomes light (or vice versa) for OCR.
    inverted_image = cv2.bitwise_not(result)
    cv2.imshow("Inverted Image", inverted_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return inverted_image
def perform_ocr(image_path, text_to_find=None, config="--psm 6 --oem 3", preprocess_func=preprocess_image_gray):
    """Read an image, preprocess it, and run Tesseract word-level OCR on it.

    Args:
        image_path: Path of the image file to read.
        text_to_find: Optional string or list of strings of interest
            (normalized to a list; currently not used for filtering).
        config: Tesseract command-line configuration flags.
        preprocess_func: Callable applied to the BGR image before OCR.

    Returns:
        List of dicts with keys text/left/right/top/bottom/confidence
        (confidence on a 0.0-1.0 scale). Empty list on any failure.
    """
    global ocr_results
    # BUGFIX: initialize before the try block so a failure can no longer
    # raise NameError on the global, or return stale results from a
    # previous call.
    ocr_results = []
    try:
        image = cv2.imread(image_path)
        # cv2.imread returns None (no exception) on a missing/unreadable file.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image_preprocessed = preprocess_func(image)
        # BUGFIX: most preprocessors return single-channel images, and
        # COLOR_BGR2RGB on a single-channel input raises cv2.error.
        # Only convert when the image actually has 3 channels.
        if image_preprocessed.ndim == 3 and image_preprocessed.shape[2] == 3:
            image_rgb = cv2.cvtColor(image_preprocessed, cv2.COLOR_BGR2RGB)
        else:
            image_rgb = image_preprocessed
        ocr_data = pytesseract.image_to_data(image_rgb, output_type=pytesseract.Output.DICT, config=config)
        if text_to_find is not None and not isinstance(text_to_find, list):
            text_to_find = [text_to_find]
        for i in range(len(ocr_data['text'])):
            text = ocr_data['text'][i].strip()
            if not text:
                continue
            # Tesseract reports conf as 0-100 (or -1 for non-word boxes).
            confidence = float(ocr_data['conf'][i]) / 100.0
            if confidence < 0.2:  # Ignore results with confidence below 0.2
                continue
            ocr_results.append({
                "text": text,
                "left": ocr_data['left'][i],
                "right": ocr_data['left'][i] + ocr_data['width'][i],
                "top": ocr_data['top'][i],
                "bottom": ocr_data['top'][i] + ocr_data['height'][i],
                "confidence": confidence,
            })
    except Exception as e:
        print(f"An error occurred in the main function: {e}")
    return ocr_results
def draw_boxes(image_path, ocr_results, output_image_path):
    """Annotate a copy of the image with green boxes and text/confidence labels.

    Each OCR result dict supplies left/top/right/bottom pixel coordinates;
    an optional 'distance' key is appended to the label when present.
    """
    canvas = cv2.imread(image_path)
    for entry in ocr_results:
        left, top = entry['left'], entry['top']
        right, bottom = entry['right'], entry['bottom']
        cv2.rectangle(canvas, (left, top), (right, bottom), (0, 255, 0), 2)
        label = f"{entry['text']} ({entry['confidence']:.2f})"
        if 'distance' in entry:
            label += f" ({entry['distance']:.2f})"
        # Place the label just above the box's top-left corner.
        cv2.putText(canvas, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    cv2.imwrite(output_image_path, canvas)
def increase_brightness(img, value=50):
    """Return a copy of a BGR image with its HSV value (brightness) channel raised.

    Args:
        img: BGR uint8 image.
        value: Amount added to the V channel (saturating at 255).

    Note: cv2.add performs saturating uint8 arithmetic, so results are
    already clamped to [0, 255]; the original manual clamping
    (v[v > 255] = 255 / v[v < 0] = 0) was dead code and has been removed.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)
    v = cv2.add(v, value)  # saturating add: no overflow wraparound
    final_hsv = cv2.merge((h, s, v))
    return cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
def isolate_white_text_on_blue(image):
    """Mask out the blue background so only the (white) text pixels remain.

    Pipeline: brighten -> HSV -> blue-range mask -> invert mask -> apply
    to the brightened image. Each stage opens a blocking debug window.

    BUGFIX: the final debug window previously displayed the inverted
    mask (blue_mask_inv) instead of the actual masked result.
    """
    # Brighten first so faint white strokes survive the masking step.
    brightened = increase_brightness(image, value=60)
    cv2.imshow("Brightened image", brightened)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # HSV makes hue-based (blue) segmentation straightforward.
    hsv = cv2.cvtColor(brightened, cv2.COLOR_BGR2HSV)
    cv2.imshow("HSV converted image", hsv)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Hue 100-140 covers typical blues in OpenCV's 0-179 hue scale.
    lower_blue = np.array([100, 150, 0])
    upper_blue = np.array([140, 255, 255])
    blue_mask = cv2.inRange(hsv, lower_blue, upper_blue)
    cv2.imshow("Blue Mask image", blue_mask)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Invert so the non-blue (text) pixels are the ones selected.
    blue_mask_inv = cv2.bitwise_not(blue_mask)
    cv2.imshow(" Inverted Mask image", blue_mask_inv)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Keep only pixels outside the blue regions.
    white_text_on_blue = cv2.bitwise_and(brightened, brightened, mask=blue_mask_inv)
    # BUGFIX: show the extracted text image, not the mask.
    cv2.imshow("White Text image", white_text_on_blue)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    return white_text_on_blue
def move_and_click(ocr_results):
    """Move the mouse to the center of each OCR bounding box and click it.

    Pauses one second after every click as a safety margin.
    """
    for hit in ocr_results:
        center_x = (hit['left'] + hit['right']) // 2
        center_y = (hit['top'] + hit['bottom']) // 2
        pyautogui.moveTo(center_x, center_y)
        pyautogui.click()
        time.sleep(1)  # Sleep for a second between clicks for safety
def main():
    """Run OCR with every preprocessing strategy, merge the hits, and annotate the image.

    Improvements over the original: the seven copy-pasted perform_ocr
    call sites are replaced with a (label, function) strategy loop, and
    the unreadable five-line isinstance chain with all(...). Printed
    labels, call order, and merge order are preserved exactly.
    """
    image_path = 'path-to-image.png'
    output_json_path = 'path-to-image.json'  # NOTE(review): currently unused
    output_image_path = 'path-to-outputimage.png'
    text_to_find = []
    # (label, preprocess function) pairs, executed in the original order.
    strategies = [
        ("gray", preprocess_image_gray),
        ("contrast", preprocess_image_increase_contrast),
        ("gaussian", preprocess_image_gaussian_blur),
        ("edge", preprocess_image_edge_detection),
        ("Dialte", preprocess_image_dialte_edges),
        ("Invert", preprocess_image_inverted),
        ("Isolate", isolate_white_text_on_blue),
    ]
    results_by_label = {}
    for label, func in strategies:
        hits = perform_ocr(image_path, text_to_find, config="--psm 11 --oem 3",
                           preprocess_func=func)
        print(f"OCR {label} results are: {hits}")
        results_by_label[label] = hits
    ocr_results = []
    if all(isinstance(hits, list) for hits in results_by_label.values()):
        # Merge in the original concatenation order (isolate first).
        for label in ("Isolate", "gray", "contrast", "gaussian", "Dialte", "edge", "Invert"):
            ocr_results.extend(results_by_label[label])
    else:
        print("OCR results are not in the expected list format.")
    for i, result in enumerate(ocr_results, start=1):
        bounding_box_info = (
            f"Bounding box: Text = {result['text']}, Left = {result['left']}, Top = {result['top']}, "
            f"Right = {result['right']}, Bottom = {result['bottom']}, Confidence = {result['confidence']:.2f}"
        )
        print(bounding_box_info)
        print()
    draw_boxes(image_path, ocr_results, output_image_path)
    print(ocr_results)


if __name__ == "__main__":
    main()
Issue:
Despite trying the above preprocessing techniques, the OCR output is still missing or incorrectly recognizing the text on the blue background. I have also tried adjusting the thresholding and brightness levels, but without success.
Question:
Could anyone provide suggestions on additional preprocessing techniques or modifications to the Tesseract OCR settings that might help improve the detection accuracy for white text on a blue background or any other colored background?
Thank you in advance for your assistance!