from pytesseract import image_to_string
import pytesseract
import cv2
import re
# Reads the number printed in the bottom strip of a PNG with Tesseract OCR,
# prints the recognized text and the digit runs found in it, and shows the
# image that was handed to the OCR engine.

# Path to the Tesseract executable used by pytesseract.
TESSERACT_CMD = r'C:\Users\Bill-pc.Admin-PC\AppData\Local\Tesseract-OCR\tesseract.exe'

# Image to read.
IMAGE_PATH = r'D:\Bill-pc\Documents\Excel Documents\PNG\2017-03-26_SecondPie.png'

# The region of interest is the bottom ROI_HEIGHT rows of the image,
# skipping the first ROI_LEFT columns.
ROI_HEIGHT = 41
ROI_LEFT = 2

# Scale factor applied to the region of interest before thresholding.
ROI_SCALE = 1.006

# Pixels above this gray level become 255; everything else becomes 0.
THRESHOLD_LEVEL = 253

# Tesseract variables such as tessedit_char_whitelist must be introduced
# with "-c"; without it the whitelist is not applied as a config variable.
# NOTE(review): --psm 10 treats the image as a single character; if the strip
# holds multi-digit numbers, --psm 7 (single text line) may fit better.
OCR_CONFIG = "--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789"

# Letters replaced by digits in the OCR output: I, i, l, L -> 1; Z -> 2;
# S, s -> 5; G -> 6.
DIGIT_LOOKALIKES = str.maketrans("IilLZSsG", "11112556")


def fix_digit_lookalikes(text):
    """Return *text* with the letters in DIGIT_LOOKALIKES replaced by digits.

    All other characters are returned unchanged.
    """
    return text.translate(DIGIT_LOOKALIKES)


def extract_numbers(text):
    """Return every maximal run of ASCII digits in *text*, in order, as strings."""
    return re.findall(r"[0-9]+", text)


if __name__ == "__main__":
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD

    img = cv2.imread(IMAGE_PATH, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals an unreadable or missing file by returning None
    # rather than raising, so fail here with a clear message.
    if img is None:
        raise FileNotFoundError("Could not read image: {}".format(IMAGE_PATH))

    height, width = img.shape
    roi = img[height - ROI_HEIGHT: height, ROI_LEFT: width]
    roi = cv2.resize(roi, None, fx=ROI_SCALE, fy=ROI_SCALE)
    _, th = cv2.threshold(roi, THRESHOLD_LEVEL, 255, cv2.THRESH_BINARY)

    # OCR the binarized image. Previously the thresholded result was computed
    # and then discarded, and the un-thresholded ROI was recognized instead.
    text_detected = image_to_string(th, config=OCR_CONFIG)
    text_detected = fix_digit_lookalikes(text_detected)
    numbers = extract_numbers(text_detected)

    print(text_detected)
    # Show exactly the image that was given to Tesseract.
    cv2.imshow("th", th)
    print(numbers)

    # Block until a key is pressed, then close the preview window.
    cv2.waitKey(0)
    cv2.destroyAllWindows()
--
You received this message because you are subscribed to the Google Groups "tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email to tesseract-oc...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/2ca084e4-aae6-423e-b359-a472e00579e6%40googlegroups.com.
Thank you, Aaron, for the information; it was an improvement. I'm attaching one of the PNG files that I read (I have 200). It is interesting that it is still not reading every file 100% correctly. My script counts the digits, and sometimes it misses one of them or it reads a 1 as a 15. Maybe I'm expecting perfection from computer vision, and that's just not the case! Thanks again, Bill Upham
On Sat, Mar 14, 2020 at 3:03 PM Aaron Stewart <bigbowlo...@gmail.com> wrote:
--
# Suggested preprocessing before OCR: enlarge the region of interest 2x,
# binarize it at gray level 192, then soften the edges with a 3x3 Gaussian blur.
roi = cv2.resize(roi, None, fx=2, fy=2)
_, roi = cv2.threshold(roi, 128+64, 255, cv2.THRESH_BINARY)
roi = cv2.GaussianBlur(roi, (3,3), 0)
# Tesseract config variables must be introduced with "-c"; without it the
# whitelist is not applied as a config variable.
text_detected = image_to_string(roi, config="--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789")
You received this message because you are subscribed to the Google Groups "tesseract-ocr" group.
To unsubscribe from this group and stop receiving emails from it, send an email to tesser...@googlegroups.com.
To view this discussion on the web visit https://groups.google.com/d/msgid/tesseract-ocr/CAF5KrqB3HiPT3cKP6QLUR4u%2Bu3W1B7VbdUfKLfBYs-HnumwZWg%40mail.gmail.com.