Hi
I posted this on stackoverflow but got no response...
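I am trying to read subtitles from an image taken from the news using Tesseract (pytesseract) in Python.
For some reason I get better results when I save the frame to a file using plt and have Tesseract read it from there than when I pass the array to Tesseract directly.
Why is that, and is there a way to get the same result with cv2?
import urllib3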
import requests
import numpy as np
import pytesseract
import matplotlib.pyplot as plt
from PIL import Image
# Defaults reproduce the original hard-coded download: the sample frame
# shared on Google Drive, stored next to the script.
_DEFAULT_FRAME_URL = (
    'https://drive.google.com/uc?export=download'
    '&id=0B7t_yZLolnbiaVpicnEwbDRjTmc'
)
_DEFAULT_FRAME_PATH = 'testing.npy'


def downloadFile(url=_DEFAULT_FRAME_URL, dest=_DEFAULT_FRAME_PATH):
    """Download ``url`` with urllib3 and write the response body to ``dest``.

    Args:
        url: Address to fetch with an HTTP GET. Defaults to the sample
            frame used by this script.
        dest: Path of the file to (over)write with the raw response bytes.
            Defaults to ``'testing.npy'``.

    Raises:
        RuntimeError: If the server answers with a status other than 200.
            Without this check an HTML error page would be saved under
            ``dest`` and only fail later, obscurely, inside ``np.load``.
    """
    http = urllib3.PoolManager()
    r = http.request('GET', url)
    if r.status != 200:
        raise RuntimeError(
            'download of %s failed with HTTP status %s' % (url, r.status)
        )
    # The context manager guarantees the handle is flushed and closed before
    # the caller tries to read the file back.
    with open(dest, 'wb') as f:
        f.write(r.data)
# Fetch the sample data, then load the saved file back as a NumPy array.
downloadFile()
frame = np.load('testing.npy')

# Keep rows 170..209 and columns 8..194 of the frame.
# NOTE(review): presumably this is the subtitle area of the news image -
# confirm against the actual frame.
new_frame = frame[170:210, 8:195]

# Draw the crop with matplotlib, hide the axes, and save the figure to disk.
plt.imshow(new_frame)
plt.axis('off')
plt.savefig('plt.png')

# OCR the cropped array directly (wrapped in a PIL image) and report the text.
array_image = Image.fromarray(new_frame)
array_text = pytesseract.image_to_string(array_image, lang='eng')
print(f'from array: {array_text}')
print( 'from plt: ' + pytesseract.image_to_string(Image.open('plt.png'),lang = 'eng'))
Thank you!