import cv2
import numpy
import subprocess
frame = cv2.imread("image.jpg", 1)
command = ["tesseract",
'stdin',
'stdout']
tesseract_process = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
result = tesseract_process.communicate(input=frame.tostring())[0]
result = tesseract_process.stdin.write(frame.tostring())
print(result.decode())
Error in fopenReadStream: file not found$" #! !! && ** ** ** ** ++ *- '* (+ (+ *- #& "$ "$ !# ! " "$ !# ! " " ! ! ! ! ! " " ! " " ! " # " ! !$" #! !! && ** ** ** ** ++ *- '* (+ (+ *- #& "$ "$ !# ! " "$ !# ! " " ! ! ! ! ! " " ! " " ! " # " ! ! cannot be read!Error during processing.ObjectCache(5A2E0A88)::~ObjectCache(): WARNING! LEAK! object 02770A70 still has count 1 (id \Program Files (x86)\Tesseract-OCR\tessdata/eng.traineddatalstm-punc-dawg)ObjectCache(5A2E0A88)::~ObjectCache(): WARNING! LEAK! object 02752128 still has count 1 (id \Program Files (x86)\Tesseract-OCR\tessdata/eng.traineddatalstm-word-dawg)ObjectCache(5A2E0A88)::~ObjectCache(): WARNING! LEAK! object 027521D8 still has count 1 (id \Program Files (x86)\Tesseract-OCR\tessdata/eng.traineddatalstm-number-dawg)ObjectCache(5A2E0A88)::~ObjectCache(): WARNING! LEAK! object 0270BF30 still has count 1 (id \Program Files (x86)\Tesseract-OCR\tessdata/eng.traineddatapunc-dawg)ObjectCache(5A2E0A88)::~ObjectCache(): WARNING! LEAK! object 035D29A8 still has count 1 (id \Program Files (x86)\Tesseract-OCR\tessdata/eng.traineddataword-dawg)ObjectCache(5A2E0A88)::~ObjectCache(): WARNING! LEAK! object 02F63088 still has count 1 (id \Program Files (x86)\Tesseract-OCR\tessdata/eng.traineddatanumber-dawg)ObjectCache(5A2E0A88)::~ObjectCache(): WARNING! LEAK! object 02F67A88 still has count 1 (id \Program Files (x86)\Tesseract-OCR\tessdata/eng.traineddatabigram-dawg)ObjectCache(5A2E0A88)::~ObjectCache(): WARNING! LEAK! object 02F67B30 still has count 1 (id \Program Files (x86)\Tesseract-OCR\tessdata/eng.traineddatafreq-dawg)
tesseract_process.stdin.write(frame.tostring())
output = tesseract_process.stdout.read()
print output
Tesseract stdin doesn't accept raw numpy frames, it needs to be encoded in an image format (like png/bmp etc).
This is exactly what the Piltesseract library does using the PIL library:
https://github.com/Digirolamo/PILtesseract/blob/master/piltesseract/tesseractwrapper.py#L131:L143
In opencv you can do the equivalent using cv2.imencode
http://docs.opencv.org/3.0-beta/modules/imgcodecs/doc/reading_and_writing_images.html#imencode
The following code works for me:
John