Hi There,
I recently migrated to tesseract 4 and tried pytesseract. But that is too slow for the kind of application I am creating.
I wanted to write my own python wrapper that would use the tesseract.so.4 directly.
But I have issues initializing TessBaseAPIInit4().
I have multiple variables that I want to set and it keeps getting segmentation fault.
I am not sure what I am doing wrong. I also tried the C++ version of it.
Please let me know if it is a known problem.
import os
from ctypes import *
lang = "eng"
filename = "/data/framecache/testing.jpg"
libname = "/usr/local/lib/libtesseract.so.4"
TESSDATA_PREFIX = os.environ.get('TESSDATA_PREFIX')
if not TESSDATA_PREFIX:
TESSDATA_PREFIX = "/home/vagrant/reelz_base/shared/tessdata"
print "TESSDATA_PREFIX={}".format(TESSDATA_PREFIX)
tesseract = cdll.LoadLibrary(libname)
tesseract.TessVersion.restype = c_char_p
tesseract_version = tesseract.TessVersion()
api = tesseract.TessBaseAPICreate()
print api
vars_vec = ["tesseract_char_whitelist",
"tessedit_pageseg_mode",
"image_default_resolution"]
vars_values = ["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
"6",
"70"]
vec_arr_type = c_char_p * len(vars_vec)
vars_vec_arr = vec_arr_type()
for i, val in enumerate(vars_vec):
vars_vec_arr[i] = val
print "vars_vec_arr={}".format(vars_vec_arr)
val_arr_type = c_char_p * len(vars_values)
vars_val_arr = val_arr_type()
for i, val in enumerate(vars_values):
vars_val_arr[i] = val
print "vars_val_arr={}".format(vars_val_arr)
rc = tesseract.TessBaseAPIInit4(api, TESSDATA_PREFIX, lang , 2, None, 0, vars_vec_arr, vars_val_arr, 0, c_bool(False))