#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import
from os import environ
from ctypes import CDLL, POINTER, Structure, c_char_p, c_bool
from ctypes.util import find_library
# find_library() expects the bare library name (no "lib" prefix, no suffix).
# The prefixed spellings only resolve on platforms that also try the name
# verbatim, so they are kept first for backward compatibility and the
# conventional bare names are used as fallbacks.
LIBTESS = find_library('libtesseract') or find_library('tesseract')
# NOTE(review): some Leptonica builds appear to install the library as
# "libleptonica" rather than "liblept" -- confirm against the target platform.
LIBLEPT = (find_library('liblept') or find_library('lept')
           or find_library('leptonica'))
# Data path later passed to TessBaseAPIInit3; overridable via the environment.
TESSDATA_PREFIX = environ.get('TESSDATA_PREFIX', '/opt/local/share')
class TessBaseAPI(Structure):
    """Opaque ctypes stand-in for the Tesseract base API object.

    No fields are declared; the type exists only so that
    ``POINTER(TessBaseAPI)`` can be used in foreign function signatures.
    """
class Pix(Structure):
    """Opaque ctypes stand-in for a Leptonica image object.

    No fields are declared; the type exists only so that ``POINTER(Pix)``
    can be used in foreign function signatures.
    """
class TessPageIterator(Structure):
    """Opaque ctypes stand-in for a Tesseract page iterator object.

    No fields are declared; the type exists only so that
    ``POINTER(TessPageIterator)`` can be used in foreign function signatures.
    """
def create_tess_api(prefix=TESSDATA_PREFIX):
    """Load Tesseract and Leptonica and return an initialised English API.

    The shared libraries named by ``LIBTESS`` and ``LIBLEPT`` are loaded, the
    C signatures used by this file are declared, and a new base API handle is
    created and initialised for the ``eng`` language.

    Args:
        prefix: Data path handed to ``TessBaseAPIInit3``. Text is encoded as
            UTF-8; a bytes value is passed through unchanged. Defaults to
            ``TESSDATA_PREFIX``.

    Returns:
        A ``(tesseract, leptonica, api)`` tuple: the two ``CDLL`` handles and
        the ``POINTER(TessBaseAPI)`` returned by ``TessBaseAPICreate``.

    Raises:
        OSError: If either shared library could not be located.
        RuntimeError: If the API handle cannot be created or initialised.
    """
    # Function-scope import: c_int is not part of the file-level ctypes import.
    from ctypes import c_int

    # CDLL(None) does not load the requested library (on POSIX it opens the
    # running process instead), so fail early with a clear message.
    if LIBTESS is None:
        raise OSError('could not locate the tesseract shared library')
    if LIBLEPT is None:
        raise OSError('could not locate the leptonica shared library')

    tesseract = CDLL(LIBTESS)
    leptonica = CDLL(LIBLEPT)

    base_api = POINTER(TessBaseAPI)
    p_iter = POINTER(TessPageIterator)

    tesseract.TessBaseAPICreate.argtypes = []
    tesseract.TessBaseAPICreate.restype = base_api
    tesseract.TessBaseAPIDelete.argtypes = [base_api]
    tesseract.TessBaseAPIDelete.restype = None
    tesseract.TessBaseAPIInit3.argtypes = [base_api, c_char_p, c_char_p]
    # The C function returns an int status (0 on success, -1 on failure),
    # not a bool; declaring c_bool would invert the meaning of the result.
    tesseract.TessBaseAPIInit3.restype = c_int
    tesseract.TessBaseAPISetPageSegMode.argtypes = [base_api, c_int]
    tesseract.TessBaseAPISetPageSegMode.restype = None
    tesseract.TessBaseAPISetImage2.argtypes = [base_api, POINTER(Pix)]
    tesseract.TessBaseAPISetImage2.restype = None
    tesseract.TessBaseAPIAnalyseLayout.argtypes = [base_api]
    tesseract.TessBaseAPIAnalyseLayout.restype = p_iter

    leptonica.pixRead.argtypes = [c_char_p]
    leptonica.pixRead.restype = POINTER(Pix)

    api = tesseract.TessBaseAPICreate()
    if not api:
        raise RuntimeError('TessBaseAPICreate returned a NULL handle')

    datapath = prefix if isinstance(prefix, bytes) else prefix.encode('utf-8')
    status = tesseract.TessBaseAPIInit3(api, datapath, b'eng')
    if status != 0:
        # Release the handle so a failed initialisation does not leak it.
        tesseract.TessBaseAPIDelete(api)
        raise RuntimeError(
            'TessBaseAPIInit3 failed with status %d for data path %r'
            % (status, prefix))

    return tesseract, leptonica, api
# Script body: run Tesseract layout analysis on a sample image and print the
# resulting page-iterator pointer.
tesseract, leptonica, api = create_tess_api()
path = b'eurotext.tif'
# 1 is PSM_AUTO_OSD in Tesseract's PageSegMode enum.
tesseract.TessBaseAPISetPageSegMode(api, 1)
pix = leptonica.pixRead(path)
# pixRead yields a NULL pointer when the image cannot be loaded; passing that
# on to Tesseract would crash inside native code, so stop here instead.
if not pix:
    raise IOError('pixRead could not load image: %r' % (path,))
tesseract.TessBaseAPISetImage2(api, pix)
print(tesseract.TessBaseAPIAnalyseLayout(api))