c-api return null PageIterator (Empty page) on rotated image #326

71 views
Skip to first unread message

Reuben Cummings

unread,
May 19, 2016, 10:14:34 AM5/19/16
to tesseract-ocr
I get a null PageIterator ("Empty page!!") via the C API from Python on some rotated images, even though the CLI executable handles the same images fine; see issue #326.

image: eurotext.tif

setup

reubano@tokpro [~]⚡ convert eurotext.tif -rotate 3 +repage eurotext_03.tif
reubano@tokpro [~]⚡ convert eurotext.tif -rotate 5 +repage eurotext_05.tif

`bug.py`

#!/usr/bin/python
# -*- coding: utf-8 -*-

from __future__ import print_function, division

from os import path as p, environ
from ctypes import (
    CDLL
, POINTER, Structure, c_char_p, c_bool, c_int, c_float, byref)

from ctypes.util import find_library

# find_library() expects the bare library name: it adds the platform's "lib"
# prefix and ".dylib"/".so" suffix itself and returns None when nothing matches.
LIBTESS = find_library('tesseract')
LIBLEPT = find_library('lept')

# Data path handed to TessBaseAPIInit3; the environment variable wins over the
# hard-coded fallback.
TESSDATA_PREFIX = environ.get('TESSDATA_PREFIX', '/opt/local/share')


class TessBaseAPI(Structure):
    """Opaque stand-in for the native TessBaseAPI; only used via POINTER()."""


class Pix(Structure):
    """Opaque stand-in for Leptonica's Pix image; only used via POINTER()."""


class TessPageIterator(Structure):
    """Opaque stand-in for the native TessPageIterator; only used via POINTER()."""


def create_tess_api(prefix=TESSDATA_PREFIX, lang='eng'):
    """Load Tesseract and Leptonica through ctypes and initialise an API handle.

    Args:
        prefix: Data path passed to ``TessBaseAPIInit3``.
        lang: Language code passed to ``TessBaseAPIInit3``.

    Returns:
        A ``(tesseract, leptonica, api)`` tuple: the two loaded ``CDLL``
        objects and the handle returned by ``TessBaseAPICreate``.

    Raises:
        OSError: If either shared library could not be located.
        RuntimeError: If ``TessBaseAPIInit3`` reports a failure.
    """
    if not (LIBTESS and LIBLEPT):
        # CDLL(None) would open the running process instead of failing, and
        # the error would only surface later as a missing-symbol lookup.
        raise OSError(
            'shared library not found (tesseract=%r, leptonica=%r)'
            % (LIBTESS, LIBLEPT))

    def _to_bytes(text):
        # c_char_p parameters only accept bytes on Python 3.
        return text if isinstance(text, bytes) else text.encode('utf-8')

    tesseract = CDLL(LIBTESS)
    leptonica = CDLL(LIBLEPT)
    base_api = POINTER(TessBaseAPI)
    page_iterator = POINTER(TessPageIterator)
    pix_ptr = POINTER(Pix)

    # Declare every prototype explicitly; ctypes otherwise assumes C ints,
    # which truncates pointers on 64-bit platforms.
    tesseract.TessBaseAPICreate.argtypes = []
    tesseract.TessBaseAPICreate.restype = base_api
    tesseract.TessBaseAPIDelete.argtypes = [base_api]
    tesseract.TessBaseAPIDelete.restype = None
    tesseract.TessBaseAPIInit3.argtypes = [base_api, c_char_p, c_char_p]
    # Init3 returns an int status (0 on success, -1 on failure), not a bool.
    tesseract.TessBaseAPIInit3.restype = c_int
    tesseract.TessBaseAPISetPageSegMode.argtypes = [base_api, c_int]
    tesseract.TessBaseAPISetPageSegMode.restype = None
    tesseract.TessBaseAPISetImage2.argtypes = [base_api, pix_ptr]
    tesseract.TessBaseAPISetImage2.restype = None
    tesseract.TessBaseAPIAnalyseLayout.argtypes = [base_api]
    tesseract.TessBaseAPIAnalyseLayout.restype = page_iterator
    tesseract.TessPageIteratorOrientation.argtypes = [
        page_iterator, POINTER(c_int), POINTER(c_int), POINTER(c_int),
        POINTER(c_float)]
    tesseract.TessPageIteratorOrientation.restype = None
    tesseract.TessPageIteratorDelete.argtypes = [page_iterator]
    tesseract.TessPageIteratorDelete.restype = None

    leptonica.pixRead.argtypes = [c_char_p]
    leptonica.pixRead.restype = pix_ptr
    leptonica.pixDestroy.argtypes = [POINTER(pix_ptr)]
    leptonica.pixDestroy.restype = None

    api = tesseract.TessBaseAPICreate()
    if tesseract.TessBaseAPIInit3(api, _to_bytes(prefix), _to_bytes(lang)) != 0:
        # Do not hand back a half-initialised handle.
        tesseract.TessBaseAPIDelete(api)
        raise RuntimeError(
            'TessBaseAPIInit3 failed (datapath=%r, lang=%r)' % (prefix, lang))

    return tesseract, leptonica, api

def get_orientation(tesseract, leptonica, api, path, mode=1):
    """Run layout analysis on one image and report the page orientation.

    Args:
        tesseract: Loaded Tesseract ``CDLL``.
        leptonica: Loaded Leptonica ``CDLL``.
        api: Handle returned by ``TessBaseAPICreate``.
        path: Image file to analyse.
        mode: Page segmentation mode passed to ``TessBaseAPISetPageSegMode``.

    Returns:
        The orientation value written by ``TessPageIteratorOrientation``, or
        ``None`` when ``TessBaseAPIAnalyseLayout`` yields no page iterator.

    Raises:
        IOError: If Leptonica could not read the image.
    """
    # c_char_p parameters only accept bytes on Python 3.
    raw_path = path if isinstance(path, bytes) else path.encode('utf-8')

    tesseract.TessBaseAPISetPageSegMode(api, mode)
    pix = leptonica.pixRead(raw_path)
    if not pix:
        # A NULL Pix must not be handed on to Tesseract.
        raise IOError('could not read image: %s' % path)

    try:
        tesseract.TessBaseAPISetImage2(api, pix)
        it = tesseract.TessBaseAPIAnalyseLayout(api)
        if not it:
            return None

        orientation, direction, line_order = c_int(), c_int(), c_int()
        skew = c_float()
        try:
            tesseract.TessPageIteratorOrientation(
                it, byref(orientation), byref(direction), byref(line_order),
                byref(skew))
        finally:
            # The iterator is owned by the caller of AnalyseLayout.
            tesseract.TessPageIteratorDelete(it)

        print('%s: %s' % (path, orientation.value))
        return orientation.value
    finally:
        # pixDestroy takes a Pix** and only drops this reference.
        leptonica.pixDestroy(byref(pix))

if __name__ == '__main__':
    # One API handle is enough for every image; create it once and make sure
    # it is released even if one of the images fails.
    tesseract, leptonica, api = create_tess_api()
    try:
        for path in ['eurotext.tif', 'eurotext_03.tif', 'eurotext_05.tif']:
            get_orientation(tesseract, leptonica, api, path)
    finally:
        tesseract.TessBaseAPIDelete(api)


output

reubano@tokpro [~]⚡ python bug.py
eurotext.tif: 0
eurotext_03.tif: 0
Empty page!!

reubano@tokpro [~]⚡ tesseract eurotext_05.tif - -psm 0
Orientation: 0
Orientation in degrees: 0
Orientation confidence: 18.72
Script: 1
Script confidence: 19.08



mac osx 10.9.5
system

reubano@tokpro [~]⚡ tesseract --version
tesseract 3.04.00
 leptonica-1.71
  libgif 4.2.3 : libjpeg 9a : libpng 1.6.21 : libtiff 4.0.6 : zlib 1.2.8 : libwebp 0.5.0 : libopenjp2 2.1.0





Reuben Cummings

unread,
May 20, 2016, 2:32:36 AM5/20/16
to tesseract-ocr
If I run the script with page segmentation mode 0 (mode=0, like the executable's -psm 0), I don't get any results either. Further research led me to the DetectOS function, which seems useful.

Reuben Cummings

unread,
May 20, 2016, 12:18:09 PM5/20/16
to tesseract-ocr
I found some helpful code on the forum:

// Ask Tesseract for its orientation/script scores. The result struct lives on
// the stack, so there is nothing to delete afterwards.
OSResults orientationStruct;
bool gotOrientation = myTess->DetectOS(&orientationStruct);

// Stays -1 when detection fails or when no score is greater than 0.
int bestOrientation = -1;
float bestOrientationScore = 0;

if (gotOrientation) {
    // orientations holds one score per candidate orientation (4 entries);
    // keep the index of the highest one.
    for (int i = 0; i < 4; i++) {
        if (orientationStruct.orientations[i] > bestOrientationScore) {
            bestOrientation = i;
            bestOrientationScore = orientationStruct.orientations[i];
        }
    }
}

// This is the result we were asked for
results.textOrientation = bestOrientation;

But unfortunately, OSResults isn't implemented in the c-api. Any suggestions?


Reuben Cummings

unread,
May 20, 2016, 5:22:48 PM5/20/16
to tesseract-ocr

Thanks to pyocr I figured it out!


# ... /snip
# https://github.com/jflesch/pyocr/blob/master/src/pyocr/libtesseract/tesseract_raw.py
class OSResults(Structure):
    """ctypes mirror of Tesseract's OSResults struct, as laid out by pyocr.

    An instance is passed by reference to ``TessBaseAPIDetectOS``, which
    fills it in; ``best_orientation_id`` and ``best_oconfidence`` are the
    fields read afterwards.
    """

    _fields_ = [
        ('orientations', c_float * 4),
        ('scripts_na', c_float * 4 * (116 + 1 + 2 + 1)),
        ('unicharset', c_void_p),
        ('best_orientation_id', c_int),
        ('best_script_id', c_int),
        ('best_sconfidence', c_float),
        ('best_oconfidence', c_float),
        # NOTE(review): presumably spare room in case the native struct is
        # larger than the fields declared above -- confirm against pyocr.
        ('padding', c_char_p * 512),
    ]

# ... /snip


def create_tess_api(prefix=TESSDATA_PREFIX, lang='eng'):
    """Excerpt: the extra prototype added to ``create_tess_api`` in bug.py.

    Only the lines that differ from the full script are shown; the elided
    parts are marked with ``/snip``.
    """
    # ... /snip
    # DetectOS fills the OSResults passed by reference and reports success.
    tesseract.TessBaseAPIDetectOS.argtypes = [base_api, POINTER(OSResults)]
    tesseract.TessBaseAPIDetectOS.restype = c_bool
    # ... /snip

def get_orientation(tesseract, leptonica, api, path, mode=0):
    """Detect the orientation of one image with ``TessBaseAPIDetectOS``.

    Args:
        tesseract: Loaded Tesseract ``CDLL``.
        leptonica: Loaded Leptonica ``CDLL``.
        api: Handle returned by ``TessBaseAPICreate``.
        path: Image file to analyse.
        mode: Page segmentation mode passed to ``TessBaseAPISetPageSegMode``.

    Returns:
        ``best_orientation_id`` from the filled-in ``OSResults``, or ``None``
        when ``TessBaseAPIDetectOS`` reports failure.

    Raises:
        IOError: If Leptonica could not read the image.
    """
    # c_char_p parameters only accept bytes on Python 3.
    raw_path = path if isinstance(path, bytes) else path.encode('utf-8')

    tesseract.TessBaseAPISetPageSegMode(api, mode)
    pix = leptonica.pixRead(raw_path)
    if not pix:
        # A NULL Pix must not be handed on to Tesseract.
        raise IOError('could not read image: %s' % path)

    osr = OSResults()
    try:
        tesseract.TessBaseAPISetImage2(api, pix)
        # DetectOS returns a success flag, not a page iterator, so it must
        # not be fed to TessPageIteratorOrientation.
        detected = tesseract.TessBaseAPIDetectOS(api, byref(osr))
    finally:
        # pixDestroy takes a Pix** and only drops this reference.
        leptonica.pixDestroy(byref(pix))

    if not detected:
        return None

    print('%s: %s' % (path, osr.best_orientation_id))
    print('confidence: %s' % osr.best_oconfidence)
    return osr.best_orientation_id
Reply all
Reply to author
Forward
0 new messages