Tess4J returns wrong font type

393 views
Skip to first unread message

Mustak M

unread,
Jul 18, 2014, 7:34:15 AM7/18/14
to tesser...@googlegroups.com
I am using the Java wrapper Tess4J, with the following code to retrieve the font type and font size from an image. The image contains text in the "Arial" font at size 48, but the code returns the font type as
font: DejaVu_Sans_Ultra-Light, size: 13
 
Code:

/**
 * Runs OCR on a fixed sample image and prints, for every recognized word, its
 * text, bounding box, confidence and the font attributes reported by the engine.
 *
 * <p>The native API handle and the result iterator are released in a
 * {@code finally} block, so they are freed even when a step fails part-way.
 *
 * @throws Exception if the image cannot be read or decoded, if the engine fails
 *         to initialise, or if no result iterator is available
 */
public static void testResultIterator() throws Exception {
    String datapath = "tessdata";
    String language = "eng";
    File imageFile = new File("C:\\1\\CaptureArial.JPG");

    System.out.println("TessBaseAPIGetIterator");

    // ImageIO.read(File) manages its own stream, so nothing is left open here.
    // Reading TIFF input additionally requires the jai-imageio library.
    BufferedImage image = ImageIO.read(imageFile);
    if (image == null) {
        // ImageIO.read returns null when no registered reader can decode the file.
        throw new java.io.IOException("Unsupported or unreadable image: " + imageFile);
    }

    ByteBuffer buf = ImageIOHelper.convertImageData(image);
    int bpp = image.getColorModel().getPixelSize();
    int bytespp = bpp / 8;
    int bytespl = (int) Math.ceil(image.getWidth() * bpp / 8.0);

    TessAPI1.TessBaseAPI handle = TessAPI1.TessBaseAPICreate();
    TessAPI1.TessResultIterator ri = null;
    try {
        // Init3 reports failure (e.g. missing traineddata) through a non-zero return code.
        if (TessAPI1.TessBaseAPIInit3(handle, datapath, language) != 0) {
            throw new IllegalStateException("Could not initialise Tesseract with datapath '"
                    + datapath + "' and language '" + language + "'");
        }
        TessAPI1.TessBaseAPISetPageSegMode(handle, TessAPI1.TessPageSegMode.PSM_AUTO);
        TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
        TessAPI1.TessBaseAPIRecognize(handle, null);

        ri = TessAPI1.TessBaseAPIGetIterator(handle);
        if (ri == null) {
            throw new IllegalStateException("No result iterator available for " + imageFile);
        }
        TessAPI1.TessPageIterator pi = TessAPI1.TessResultIteratorGetPageIterator(ri);
        TessAPI1.TessPageIteratorBegin(pi);

        System.out.println("Bounding boxes:\nchar(s) left top right bottom confidence font-attributes");

        do {
            // The returned text is natively allocated: copy it out, then free it.
            Pointer ptr = TessAPI1.TessResultIteratorGetUTF8Text(ri, TessAPI1.TessPageIteratorLevel.RIL_WORD);
            String word = ptr.getString(0);
            TessAPI1.TessDeleteText(ptr);

            float confidence = TessAPI1.TessResultIteratorConfidence(ri, TessAPI1.TessPageIteratorLevel.RIL_WORD);

            // Single-element buffers act as out-parameters for the native call.
            IntBuffer leftB = IntBuffer.allocate(1);
            IntBuffer topB = IntBuffer.allocate(1);
            IntBuffer rightB = IntBuffer.allocate(1);
            IntBuffer bottomB = IntBuffer.allocate(1);
            TessAPI1.TessPageIteratorBoundingBox(pi, TessAPI1.TessPageIteratorLevel.RIL_WORD,
                    leftB, topB, rightB, bottomB);
            int left = leftB.get();
            int top = topB.get();
            int right = rightB.get();
            int bottom = bottomB.get();
            System.out.print(String.format("%s %d %d %d %d %f", word, left, top, right, bottom, confidence));

            IntBuffer boldB = IntBuffer.allocate(1);
            IntBuffer italicB = IntBuffer.allocate(1);
            IntBuffer underlinedB = IntBuffer.allocate(1);
            IntBuffer monospaceB = IntBuffer.allocate(1);
            IntBuffer serifB = IntBuffer.allocate(1);
            IntBuffer smallcapsB = IntBuffer.allocate(1);
            IntBuffer pointSizeB = IntBuffer.allocate(1);
            IntBuffer fontIdB = IntBuffer.allocate(1);
            String fontName = TessAPI1.TessResultIteratorWordFontAttributes(ri, boldB, italicB, underlinedB,
                    monospaceB, serifB, smallcapsB, pointSizeB, fontIdB);
            boolean bold = boldB.get() == TessAPI1.TRUE;
            boolean italic = italicB.get() == TessAPI1.TRUE;
            boolean underlined = underlinedB.get() == TessAPI1.TRUE;
            boolean monospace = monospaceB.get() == TessAPI1.TRUE;
            boolean serif = serifB.get() == TessAPI1.TRUE;
            boolean smallcaps = smallcapsB.get() == TessAPI1.TRUE;
            int pointSize = pointSizeB.get();
            int fontId = fontIdB.get();
            System.out.println(String.format(" font: %s, size: %d, font id: %d, bold: %b," +
                    " italic: %b, underlined: %b, monospace: %b, serif: %b, smallcap: %b",
                    fontName, pointSize, fontId, bold, italic, underlined, monospace, serif, smallcaps));
        } while (TessAPI1.TessPageIteratorNext(pi, TessAPI1.TessPageIteratorLevel.RIL_WORD) == TessAPI1.TRUE);
    } finally {
        // NOTE(review): pi is obtained from ri and is presumably a view of the same
        // native iterator, so only ri is deleted here -- confirm against the C API.
        if (ri != null) {
            TessAPI1.TessResultIteratorDelete(ri);
        }
        TessAPI1.TessBaseAPIDelete(handle);
    }
}

---------

It's the same code, taken from

http://sourceforge.net/p/tess4j/code/HEAD/tree/Tess4J_3/trunk/test/net/sourceforge/tess4j/TessAPI1Test.java#l447

----

Any thoughts?

Image file attached for reference.

 

Thanks

 

 

CaptureArial.JPG

Quan Nguyen

unread,
Jul 18, 2014, 10:30:39 AM7/18/14
to tesser...@googlegroups.com
Reply all
Reply to author
Forward
0 new messages