How can I find out how many symbols are in a word during page layout analysis?

28 views
Skip to first unread message

Dbsk Dbsk

unread,
Aug 11, 2017, 8:56:44 AM
to tesseract-ocr
I can use the code below to draw the bounding box of every word and every symbol. Now I want to know: once I have a word, how can I find out how many symbols it contains?

Thanks for any info!

=================================
#include <iostream>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

#include <stdio.h>

using namespace std;

int main() {
    std::cout << "Hello, World!" << std::endl;
    tesseract::TessBaseAPI api ;
    api.InitForAnalysePage();

    api.SetPageSegMode(tesseract::PSM_SPARSE_TEXT);

    Pix *image = pixRead("/tmp/ytmp/en4.png");

    //process gray color to white

    l_uint32 pixel_color;

    l_int32 r,g,b;

    l_int32 width,height,depth;

    width=0;
    height=0;



    pixGetDimensions(image,&width,&height,&depth);

    printf("w=%d h=%d dep=%d\n",width,height,depth);

    api.SetImage(image);

    tesseract::PageIterator *iter = api.AnalyseLayout(true);


    int word_count=0;
    while (iter->Next(tesseract::RIL_WORD)) {
        int left, top, right, bottom;
        ++word_count;
        iter->BoundingBox(
                tesseract::RIL_WORD,
                &left, &top, &right, &bottom
        );

     //===============================================================
      //i got the word bounding box, but i want to know how may symbol in this word?
      //===============================================================


        pixRenderLine(image,left,top,left,bottom,3,L_CLEAR_PIXELS);
        pixRenderLine(image,left,top,right,top,3,L_CLEAR_PIXELS);
        pixRenderLine(image,left,bottom,right,bottom,3,L_CLEAR_PIXELS);
        pixRenderLine(image,right,top,right,bottom,3,L_CLEAR_PIXELS);



    }


    iter->Begin();
    while (iter->Next(tesseract::RIL_SYMBOL)) {
        int left, top, right, bottom;
        ++word_count;
        iter->BoundingBox(
                tesseract::RIL_SYMBOL,
                &left, &top, &right, &bottom
        );

        pixRenderLine(image,left,top,left,bottom,1,L_CLEAR_PIXELS);
        pixRenderLine(image,left,top,right,top,1,L_CLEAR_PIXELS);
        pixRenderLine(image,left,bottom,right,bottom,1,L_CLEAR_PIXELS);
        pixRenderLine(image,right,top,right,bottom,1,L_CLEAR_PIXELS);

    }

    pixWrite("/tmp/ytmp/entt.png",image,IFF_PNG);


    return 0;
}
Reply all
Reply to author
Forward
0 new messages