How to make a searchable PDF with the Tesseract C++ 4.01 API

71 views
Skip to first unread message

Ivica Anic

unread,
Oct 24, 2019, 12:02:49 PM10/24/19
to tesseract-ocr
Hi

Can someone help me with a Tesseract (API 4.01) C++ example that shows how to create a searchable PDF?

The example below does not work:
I get a PDF output file, but the PDF output file is not readable.

Kind regards,
Ivica Anic


###########################################################################
// Create a searchable PDF from the (possibly multi-page) image file named by
// `inputImage`, using the Tesseract C++ API.
//
// TessBaseAPI::ProcessPages() drives the whole pipeline on its own: it opens
// the input file, begins the renderer's document, recognizes every page, adds
// each page to the renderer and ends the document. Calling BeginDocument(),
// AddImage(), EndDocument() or Recognize() by hand in addition to
// ProcessPages() writes document structure twice and yields a broken PDF, so
// none of those calls appear here.
//
// Output is written to "<output_baseqq>.pdf".
const char* output_baseqq = "my_first_tesseract_pdfqq";
// String literals are const; bind them to `const char*`.
const char* datapath = "C:\\Users\\iOCRKonverter3\\redist";
// Per-page recognition timeout in milliseconds (0 disables the timeout).
const int timeout_msq = 5000;
const char* retry_configq = nullptr;
// false: embed the page image under the invisible text layer.
const bool textonlyq = false;

// Stack objects: both destructors run on every return path, so the API is
// shut down and the PDF file is closed without manual delete/End() calls.
tesseract::TessBaseAPI apiq;
if (apiq.Init(datapath, "deu", tesseract::OEM_DEFAULT)) {
    fprintf(stderr, "Could not initialize tesseract.\n");
    exit(1);
}

// Set the segmentation mode only after Init(). PSM_AUTO performs layout
// analysis *and* OCR; PSM_AUTO_ONLY stops after layout analysis, which would
// leave the PDF without any searchable text.
apiq.SetPageSegMode(tesseract::PSM_AUTO);
apiq.SetOutputName(output_baseqq);

// NOTE(review): the earlier pixScale() call discarded its result (pixScale
// returns a new Pix and does not modify its argument), so it never affected
// the output. If the scans really need upscaling, preprocess the input file
// before handing it to ProcessPages().

// The renderer needs the resolved tessdata directory to locate pdf.ttf.
tesseract::TessPDFRenderer rendererq(output_baseqq, apiq.GetDatapath(), textonlyq);

const bool succeed =
    apiq.ProcessPages(inputImage.c_str(), retry_configq, timeout_msq, &rendererq);
if (!succeed) {
    fprintf(stderr, "Error during processing.\n");
    return EXIT_FAILURE;
}
return EXIT_SUCCESS;
Reply all
Reply to author
Forward
0 new messages