/// <summary>
/// Runs Tesseract OCR over the image at <paramref name="imgLoc"/> and writes the
/// recognised text to the console, walking the result block by block, paragraph by
/// paragraph, text line by text line and word by word.
/// </summary>
/// <param name="imgLoc">Path of the image file to load and recognise.</param>
/// <remarks>
/// Output format: a "&lt;BLOCK&gt;" line at the start of every block, each word followed
/// by a space, a line break after the last word of a text line, and an extra line break
/// after the last line of a paragraph.
/// Any exception raised while loading or recognising the image is caught, written to
/// <c>Trace.TraceError</c> and echoed to the console; nothing is rethrown.
/// </remarks>
public void iterateOCR(string imgLoc)
{
    try
    {
        using (var ocr = new Tesseract.TesseractEngine(@"./tessdata", "eng", Tesseract.EngineMode.Default))
        using (var img = Pix.LoadFromFile(imgLoc))
        {
            // The region handed to Process is measured in image pixels. A fixed
            // 612x792 rectangle (US Letter in PDF points) only covers the whole page
            // when the image happens to be exactly that size, so derive the region
            // from the loaded image instead.
            var pageRegion = new Rect(0, 0, img.Width, img.Height);

            using (var page = ocr.Process(img, pageRegion, PageSegMode.Auto))
            {
                Console.WriteLine("Text (iterator):");
                using (var iter = page.GetIterator())
                {
                    iter.Begin();

                    // Nested traversal: blocks > paragraphs > text lines > words.
                    // Each two-argument Next(parent, child) advances to the next child
                    // only while it is still inside the current parent element.
                    do
                    {
                        do
                        {
                            do
                            {
                                do
                                {
                                    if (iter.IsAtBeginningOf(PageIteratorLevel.Block))
                                    {
                                        Console.WriteLine("<BLOCK>");
                                    }

                                    Console.Write(iter.GetText(PageIteratorLevel.Word));
                                    Console.Write(" ");

                                    // Last word of the text line: terminate the output line.
                                    if (iter.IsAtFinalOf(PageIteratorLevel.TextLine, PageIteratorLevel.Word))
                                    {
                                        Console.WriteLine();
                                    }
                                }
                                while (iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word));

                                // Last text line of the paragraph: add a blank separator line.
                                if (iter.IsAtFinalOf(PageIteratorLevel.Para, PageIteratorLevel.TextLine))
                                {
                                    Console.WriteLine();
                                }
                            }
                            while (iter.Next(PageIteratorLevel.Para, PageIteratorLevel.TextLine));
                        }
                        while (iter.Next(PageIteratorLevel.Block, PageIteratorLevel.Para));
                    }
                    while (iter.Next(PageIteratorLevel.Block));
                } // using iter
            } // using page
        } // using img, ocr
    }
    catch (Exception e)
    {
        // Best-effort reporting: record the failure in the trace log and show it on
        // the console rather than propagating it to the caller.
        Trace.TraceError(e.ToString());
        Console.WriteLine("Unexpected Error: " + e.Message);
        Console.WriteLine("Details: ");
        Console.WriteLine(e.ToString());
    }