Hi all,
this is my code. I use delegates to call the DLL, and I created the
classes OCRPage, OCRRow and OCRWord to represent the page processed
by Tesseract.
If you are interested, I have written two short posts about my code:
http://factory.wavegroup.it/blog/2008/4/4/ancora-su-tesseract
http://factory.wavegroup.it/blog/2008/3/17/nell-era-dell-indicizzazione-risorge-tesseract-seconda-parte
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Drawing;
using System.Drawing.Imaging;
using System.Drawing.Drawing2D;
using System.IO;
namespace Wave.OCR.Test
{
public class WaveRec
{
// File name of the native library handed to LoadLibrary by CreatePageWD.
const string dllfile = @"tessdll.dll";

/// <summary>
/// Managed signature for the native export that CreatePageWD looks up under
/// the name "TessDllBeginPageUprightBPP".
/// </summary>
/// <param name="xsize">Image width (CreatePageWD passes the bitmap width in pixels).</param>
/// <param name="ysize">Image height (CreatePageWD passes the bitmap height in pixels).</param>
/// <param name="buf">Pointer to the pixel data (CreatePageWD passes BitmapData.Scan0).</param>
/// <param name="lang">Language code, marshaled as an ANSI C string.</param>
/// <param name="bpp">Bits per pixel of the buffer.</param>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate int TessDllBeginPageUprightBPPDelegate(
    uint xsize,
    uint ysize,
    IntPtr buf,
    [In, MarshalAs(UnmanagedType.LPStr)] string lang,
    byte bpp);

/// <summary>
/// Managed signature for the native export "TessDllRecognize_all_Words".
/// CreatePageWD reads the returned pointer as a sequence of ETEXT_DESC overlays.
/// </summary>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate IntPtr TessDllRecognize_all_WordsDelegate();

/// <summary>Managed signature for the native export "TessDllRelease".</summary>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void TessDllReleaseDelegate();

/// <summary>Callback used by VisitPage: receives a word's text and returns its replacement.</summary>
public delegate string TransformWordDelegate(string text);

// Number of character records CreatePageWD copies out of the native result buffer.
private static int MAX_CHAR_RECOGNIZE = 32000;

// Not referenced anywhere in the visible code.
private static int DISTANCE_WORDS = 5;
/// <summary>
/// Runs the native Tesseract DLL on every page frame of <paramref name="file"/> and
/// collects the recognized words of all frames into a single <see cref="OCRPage"/>.
/// </summary>
/// <param name="file">
/// Image to recognize. It is cast to <see cref="Bitmap"/>. The image belongs to the
/// caller and is NOT disposed by this method.
/// </param>
/// <returns>The page populated by <c>FillPageFromStruct</c> for each frame.</returns>
/// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
/// <exception cref="DllNotFoundException">The native DLL could not be loaded.</exception>
/// <exception cref="EntryPointNotFoundException">A required export is missing from the DLL.</exception>
public static OCRPage CreatePageWD(Image file)
{
    if (file == null) throw new ArgumentNullException("file");

    IntPtr pDll = WrapperUnManaged.LoadLibrary(dllfile);
    if (pDll == IntPtr.Zero)
        throw new DllNotFoundException("Unable to load native library '" + dllfile + "'.");

    try
    {
        TessDllBeginPageUprightBPPDelegate BeginPage =
            (TessDllBeginPageUprightBPPDelegate)GetTessFunction(
                pDll, "TessDllBeginPageUprightBPP", typeof(TessDllBeginPageUprightBPPDelegate));
        TessDllRecognize_all_WordsDelegate Recognize_all_Words =
            (TessDllRecognize_all_WordsDelegate)GetTessFunction(
                pDll, "TessDllRecognize_all_Words", typeof(TessDllRecognize_all_WordsDelegate));
        TessDllReleaseDelegate Release =
            (TessDllReleaseDelegate)GetTessFunction(
                pDll, "TessDllRelease", typeof(TessDllReleaseDelegate));

        Bitmap bitmap = (Bitmap)file;
        ETEXT_DESC[] structUN = new ETEXT_DESC[MAX_CHAR_RECOGNIZE];

        // Each ETEXT_DESC overlay is read one character record further into the buffer,
        // so that its text[0] lands on successive characters (FillPageFromStruct only
        // looks at text[0]). The step is the marshaled size of one EANYCODE_CHAR (16 bytes).
        int charSize = Marshal.SizeOf(typeof(EANYCODE_CHAR));

        OCRPage page = new OCRPage();
        int numbersPages = file.GetFrameCount(FrameDimension.Page);
        for (int pageF = 0; pageF < numbersPages; pageF++)
        {
            file.SelectActiveFrame(FrameDimension.Page, pageF);

            // Any pixel format other than 8bpp indexed / 24bpp RGB is announced as 1 bpp.
            // NOTE(review): other formats (e.g. 32bpp) are therefore mis-described to the
            // DLL; confirm which depths the native side accepts before extending this.
            byte bpp = 1;
            if (file.PixelFormat == PixelFormat.Format8bppIndexed) bpp = 8;
            else if (file.PixelFormat == PixelFormat.Format24bppRgb) bpp = 24;

            BitmapData data = bitmap.LockBits(
                new Rectangle(0, 0, file.Width, file.Height),
                ImageLockMode.ReadOnly,
                file.PixelFormat);
            try
            {
                // The pixels stay locked for as long as native code may read Scan0.
                BeginPage((uint)file.Width, (uint)file.Height, data.Scan0, "eng", bpp);
                try
                {
                    IntPtr currentP = Recognize_all_Words();
                    for (int cont = 0; cont < MAX_CHAR_RECOGNIZE; cont++)
                    {
                        // The buffer is owned by the native DLL: copy it out, never free it here.
                        structUN[cont] =
                            (ETEXT_DESC)Marshal.PtrToStructure(currentP, typeof(ETEXT_DESC));
                        // 64-bit safe pointer arithmetic (ToInt32 overflows for high addresses).
                        currentP = new IntPtr(currentP.ToInt64() + charSize);
                    }
                }
                finally
                {
                    Release();
                }
            }
            finally
            {
                bitmap.UnlockBits(data);
            }

            FillPageFromStruct(page, structUN);
        }
        return page;
    }
    finally
    {
        // Unload the DLL once, after all frames have been processed.
        WrapperUnManaged.FreeLibrary(pDll);
    }
}

/// <summary>Resolves one export of the loaded DLL and wraps it in a delegate of the given type.</summary>
private static Delegate GetTessFunction(IntPtr module, string exportName, Type delegateType)
{
    IntPtr address = WrapperUnManaged.GetProcAddress(module, exportName);
    if (address == IntPtr.Zero)
        throw new EntryPointNotFoundException(
            "Export '" + exportName + "' was not found in '" + dllfile + "'.");
    return Marshal.GetDelegateForFunctionPointer(address, delegateType);
}
/// <summary>
/// Walks every word of every row in the page and replaces its text with the
/// value returned by the callback.
/// </summary>
/// <param name="page">Page whose words are visited and updated in place.</param>
/// <param name="transformWord">Callback invoked with each word's current text.</param>
public static void VisitPage(OCRPage page, TransformWordDelegate transformWord)
{
    foreach (OCRRow riga in page.rows.Values)
    {
        foreach (OCRWord word in riga.words)
        {
            word.Text = transformWord(word.Text);
        }
    }
}
/// <summary>
/// Returns the words of one row of the page that are not entirely outside a
/// horizontal coordinate interval.
/// </summary>
/// <param name="page">Page to search.</param>
/// <param name="indexrow">1-based row number; the row is looked up under key <c>indexrow - 1</c>.</param>
/// <param name="inflim">Lower bound of the interval.</param>
/// <param name="suplim">Upper bound of the interval.</param>
/// <returns>
/// The matching words in row order; an empty list when the row does not exist.
/// </returns>
/// <author>Francesco Sinopoli</author>
public static List<OCRWord> ExtractWordsFromRange(OCRPage page, int indexrow, decimal inflim, decimal suplim)
{
    List<OCRWord> wordsList = new List<OCRWord>();

    OCRRow row;
    // Row keys are zero-based, the public index is one-based.
    if (!page.rows.TryGetValue(indexrow - 1, out row)) return wordsList;

    foreach (OCRWord word in row.words)
    {
        bool entirelyBefore = word.Left < inflim && word.Right < inflim;
        bool entirelyAfter = word.Left > suplim && word.Right > suplim;
        if (!entirelyBefore && !entirelyAfter)
        {
            wordsList.Add(word);
        }
    }
    return wordsList;
}
/// <summary>
/// Populates the page from the character records marshaled out of unmanaged code,
/// grouping characters into words and words into rows.
/// </summary>
/// <remarks>
/// Only <c>text[0]</c> of each entry is used. A character starts a new word when its
/// <c>blanks</c> count is at least 1 or when its left edge lies to the left of the
/// previous character; the latter also starts a new row. Processing stops at the first
/// entry whose <c>char_code</c> is 0, which flushes the word being built.
/// Rows are appended after any rows already present, so the method may be called
/// several times on the same page. Finally <c>page.DistanceRows</c> is set to the
/// mean of <c>page.distancesRows</c>.
/// </remarks>
/// <param name="page">Page to populate.</param>
/// <param name="words">Character records to extract the words from.</param>
/// <author>Francesco Sinopoli</author>
internal static void FillPageFromStruct(OCRPage page, ETEXT_DESC[] words)
{
    if (words.Length == 0) return;

    string word = string.Empty;
    // Bounding box of the word currently being built.
    int startRectWord = words[0].text[0].left;
    int upRectWord = words[0].text[0].top;
    int endRectWord = 0;
    int downRectWord = 0;
    int leftPre = 0;    // left edge of the previous character
    int heightPre = 0;  // bottom of the character that opened the previous new row

    // First free row key: 0 on a fresh page, the next number on a page that already has rows.
    int numRow = page.rows.Count;
    while (page.rows.ContainsKey(numRow)) numRow++;
    page.rows.Add(numRow, new OCRRow());

    foreach (ETEXT_DESC myChar in words)
    {
        EANYCODE_CHAR infoChar = myChar.text[0];

        if (infoChar.char_code == 0)
        {
            // Terminator: flush the pending word with the box accumulated for it
            // (the terminator record itself carries no usable coordinates).
            OCRWord lastWord = new OCRWord(word);
            lastWord.Bottom = downRectWord;
            lastWord.Top = upRectWord;
            lastWord.Left = startRectWord;
            lastWord.Right = endRectWord;
            page.rows[numRow].words.Add(lastWord);
            break;
        }

        char charvalue = (char)infoChar.char_code;
        bool startsNewRow = infoChar.left < leftPre;

        if (infoChar.blanks >= 1 || startsNewRow)
        {
            // Close the word built so far.
            OCRWord finished = new OCRWord(word);
            finished.Bottom = downRectWord;
            finished.Top = upRectWord;
            finished.Left = startRectWord;
            finished.Right = endRectWord;
            page.rows[numRow].words.Add(finished);

            if (startsNewRow)
            {
                numRow++;
                while (page.rows.ContainsKey(numRow)) numRow++;
                page.rows.Add(numRow, new OCRRow());
                if (heightPre != 0)
                    page.distancesRows.Add(infoChar.bottom - heightPre);
                heightPre = infoChar.bottom;
            }

            // Start the next word with this character.
            word = charvalue.ToString();
            startRectWord = infoChar.left;
            upRectWord = infoChar.top;
            downRectWord = infoChar.bottom;
            endRectWord = infoChar.right;
        }
        else
        {
            // Extend the current word and grow its box.
            word += charvalue;
            endRectWord = infoChar.right;
            if (infoChar.top <= upRectWord) upRectWord = infoChar.top;
            if (infoChar.bottom >= downRectWord) downRectWord = infoChar.bottom;
        }
        leftPre = infoChar.left;
    }

    decimal med = 0;
    foreach (decimal num in page.distancesRows) med += num;
    if (page.distancesRows.Count != 0) med = med / page.distancesRows.Count;
    page.DistanceRows = med;
}
/// <summary>
/// Finds the first occurrence of a sequence of consecutive words on the page and
/// reports the row it ends on together with its horizontal extent.
/// </summary>
/// <param name="sentence">Words to find, in order.</param>
/// <param name="page">Page to search; rows are visited in the enumeration order of <c>page.rows.Values</c>.</param>
/// <param name="comparisonType">Comparison used for each word.</param>
/// <param name="inflim">Left edge of the first matched word; 0 when nothing is found.</param>
/// <param name="suplim">Right edge of the last matched word; 0 when nothing is found.</param>
/// <returns>
/// The 1-based position of the row containing the last matched word, or -1 when the
/// sentence is empty or does not occur.
/// </returns>
public static int GetRowSentence(string[] sentence, OCRPage page, StringComparison comparisonType, out decimal inflim, out decimal suplim)
{
    inflim = 0;
    suplim = 0;
    if (sentence.Length == 0) return -1;

    // Flatten the page so a match may be tested from every starting word,
    // remembering the 1-based row position of each word.
    List<OCRWord> allWords = new List<OCRWord>();
    List<int> rowOfWord = new List<int>();
    int countR = 1;
    foreach (OCRRow row in page.rows.Values)
    {
        foreach (OCRWord word in row.words)
        {
            allWords.Add(word);
            rowOfWord.Add(countR);
        }
        countR++;
    }

    for (int start = 0; start + sentence.Length <= allWords.Count; start++)
    {
        int matched = 0;
        while (matched < sentence.Length &&
               allWords[start + matched].Text.Equals(sentence[matched], comparisonType))
        {
            matched++;
        }
        if (matched == sentence.Length)
        {
            int last = start + matched - 1;
            inflim = allWords[start].Left;
            suplim = allWords[last].Right;
            return rowOfWord[last];
        }
    }
    return -1;
}
/// <summary>
/// Returns the first word on the page whose text equals <paramref name="wordSearch"/>.
/// </summary>
/// <param name="wordSearch">Text to look for.</param>
/// <param name="page">Page to search; rows are visited in the enumeration order of <c>page.rows.Values</c>.</param>
/// <param name="comparisonType">Comparison used for the text match.</param>
/// <returns>The matching word, or null when no word matches.</returns>
public static OCRWord GetWord(string wordSearch, OCRPage page, StringComparison comparisonType)
{
    foreach (OCRRow row in page.rows.Values)
    {
        foreach (OCRWord word in row.words)
        {
            if (word.Text.Equals(wordSearch, comparisonType))
                return word;
        }
    }
    return null;
}
/// <summary>
/// Finds the first occurrence of a sequence of consecutive words on the page.
/// </summary>
/// <param name="sentence">Words to find, in order.</param>
/// <param name="page">Page to search; rows are visited in the enumeration order of <c>page.rows.Values</c>.</param>
/// <param name="comparisonType">Comparison used for each word.</param>
/// <returns>
/// The 1-based position of the row containing the last matched word, or -1 when the
/// sentence is empty or does not occur.
/// </returns>
public static int SearchSentence(string[] sentence, OCRPage page, StringComparison comparisonType)
{
    if (sentence.Length == 0) return -1;

    // Flatten the page so a match may be tested from every starting word,
    // remembering the 1-based row position of each word.
    List<OCRWord> allWords = new List<OCRWord>();
    List<int> rowOfWord = new List<int>();
    int countR = 1;
    foreach (OCRRow row in page.rows.Values)
    {
        foreach (OCRWord word in row.words)
        {
            allWords.Add(word);
            rowOfWord.Add(countR);
        }
        countR++;
    }

    for (int start = 0; start + sentence.Length <= allWords.Count; start++)
    {
        int matched = 0;
        while (matched < sentence.Length &&
               allWords[start + matched].Text.Equals(sentence[matched], comparisonType))
        {
            matched++;
        }
        if (matched == sentence.Length)
        {
            return rowOfWord[start + matched - 1];
        }
    }
    return -1;
}
/// <summary>
/// Finds the first occurrence of a sequence of consecutive words on the page and
/// returns the rectangle enclosing all matched words.
/// </summary>
/// <param name="sentence">Words to find, in order.</param>
/// <param name="page">Page to search; rows are visited in the enumeration order of <c>page.rows.Values</c>.</param>
/// <param name="comparisonType">Comparison used for each word.</param>
/// <returns>
/// The bounding rectangle of the matched words, or an empty (all-zero) rectangle when
/// the sentence is empty or does not occur.
/// </returns>
public static Rectangle SearchSentenceR(string[] sentence, OCRPage page, StringComparison comparisonType)
{
    Rectangle recSentence = new Rectangle();
    if (sentence.Length == 0) return recSentence;

    // Flatten the page so a match may be tested from every starting word.
    List<OCRWord> allWords = new List<OCRWord>();
    foreach (OCRRow row in page.rows.Values)
    {
        allWords.AddRange(row.words);
    }

    for (int start = 0; start + sentence.Length <= allWords.Count; start++)
    {
        int matched = 0;
        while (matched < sentence.Length &&
               allWords[start + matched].Text.Equals(sentence[matched], comparisonType))
        {
            matched++;
        }
        if (matched < sentence.Length) continue;

        // Union of the boxes of the matched words only.
        OCRWord first = allWords[start];
        int left = first.Left;
        int top = first.Top;
        int right = first.Right;
        int bottom = first.Bottom;
        for (int k = 1; k < sentence.Length; k++)
        {
            OCRWord w = allWords[start + k];
            if (w.Left < left) left = w.Left;
            if (w.Top < top) top = w.Top;
            if (w.Right > right) right = w.Right;
            if (w.Bottom > bottom) bottom = w.Bottom;
        }
        recSentence.X = left;
        recSentence.Y = top;
        recSentence.Width = right - left;
        recSentence.Height = bottom - top;
        return recSentence;
    }
    return recSentence;
}
/// <summary>
/// Looks for a word on the page and reports which row it was found in.
/// </summary>
/// <param name="wordSearch">Text to look for.</param>
/// <param name="page">Page to search; rows are visited in the enumeration order of <c>page.rows.Values</c>.</param>
/// <param name="comparisonType">Comparison used for the text match.</param>
/// <returns>The 1-based position of the first row containing the word, or -1 when absent.</returns>
public static int SearchWord(string wordSearch, OCRPage page, StringComparison comparisonType)
{
    int rowNumber = 0;
    foreach (OCRRow currentRow in page.rows.Values)
    {
        rowNumber++;
        List<OCRWord> candidates = currentRow.words;
        for (int i = 0; i < candidates.Count; i++)
        {
            bool isMatch = candidates[i].Text.Equals(wordSearch, comparisonType);
            if (isMatch)
            {
                return rowNumber;
            }
        }
    }
    return -1;
}
#region Metodi Helper
#endregion
/// <summary>
/// Managed mirror of the native result header followed by its first character record.
/// Fields are laid out sequentially in declaration order; the native type each field
/// mirrors is noted next to it.
/// </summary>
/// <remarks>
/// No pointer types are used, so the struct does not need the <c>unsafe</c> modifier.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct ETEXT_DESC
{
    /// INT16->short
    public short count;
    /// INT16->short
    public short progress;
    /// INT8->char
    public byte more_to_come;
    /// INT8->char
    public byte ocr_alive;
    /// INT8->char
    public byte err_code;
    /// CANCEL_FUNC, kept as a raw pointer rather than marshaled as a delegate
    public IntPtr cancel;
    /// void*
    public IntPtr cancel_this;
    /// clock_t->int
    public int end_time;
    /// EANYCODE_CHAR[1], marshaled inline as a one-element array
    [MarshalAs(UnmanagedType.ByValArray, SizeConst = 1, ArraySubType = UnmanagedType.Struct)]
    public EANYCODE_CHAR[] text;
}
/// <summary>
/// Managed mirror of one native character record: character code, bounding box and
/// per-character attributes. Six 2-byte fields followed by four 1-byte fields,
/// laid out sequentially (16 bytes when marshaled).
/// </summary>
/// <remarks>
/// No pointer types are used, so the struct does not need the <c>unsafe</c> modifier.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct EANYCODE_CHAR
{
    /// UINT16->unsigned short
    public ushort char_code;
    /// INT16->short
    public short left;
    /// INT16->short
    public short right;
    /// INT16->short
    public short top;
    /// INT16->short
    public short bottom;
    /// INT16->short
    public short font_index;
    /// UINT8->unsigned char
    public byte confidence;
    /// UINT8->unsigned char
    public byte point_size;
    /// INT8->char
    public byte blanks;
    /// UINT8->unsigned char
    public byte formatting;
}
}
/// <summary>
/// P/Invoke declarations for the kernel32 functions used to load a native DLL at
/// run time, resolve its exports and unload it again.
/// </summary>
/// <remarks>
/// All three declarations set <c>SetLastError</c>, so after a failure the Win32 error
/// code can be read with <c>Marshal.GetLastWin32Error()</c>.
/// </remarks>
public static class WrapperUnManaged
{
    /// <summary>Loads the given module; returns IntPtr.Zero on failure.</summary>
    [DllImport("kernel32.dll", CharSet = CharSet.Ansi, SetLastError = true)]
    public static extern IntPtr LoadLibrary(string dllToLoad);

    /// <summary>Returns the address of an exported function; IntPtr.Zero when it is not found.</summary>
    [DllImport("kernel32.dll", CharSet = CharSet.Ansi, SetLastError = true)]
    public static extern IntPtr GetProcAddress(IntPtr hModule, string procedureName);

    /// <summary>Releases a module handle obtained from LoadLibrary; returns false on failure.</summary>
    [DllImport("kernel32.dll", SetLastError = true)]
    [return: MarshalAs(UnmanagedType.Bool)]
    public static extern bool FreeLibrary(IntPtr hModule);
}
#region Le classi per rappresentazione della pagina
/// <summary>
/// A recognized page: its rows of words plus row-spacing statistics.
/// </summary>
public class OCRPage
{
    /// <summary>Rows of the page, keyed by row number.</summary>
    public Dictionary<int, OCRRow> rows = new Dictionary<int, OCRRow>();

    /// <summary>Individual row-to-row distances collected while the page is filled.</summary>
    internal List<Decimal> distancesRows = new List<decimal>();

    private decimal averageRowDistance;

    /// <summary>Row distance value stored for the page (set by the code that fills the page).</summary>
    public decimal DistanceRows
    {
        get { return averageRowDistance; }
        set { averageRowDistance = value; }
    }
}
/// <summary>
/// Class representing one row of words on the page
/// </summary>
public class OCRRow
{
// Words of the row, in the order they were added.
public List<OCRWord> words = new List<OCRWord>();
}
/// <summary>
/// A single word on the page: its text and the four edges of its bounding box.
/// </summary>
public class OCRWord
{
    private string wordText;
    private int leftEdge;
    private int rightEdge;
    private int topEdge;
    private int bottomEdge;

    /// <summary>Creates a word with the given text; all edges start at 0.</summary>
    public OCRWord(string text)
    {
        wordText = text;
    }

    /// <summary>Text of the word.</summary>
    public string Text
    {
        get { return wordText; }
        set { wordText = value; }
    }

    /// <summary>Left edge of the bounding box.</summary>
    public int Left
    {
        get { return leftEdge; }
        set { leftEdge = value; }
    }

    /// <summary>Right edge of the bounding box.</summary>
    public int Right
    {
        get { return rightEdge; }
        set { rightEdge = value; }
    }

    /// <summary>Top edge of the bounding box.</summary>
    public int Top
    {
        get { return topEdge; }
        set { topEdge = value; }
    }

    /// <summary>Bottom edge of the bounding box.</summary>
    public int Bottom
    {
        get { return bottomEdge; }
        set { bottomEdge = value; }
    }
}
#endregion
}