[tesseract-ocr] push by zde...@gmail.com - fix issue 1417 on 2015-02-07 21:22 GMT

10 views
Skip to first unread message

tesser...@googlecode.com

unread,
Feb 7, 2015, 4:22:51 PM (2/7/15)
to tesserac...@googlegroups.com
Revision: 4c7c960bfd57
Author: Zdenko Podobný <zde...@gmail.com>
Date: Sat Feb 7 21:22:20 2015 UTC
Log: fix issue 1417

https://code.google.com/p/tesseract-ocr/source/detail?r=4c7c960bfd57

Modified:
/api/renderer.cpp
/ccmain/tesseractclass.cpp
/ccmain/tesseractclass.h

=======================================
--- /api/renderer.cpp Sun Aug 3 16:22:12 2014 UTC
+++ /api/renderer.cpp Sat Feb 7 21:22:20 2015 UTC
@@ -113,6 +113,13 @@

AppendString(utf8);
delete[] utf8;
+
+ bool pageBreak = false;
+ api->GetBoolVariable("include_page_breaks", &pageBreak);
+ const char* pageSeparator = api->GetStringVariable("page_separator");
+ if(pageBreak) {
+ AppendString(pageSeparator);
+ }

return true;
}
=======================================
--- /ccmain/tesseractclass.cpp Tue Jan 27 21:58:04 2015 UTC
+++ /ccmain/tesseractclass.cpp Sat Feb 7 21:22:20 2015 UTC
@@ -442,6 +442,12 @@
this->params()),
BOOL_MEMBER(preserve_interword_spaces, false,
"Preserve multiple interword spaces", this->params()),
+ BOOL_MEMBER(include_page_breaks, FALSE,
+ "Include page separator string in output text after each "
+ "image/page.", this->params()),
+ STRING_MEMBER(page_separator, "\f",
+ "Page separator (default is form feed control character)",
+ this->params()),

// The following parameters were deprecated and removed from their original
// locations. The parameters are temporarily kept here to give Tesseract
=======================================
--- /ccmain/tesseractclass.h Tue Jan 27 21:58:04 2015 UTC
+++ /ccmain/tesseractclass.h Sat Feb 7 21:22:20 2015 UTC
@@ -1009,7 +1009,13 @@
double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75,
"Fraction of height used as a minimum gap for aligned blobs.");
INT_VAR_H(tessedit_parallelize, 0, "Run in parallel where possible");
- BOOL_VAR_H(preserve_interword_spaces, false, "Preserve multiple interword spaces");
+ BOOL_VAR_H(preserve_interword_spaces, false,
+ "Preserve multiple interword spaces");
+ BOOL_VAR_H(include_page_breaks, false,
+ "Include page separator string in output text after each "
+ "image/page.");
+ STRING_VAR_H(page_separator, "\f",
+ "Page separator (default is form feed control character)");

// The following parameters were deprecated and removed from their original
// locations. The parameters are temporarily kept here to give Tesseract
Reply all
Reply to author
Forward
0 new messages