Non-ASCII character support for Palm .pdb ebook files.

4 views
Skip to first unread message

Ludo

unread,
Jan 3, 2010, 5:55:27 PM1/3/10
to FBReader
Hi,

I've tried to read a .pdb file (http://www.ebooksgratuits.org/
newsendbook.php?id=449&format=pr)
but FBReader does not seem to support the non-ASCII escape sequences
\aXXX and \UXXXX, as explained in this document:
http://www.m.ereader.com/ereader/help/dropbook/pml.htm

So I've written a patch that seems to work (tested on Ubuntu Linux
9.04).

Please find the patch below.

Best regards,

Ludo.

--- fbreader-0.12.1/fbreader/src/formats/pdb/PmlReader.cpp 2009-12-13
09:03:11.000000000 +0100
+++ fbreader-0.12.1-ludo/fbreader/src/formats/pdb/PmlReader.cpp
2010-01-03 22:57:36.000000000 +0100
@@ -23,14 +23,55 @@
* http://ccit205.wikispaces.com/Palm+Markup+Language+(PML)
*/

+#include <stdlib.h>
+
#include <cstdlib>
#include <cctype>

#include <ZLFile.h>
#include <ZLInputStream.h>
+#include <ZLUnicodeUtil.h>

#include "PmlReader.h"

+
+// Windows CP-1252 -> Unicode code points for codes 128..159; index = code - 128.
+// Codes outside 128..159 are used as-is by the caller (same value in Unicode).
+static const int cp1252_128_159_to_ucs4[32] = {
+ 8364, // 128 -> U+20AC euro sign
+ 129, // 129: unassigned in CP-1252, maps to itself
+ 8218, // 130 -> U+201A single low-9 quotation mark
+ 402, // 131 -> U+0192 latin small letter f with hook
+ 8222, // 132 -> U+201E double low-9 quotation mark
+ 8230, // 133 -> U+2026 horizontal ellipsis
+ 8224, // 134 -> U+2020 dagger
+ 8225, // 135 -> U+2021 double dagger
+ 710, // 136 -> U+02C6 modifier letter circumflex accent
+ 8240, // 137 -> U+2030 per mille sign
+ 352, // 138 -> U+0160 latin capital letter S with caron
+ 8249, // 139 -> U+2039 single left-pointing angle quotation mark
+ 338, // 140 -> U+0152 latin capital ligature OE
+ 141, // 141: unassigned in CP-1252, maps to itself
+ 381, // 142 -> U+017D latin capital letter Z with caron
+ 143, // 143: unassigned in CP-1252, maps to itself
+ 144, // 144: unassigned in CP-1252, maps to itself
+ 8216, // 145 -> U+2018 left single quotation mark
+ 8217, // 146 -> U+2019 right single quotation mark
+ 8220, // 147 -> U+201C left double quotation mark
+ 8221, // 148 -> U+201D right double quotation mark
+ 8226, // 149 -> U+2022 bullet
+ 8211, // 150 -> U+2013 en dash
+ 8212, // 151 -> U+2014 em dash
+ 732, // 152 -> U+02DC small tilde
+ 8482, // 153 -> U+2122 trade mark sign
+ 353, // 154 -> U+0161 latin small letter s with caron
+ 8250, // 155 -> U+203A single right-pointing angle quotation mark
+ 339, // 156 -> U+0153 latin small ligature oe
+ 157, // 157: unassigned in CP-1252, maps to itself
+ 382, // 158 -> U+017E latin small letter z with caron
+ 376, // 159 -> U+0178 latin capital letter Y with diaeresis
+};
+
static const int pmlStreamBufferSize = 4096;

const std::string PmlReader::ourDefaultParameter = "";
@@ -221,6 +262,10 @@
case 'C': // CN - chapter title + indent level| - | - |
case 'F': // Fn - link to a footnote | + | + |
return 2;
+ case 'a': // \aXXX Insert non-ASCII character whose Windows
1252 code is decimal XXX.
+ return 4;
+ case 'U': // \UXXXX Insert non-ASCII character whose Unicode
code is hexadecimal XXXX.
+ return 5;
default:
return 0;
}
@@ -310,6 +355,25 @@
case 'm':
addImageReference(parameter);
break;
+ case 'a': // \aXXX Insert non-ASCII character whose Windows
1252 code is decimal XXX.
+ {
+ int char_code = strtol(tagName.c_str()+1, 0, 10); // Windows
CP 1252 code
+ if ((char_code >=128) && (char_code <=159)) {
+ char_code = cp1252_128_159_to_ucs4[char_code - 128];
+ }
+ char buffer[8];
+ int len = ZLUnicodeUtil::ucs4ToUtf8(buffer, char_code);
+ processCharData(buffer, len, false);
+ }
+ break;
+ case 'U': // \UXXXX Insert non-ASCII character whose Unicode
code is hexadecimal XXXX.
+ {
+ int char_code = strtol(tagName.c_str()+1, 0, 16); // Unicode
+ char buffer[8];
+ int len = ZLUnicodeUtil::ucs4ToUtf8(buffer, char_code);
+ processCharData(buffer, len, false);
+ }
+ break;
default:
//std::cerr << "PmlReader: unsupported tag: name: " << tagName
<< " parameter: " << parameter << "\n";
break;

Reply all
Reply to author
Forward
0 new messages