It also contains a fix for issue 12 reported against inferno-os:
http://code.google.com/p/inferno-os/issues/detail?id=12
In case the diff is garbled, please pick up the patch from
http://www.tip9ug.jp/who/art/inferno/tcs-tune.patch.
Thanks.
Arvindh
diff -r ad35fd4ac318 -r 5898f9369806 appl/cmd/tcs.b
--- a/appl/cmd/tcs.b Sat Jun 16 02:53:24 2007 +0530
+++ b/appl/cmd/tcs.b Sat Jun 16 03:30:13 2007 +0530
@@ -95,15 +95,20 @@ init(nil : ref Draw->Context, args : lis
btoss : Convcs->State = nil;
stobs : Convcs->State = nil;
- while ((n := sys->read(fd, inbuf[start:], len inbuf - start)) > 0) {
+ while ((n := sys->read(fd, inbuf[start:], len inbuf - start)) >= 0) {
s := "";
nc := 0;
outbuf : array of byte = nil;
- (btoss, s, nc) = btos->btos(btoss, inbuf[0:n], -1);
- if (s != nil)
+ n += start;
+ if (n)
+ (btoss, s, nc) = btos->btos(btoss, inbuf[0:n], -1);
+ else
+ (btoss, s, nc) = btos->btos(btoss, inbuf[0:n], 0);
+ if (s != nil || n == 0) {
(stobs, outbuf) = stob->stob(stobs, s);
- if (outbuf != nil) {
out.write(outbuf, len outbuf);
+ if (s == nil)
+ break;
}
# copy down unconverted part of buffer
start = n - nc;
diff -r ad35fd4ac318 -r 5898f9369806 appl/lib/convcs/mkfile
--- a/appl/lib/convcs/mkfile Sat Jun 16 02:53:24 2007 +0530
+++ b/appl/lib/convcs/mkfile Sat Jun 16 03:30:13 2007 +0530
@@ -10,6 +10,8 @@ TARG=\
cp_stob.dis\
euc-jp_btos.dis\
gb2312_btos.dis\
+ tune_btos.dis\
+ tune_stob.dis\
utf8_btos.dis\
utf8_stob.dis\
diff -r ad35fd4ac318 -r 5898f9369806 appl/lib/convcs/tune.b
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/appl/lib/convcs/tune.b Sat Jun 16 03:30:13 2007 +0530
@@ -0,0 +1,83 @@
+BADCHAR: con 16rfffd; # returned by findbytune/findbyuni when no table entry matches
+
+t1 := array[] of { # (Unicode character, encoded code) pairs; searched by findbyuni (on t0) and findbytune (on t1)
+ ('அ', ''), # NOTE(review): the second literals look empty here, probably lost in transit; confirm against the original patch
+ ('ஆ', ''),
+ ('இ', ''),
+ ('ஈ', ''),
+ ('உ', ''),
+ ('ஊ', ''),
+ ('எ', ''),
+ ('ஏ', ''),
+ ('ஐ', ''),
+ ('ஒ', ''),
+ ('ஓ', ''),
+ ('ஔ', ''),
+ ('ஃ', '')
+};
+
+t2 := array[] of { # sign characters; btos indexes this with c%16, stob searches it with findindex
+ '்',
+ '்', # filler: btos appends no sign when the index is 1, so this slot is never read there
+ 'ா',
+ 'ி',
+ 'ீ',
+ 'ு',
+ 'ூ',
+ 'ெ',
+ 'ே',
+ 'ை',
+ 'ொ',
+ 'ோ',
+ 'ௌ'
+};
+
+t3 := array[] of { # (Unicode character, encoded code) pairs; btos looks the code up as c-i+1 with i = c%16
+ ('க', ''), # NOTE(review): the second literals look empty here, probably lost in transit; confirm against the original patch
+ ('ங', ''),
+ ('ச', ''),
+ ('ஜ', ''),
+ ('ஞ', ''),
+ ('ட', ''),
+ ('ண', ''),
+ ('த', ''),
+ ('ந', ''),
+ ('ன', ''),
+ ('ப', ''),
+ ('ம', ''),
+ ('ய', ''),
+ ('ர', ''),
+ ('ற', ''),
+ ('ல', ''),
+ ('ள', ''),
+ ('ழ', ''),
+ ('வ', ''),
+ ('ஶ', ''),
+ ('ஷ', ''),
+ ('ஸ', ''),
+ ('ஹ', '')
+};
+
+findbytune(tab: array of (int, int), t: int): int # map a pair's second element back to its first
+{
+ for(i:=0; i<len tab; i++) # linear scan; the first match wins
+ if(tab[i].t1 == t)
+ return tab[i].t0; # first element of the matching pair
+ return BADCHAR; # no pair has t as its second element
+}
+
+findbyuni(tab: array of (int, int), u: int): int # map a pair's first element to its second
+{
+ for(i:=0; i<len tab; i++) # linear scan; the first match wins
+ if(tab[i].t0 == u)
+ return tab[i].t1; # second element of the matching pair
+ return BADCHAR; # no pair has u as its first element
+}
+
+findindex(tab: array of int, c: int): int # position of c in tab
+{
+ for(i:=0; i<len tab; i++) # linear scan; the lowest matching index is returned
+ if(tab[i] == c)
+ return i;
+ return -1; # c does not occur in tab
+}
diff -r ad35fd4ac318 -r 5898f9369806 appl/lib/convcs/tune_btos.b
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/appl/lib/convcs/tune_btos.b Sat Jun 16 03:30:13 2007 +0530
@@ -0,0 +1,66 @@
+implement Btos;
+
+include "sys.m";
+include "convcs.m";
+include "tune.b";
+
+MAXINT: con 16r7fffffff;
+
+sys : Sys;
+
+init(nil: string): string # load the Sys module used by btos; the argument is ignored
+{
+ sys = load Sys Sys->PATH;
+ return nil; # always nil; the result of the load is not checked
+}
+
+btos(nil: Convcs->State, b: array of byte, n: int): (Convcs->State, string, int) # decode b into a string; returns (nil, string built, bytes of b consumed)
+{
+ nc, nb, tr, i: int;
+ str: string;
+
+ nc = nb = 0; # nc: characters written to str, nb: bytes read from b
+ str = "";
+ if(n == -1) # -1 is treated as no limit on output characters
+ n = MAXINT;
+ while(nb<len b && nc<n-3){ # stopping 3 short keeps nc <= n even when one input character appends 4 (longest case below)
+ (c, l, nil) := sys->byte2char(b, nb);
+ if(l == 0) # byte2char reported length 0: stop and leave the remaining bytes unconsumed
+ break;
+ nb += l;
+ if(c>='' && c <= '' && (i = c%16) < len t2){ # i = c's offset in its row of 16 codes; NOTE(review): range bounds look empty, probably lost in transit
+ if(c >= ''){ # upper part of the range expands to a fixed three-character sequence
+ str[nc++] = 'க';
+ str[nc++] = '்';
+ str[nc++] = 'ஷ';
+ }else
+ str[nc++] = findbytune(t3, c-i+1); # t3 is keyed by the code at offset 1 of the row
+ if(i != 1) # offset 1 gets no sign appended
+ str[nc++] = t2[i];
+ }else if((tr = findbytune(t1, c)) != BADCHAR) # otherwise try a one-to-one mapping via t1
+ str[nc++] = tr;
+ else case c{ # remaining special codes expand to fixed sequences; NOTE(review): case labels look empty, confirm against the original patch
+ '' =>
+ str[nc++] = 'ண'; str[nc++] = 'ா';
+ '' =>
+ str[nc++] = 'ற'; str[nc++] = 'ா';
+ '' =>
+ str[nc++] = 'ன'; str[nc++] = 'ா';
+ '' =>
+ str[nc++] = 'ண'; str[nc++] = 'ை';
+ '' =>
+ str[nc++] = 'ல'; str[nc++] = 'ை';
+ '' =>
+ str[nc++] = 'ள'; str[nc++] = 'ை';
+ '' =>
+ str[nc++] = 'ன'; str[nc++] = 'ை';
+ '' =>
+ str[nc++] = 'ஶ'; str[nc++] = '்'; str[nc++] = 'ர'; str[nc++] = 'ீ';
+ * =>
+ if(c >= '' && c <= '') # unmapped codes inside this range become BADCHAR
+ c = BADCHAR;
+ str[nc++] = c; # everything else passes through unchanged
+ }
+ }
+ return (nil, str, nb); # no conversion state is kept between calls
+}
diff -r ad35fd4ac318 -r 5898f9369806 appl/lib/convcs/tune_stob.b
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/appl/lib/convcs/tune_stob.b Sat Jun 16 03:30:13 2007 +0530
@@ -0,0 +1,128 @@
+implement Stob;
+
+include "sys.m";
+include "convcs.m";
+include "tune.b";
+
+sys: Sys;
+lastc: int;
+Startstate: import Convcs;
+
+init(nil: string): string # load the Sys module used by stob; the argument is ignored
+{
+ sys = load Sys Sys->PATH;
+ return nil; # always nil; the result of the load is not checked
+}
+
+stob(s: Convcs->State, str: string): (Convcs->State, array of byte) # encode str to bytes; s carries the state between calls, module-level lastc holds the pending code
+{
+ c, i, n, nb: int;
+ b: array of byte;
+
+ b = array[(len str+2)*Sys->UTFmax] of byte;
+ n = nb = 0; # n: index into str, nb: bytes written to b
+ while(n<len str){
+ case s{
+ Startstate => # nothing pending
+ if((c=findbyuni(t3, str[n])) != BADCHAR){ # t3 hit: hold its code in lastc until the next character is seen
+ lastc = c;
+ s = "1";
+ }else if(str[n] == 'ஒ'){
+ lastc = ''; # NOTE(review): literals written '' look empty throughout, probably lost in transit; confirm against the original patch
+ s = "3";
+ }else if((c=findbyuni(t1, str[n])) != BADCHAR)
+ nb += sys->char2byte(c, b, nb);
+ else
+ nb += sys->char2byte(str[n], b, nb); # no mapping: the character is written unchanged
+ "1" => # lastc holds a pending code; str[n] may be a sign from t2
+ if((i=findindex(t2, str[n])) != -1){
+ if(lastc!=BADCHAR)
+ lastc += i-1; # offset the pending code by the sign's t2 index (index 1 leaves it unchanged)
+ if(str[n] == 'ெ')
+ s = "5";
+ else if(str[n] == 'ே')
+ s = "4";
+ else if(lastc == '')
+ s = "2";
+ else if(lastc == '')
+ s = "6";
+ else{
+ nb += sys->char2byte(lastc, b, nb);
+ s = Startstate;
+ }
+ }else if(lastc!=BADCHAR && (str[n]=='²' || str[n]=='³' || str[n]=='⁴')){
+ lastc = BADCHAR; # a superscript 2, 3 or 4 here turns the pending code into BADCHAR
+ }else{
+ nb += sys->char2byte(lastc, b, nb);
+ s = Startstate;
+ continue; # skips the n++ below, so str[n] is examined again in the new state
+ }
+ "2" =>
+ if(str[n] == 'ஷ'){
+ lastc = '';
+ s = "1";
+ }else{
+ nb += sys->char2byte(lastc, b, nb);
+ s = Startstate;
+ continue;
+ }
+ "3" => # entered from Startstate after 'ஒ'
+ s = Startstate;
+ if(str[n] == 'ௗ')
+ nb += sys->char2byte('', b, nb);
+ else{
+ nb += sys->char2byte(lastc, b, nb);
+ continue;
+ }
+ "4" => # entered from state "1" after 'ே'
+ s = Startstate;
+ if(str[n] == 'ா'){
+ if(lastc != BADCHAR)
+ lastc += 3;
+ nb += sys->char2byte(lastc, b, nb);
+ }else{
+ nb += sys->char2byte(lastc, b, nb);
+ continue;
+ }
+ "5" => # entered from state "1" after 'ெ'
+ s = Startstate;
+ if(str[n] == 'ா' || str[n] == 'ௗ'){
+ if(lastc != BADCHAR)
+ if(str[n] == 'ா')
+ lastc += 3;
+ else
+ lastc += 5;
+ nb += sys->char2byte(lastc, b, nb);
+ }else{
+ nb += sys->char2byte(lastc, b, nb);
+ continue;
+ }
+ "6" =>
+ if(str[n] == 'ர')
+ s = "7";
+ else{
+ nb += sys->char2byte(lastc, b, nb);
+ s = Startstate;
+ continue;
+ }
+ "7" => # entered from state "6" after 'ர'
+ if(str[n] == 'ீ'){
+ nb += sys->char2byte('', b, nb);
+ s = Startstate;
+ }else{
+ nb += sys->char2byte(lastc, b, nb);
+ lastc = '';
+ s = "1";
+ continue;
+ }
+ }
+ n++;
+ }
+ if(str == "" && s != Startstate){ # an empty str with a non-start state flushes whatever is pending
+ nb += sys->char2byte(lastc, b, nb);
+ if(s == "7")
+ nb += sys->char2byte('', b, nb);
+ s = Startstate;
+ }
+ return (s, b[:nb]); # the returned state is non-start when a code is still pending
+}
diff -r ad35fd4ac318 -r 5898f9369806 lib/convcs/charsets
--- a/lib/convcs/charsets Sat Jun 16 02:53:24 2007 +0530
+++ b/lib/convcs/charsets Sat Jun 16 03:30:13 2007 +0530
@@ -199,3 +199,8 @@ 8bit=
8bit=
desc="raw 8-bit data"
stob=/dis/lib/convcs/8bit_stob.dis
+
+tune=
+ desc='Tamil Unicode New Encoding (TUNE)'
+ stob=/dis/lib/convcs/tune_stob.dis
+ btos=/dis/lib/convcs/tune_btos.dis
-caerwyn