Google Groups no longer supports new Usenet posts or subscriptions. Historical content remains viewable.
Dismiss

[hypermail] ISO-2022-JP support

0 views
Skip to first unread message

Hisashi Gotoh

unread,
Apr 4, 2001, 7:59:02 PM4/4/01
to
----Next_Part(Thu_Apr__5_05:20:12_2001_595)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Hello, I added support to Hypermail 2.1.0 for ISO-2022-JP(RFC1468)
encoding messages. I attach a patch to this mail.

ISO-2022-JP is the standard encoding for Japanese e-mail.

To process Japanese e-mail, set the new option "iso2022jp" to 1.
Of course, US-ASCII messages are still processed correctly with "iso2022jp=1".

* To Japanize

The difficult parts of processing Japanese strings are parsing URLs
and e-mail addresses and converting the HTML-special characters
(e.g. "). To solve these, I added two new functions,
iso2022_state() and hm_strchr().

iso2022_state() evaluates whether the target string is US-ASCII or
ISO-2022-JP. hm_strchr() is the same as strchr(), but it searches only
the US-ASCII parts of the string.

* Bug ?

The "Messages sorted by: " field is empty if the "-c config" option
is not set.


You can see sample output at the URL below:

http://www.horae.dti.ne.jp/~gotoh/dist/hypermail/sample/

If there is no problem with this patch, I hope it will be included in
the next release :)

Regards,

-- Hisashi Gotoh

----Next_Part(Thu_Apr__5_05:20:12_2001_595)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment; filename="hypermail-2.1.0-iso2022jp.patch"

Index: hypermail/src/getname.c
diff -u hypermail/src/getname.c:1.1.1.1 hypermail/src/getname.c:1.5
--- hypermail/src/getname.c:1.1.1.1 Tue Mar 27 00:17:11 2001
+++ hypermail/src/getname.c Fri Mar 30 03:16:15 2001
@@ -1,5 +1,6 @@
#include "hypermail.h"
#include "getname.h"
+#include "setup.h"

extern char *set_domainaddr;

@@ -105,7 +106,7 @@
/* EMail Processing First:
** First, is there an '@' sign we can use as an anchor ?
*/
- if ((c = strchr(line, '@')) == NULL) {
+ if ((c = hm_strchr(line, '@')) == NULL) {
/*
** No '@' sign here so ...
*/
@@ -196,8 +197,8 @@

comment_fnd = 1;
}
- else if (strchr(line, '(')) {
- c = strchr(line, '(') + 1;
+ else if (hm_strchr(line, '(')) {
+ c = hm_strchr(line, '(') + 1;
if (*c == '"') /* is there a comment in the comment ? */
c++;
}
@@ -250,14 +251,30 @@
}

if (!comment_fnd) {
+ int in_ascii = TRUE, esclen = 0;
for (i = 0, len = NAMESTRLEN - 1;
*c && *c != '<' && *c != '\"' && *c != ')' && *c != '(' &&
*c != '\n' && i < len; c++)
- name[i++] = *c;
-
+ {
+ if (set_iso2022jp) {
+ iso2022_state(c, &in_ascii, &esclen);
+ if (esclen) {
+ for (; esclen; esclen--, c++) name[i++] = *c;
+ for (; in_ascii == FALSE && i < len;
+ c++, iso2022_state(c, &in_ascii, &esclen)) {
+ name[i++] = *c;
+ }
+ c--;
+ } else {
+ name[i++] = *c;
+ }
+ } else {
+ name[i++] = *c;
+ }
+ }
}

- if (*c == '<' || *c == '(')
+ if (name[i-1] == ' ' && *c == '<' || *c == '(')
name[--i] = '\0';
else
name[i] = '\0';
Index: hypermail/src/mprintf.c
diff -u hypermail/src/mprintf.c:1.1.1.1 hypermail/src/mprintf.c:1.4
--- hypermail/src/mprintf.c:1.1.1.1 Tue Mar 27 00:17:12 2001
+++ hypermail/src/mprintf.c Thu Mar 29 05:43:47 2001
@@ -116,6 +116,7 @@
#endif

#include "mprintf.h"
+#include "setup.h"

#define BUFFSIZE 256 /* buffer for long-to-str and float-to-str calcs */
#define MAX_PARAMETERS 128 /* lame static limit */
@@ -1044,6 +1045,19 @@
return -1;
}

+/*
+ * Accepts what isprint() or isspace() accept, plus ESC when
+ * set_iso2022jp is set.  Returns 1 if accepted, 0 otherwise.
+ */
+static int
+hm_isprint(int ch)
+{
+    if (isprint(ch) || isspace(ch))
+        return(1);
+
+    return((set_iso2022jp && ch == '\033') ? 1 : 0);
+}
+
/* fputc() look-alike */
static int addbyter(int output, FILE *data)
{
@@ -1051,7 +1065,7 @@

if(infop->length < infop->max) {
/* only do this if we haven't reached max length yet */
- if (isprint(output) || isspace(output))
+ if (hm_isprint(output))
{
infop->buffer[0] = (char)output; /* store */
infop->buffer++; /* increase pointer */
Index: hypermail/src/printfile.c
diff -u hypermail/src/printfile.c:1.1.1.1 hypermail/src/printfile.c:1.3
--- hypermail/src/printfile.c:1.1.1.1 Tue Mar 27 00:17:12 2001
+++ hypermail/src/printfile.c Thu Mar 29 05:43:48 2001
@@ -205,7 +205,7 @@
title = maprintf("%s: %s", label, rp = convchars(subject));
free(rp);

- if (strlen(title) > TITLESTRLEN) {
+ if (!set_iso2022jp && strlen(title) > TITLESTRLEN) {
rp = title + (TITLESTRLEN - 1);
*rp-- = '\0';
}
Index: hypermail/src/proto.h
diff -u hypermail/src/proto.h:1.1.1.2 hypermail/src/proto.h:1.5
--- hypermail/src/proto.h:1.1.1.2 Thu Apr 5 03:12:33 2001
+++ hypermail/src/proto.h Thu Apr 5 03:27:39 2001
@@ -101,6 +101,9 @@
char *parseemail(char *, char *, char *);
char *parseurl(char *);

+char *hm_strchr(const char *, int);
+void iso2022_state(const char *str, int *state, int *esc);
+
/*
** quotes.c
*/
Index: hypermail/src/setup.c
diff -u hypermail/src/setup.c:1.1.1.2 hypermail/src/setup.c:1.5
--- hypermail/src/setup.c:1.1.1.2 Thu Apr 5 03:12:33 2001
+++ hypermail/src/setup.c Thu Apr 5 04:12:52 2001
@@ -101,6 +101,8 @@
char *set_describe_folder;
int set_msgsperfolder;

+bool set_iso2022jp;
+
struct Config cfg[] = {
{"language", &set_language, LANGUAGE, CFG_STRING,
"# A two-letter string specifying the language to use!\n"
@@ -492,6 +494,9 @@
"# a symbolic link by this name to the most recently created\n"
"# subdirectory. Note that many web servers are configured to\n"
"# not follow symbolic links for security reasons.\n"},
+
+ {"iso2022jp", &set_iso2022jp, BFALSE, CFG_SWITCH,
+ "# Set this to On to support ISO-2022-JP messages.\n"},
};

/* ---------------------------------------------------------------- */
@@ -757,6 +762,7 @@
{
FILE *f;
char line[MAXLINE];
+ int r = TRUE;

PreConfig();

@@ -772,12 +778,12 @@
fclose(f);
}
else
- return FALSE;
+ r = FALSE;
}

PostConfig();

- return TRUE;
+ return r;
}

void ConfigCleanup(void)
Index: hypermail/src/setup.h
diff -u hypermail/src/setup.h:1.1.1.2 hypermail/src/setup.h:1.4
--- hypermail/src/setup.h:1.1.1.2 Thu Apr 5 03:12:34 2001
+++ hypermail/src/setup.h Thu Apr 5 03:33:23 2001
@@ -125,4 +125,6 @@
extern int set_msgsperfolder;
extern char *set_describe_folder;

+extern bool set_iso2022jp;
+
#endif
Index: hypermail/src/string.c
diff -u hypermail/src/string.c:1.1.1.2 hypermail/src/string.c:1.6
--- hypermail/src/string.c:1.1.1.2 Thu Apr 5 03:12:34 2001
+++ hypermail/src/string.c Thu Apr 5 03:27:39 2001
@@ -387,12 +387,26 @@
char *convchars(char *line)
{
struct Push buff;
+ int in_ascii = TRUE, esclen = 0;

INIT_PUSH(buff); /* init macro */

/* avoid strlen() for speed */

for (; *line; line++) {
+
+ if (set_iso2022jp) {
+ iso2022_state(line, &in_ascii, &esclen);
+ if (esclen && in_ascii == FALSE) {
+ for (; in_ascii == FALSE && *line; line++) {
+ PushByte(&buff, *line);
+ iso2022_state(line, &in_ascii, &esclen);
+ }
+ line--;
+ continue;
+ }
+ }
+
switch (*line) {
case '<':
PushString(&buff, "&lt;");
@@ -458,9 +472,22 @@
static void translatechars(char *start, char *end, struct Push *buff)
{
char *p;
+ int in_ascii = TRUE, esclen = 0;

for (p = start; p <= end; p++) {

+ if (set_iso2022jp) {
+ iso2022_state(p, &in_ascii, &esclen);
+ if (esclen && in_ascii == FALSE) {
+ for (; in_ascii == FALSE && p <= end; p++) {
+ PushByte(buff, *p);
+ iso2022_state(p, &in_ascii, &esclen);
+ }
+ p--;
+ continue;
+ }
+ }
+
switch (*p) {
case '<':
PushString(buff, "&lt;");
@@ -540,11 +567,13 @@
char *replacechar(char *string, char old, char *new)
{
struct Push buff;
+ int in_ascii = TRUE, esclen = 0;

INIT_PUSH(buff);

for (; *string; string++) {
- if (*string == old) {
+ if (set_iso2022jp) iso2022_state(string, &in_ascii, &esclen);
+ if (in_ascii == TRUE && *string == old) {
PushString(&buff, new);
}
else
@@ -640,6 +669,8 @@

char *at;

+ int in_ascii = TRUE, esclen = 0;
+
if(set_spamprotect)
at="_at_";
else
@@ -657,6 +688,16 @@
#define VALID_IN_EMAIL_USERNAME "a-zA-Z0-9_.%-"
#define VALID_IN_EMAIL_DOMAINNAME "a-zA-Z0-9.-"

+ if (set_iso2022jp) {
+ for (; ptr > input; input++) {
+ iso2022_state(input, &in_ascii, &esclen);
+ if (!esclen) continue;
+ input += esclen;
+ if (in_ascii == TRUE)
+ backoff = ptr - input;
+ }
+ }
+
/* check left side */
while (backoff) {
if (sscanf
@@ -826,6 +867,7 @@

if (leftmost) { /* we found at least one protocol prefix */
int accepted = FALSE;
+ int urlscan = FALSE;

/*
* all the charaters between the position where we started
@@ -836,7 +878,11 @@
translatechars(inputp, leftmost-1, &buff);
inputp = leftmost + strlen(thisprotocol);

- if (sscanf(inputp, "%255[^] )>\"\'\n[\t\\]", urlbuff)) {
+ if (set_iso2022jp)
+ urlscan = sscanf(inputp, "%255[^] \033)>\"\'\n[\t\\]", urlbuff);
+ else
+ urlscan = sscanf(inputp, "%255[^] )>\"\'\n[\t\\]", urlbuff);
+ if (urlscan) {
char *r;

/*
@@ -883,3 +929,93 @@
}
RETURN_PUSH(buff);
} /* end parseurl() */
+
+/*
+ * Support RFC1468 (and RFC1554, 94 character sets)
+ *
+ * reference
+ * - RFC1468: Japanese Character Encoding for Internet Messages (ISO-2022-JP)
+ * - RFC1554: ISO-2022-JP-2: Multilingual Extension of ISO-2022-JP
+ * - RFC1557: Korean Character Encoding for Internet Messages
+ * - RFC2237: Japanese Character Encoding for Internet Messages (ISO-2022-JP-1)
+ */
+
+/*
+ * state
+ * TRUE: ascii (default)
+ * FALSE: non-ascii
+ * esclen
+ * n: escape sequence length
+ */
+void
+iso2022_state(const char *str, int *state, int *esclen)
+{
+    /*
+     * Inspect the bytes at str.  *esclen receives 0 when str is not at
+     * an ESC byte, the length of a recognized designation (3 or 4),
+     * or 1 for any other ESC.  *state becomes TRUE (ascii) or FALSE
+     * (non-ascii) on a recognized designation and is kept otherwise.
+     */
+    char lead, sel;
+    int len = 1;    /* lone or unrecognized ESC */
+
+    /* anything other than TRUE/FALSE is treated as ascii */
+    if (*state != TRUE && *state != FALSE)
+        *state = TRUE;
+
+    if (*str != '\033') {
+        *esclen = 0;
+        return;
+    }
+    lead = *(str+1);
+    if (lead == '$') {
+        sel = *(str+2);
+        if (sel == 'B' || sel == '@' || sel == 'A') {
+            /*
+             * ESC $ B    JIS X 0208-1983 to G0
+             * ESC $ @    JIS X 0208-1976 to G0
+             * ESC $ A    GB2312-1980 to G0
+             */
+            *state = FALSE;
+            len = 3;
+        } else if (sel == '(' && (*(str+3) == 'C' || *(str+3) == 'D')) {
+            /*
+             * ESC $ ( C    KSC 5601-1987 to G0
+             * ESC $ ( D    JIS X 0212-1990 to G0
+             */
+            *state = FALSE;
+            len = 4;
+        }
+    } else if (lead == '(') {
+        sel = *(str+2);
+        if (sel == 'B' || sel == 'J') {
+            /*
+             * ESC ( B    ASCII to G0
+             * ESC ( J    JIS X 0201-Roman to G0
+             */
+            *state = TRUE;
+            len = 3;
+        }
+    }
+    *esclen = len;
+}
+
+/* strchr() that, with set_iso2022jp on, matches ch only in ASCII runs. */
+char *
+hm_strchr(const char *str, int ch)
+{
+    int in_ascii = TRUE, esclen = 0;
+    if (!set_iso2022jp)
+        return(strchr(str, ch));
+    for (; *str; str++) {
+        iso2022_state(str, &in_ascii, &esclen);
+        if (esclen) {
+            /* stop on the escape's last byte so str++ cannot pass the NUL */
+            str += esclen - 1;
+            continue;
+        }
+        if (in_ascii == TRUE && *str == ch)
+            return((char *)str);
+    }
+    return((char *)NULL);
+}

----Next_Part(Thu_Apr__5_05:20:12_2001_595)----

Peter C. McCluskey

unread,
Apr 6, 2001, 2:33:49 PM4/6/01
to

go...@horae.dti.ne.jp (Hisashi Gotoh) writes:
>Hello, I added support to Hypermail 2.1.0 for ISO-2022-JP(RFC1468)
>encoding messages. I attach a patch to this mail.

Thank you. I have committed it to our cvs repository.
--
------------------------------------------------------------------------------
Peter McCluskey | Fed up with democracy's problems? Examine Futarchy:
http://www.rahul.net/pcm | http://hanson.gmu.edu/futarchy.pdf or .ps

0 new messages