Fix for #46168 'some additional locale aliases required for chinese

2003-07-11 Suresh Chandrasekharan <suresh.chandrasekharan@sun.com> * camel-iconv.c: Fix for #46168 'some additional locale aliases required for chinese support'. 2003-07-25 Jeffrey Stedfast <fejj@ximian.com> * camel-mime-utils.c (header_decode_word): Revert NotZed's fix for bug #42170 - this causes even more problems than it solves. See bug #46331 for info. Basically, each address header would be converted to UTF-8 twice which means no raw 8bit address header would render correctly. (header_decode_mailbox): Perform a sanity check on the resultant addr->str to make sure that it is valid UTF-8, if not convert it to UTF-8. Fixes bug #42170. svn path=/trunk/; revision=22013
2003-07-29 13:52:43 +00:00
parent 5503603cce
commit c0a0accfc3
4 changed files with 70 additions and 43 deletions
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@ -1,3 +1,19 @@
+2003-07-11  Suresh Chandrasekharan <suresh.chandrasekharan@sun.com>
+
+	* camel-iconv.c: Fix for #46168 'some additional locale aliases
+	required for chinese support'.
+
+2003-07-25  Jeffrey Stedfast  <fejj@ximian.com>
+
+	* camel-mime-utils.c (header_decode_word): Revert NotZed's fix for
+	bug #42170 - this causes even more problems than it solves. See
+	bug #46331 for info. Basically, each address header would be
+	converted to UTF-8 twice which means no raw 8bit address header
+	would render correctly.
+	(header_decode_mailbox): Perform a sanity check on the resultant
+	addr->str to make sure that it is valid UTF-8, if not convert it
+	to UTF-8. Fixes bug #42170.
+
 2003-07-23  Jeffrey Stedfast  <fejj@ximian.com>

 	* camel-mime-message.c (find_best_encoding): Revert my previous
--- a/camel/camel-iconv.c
+++ b/camel/camel-iconv.c
@ -136,6 +136,9 @@ struct {
 	/* FIXME: Japanese/Korean/Chinese stuff needs checking */
 	{ "euckr-0",        "EUC-KR"     },
 	{ "5601",           "EUC-KR"     },
+	{ "zh_TW-euc",      "EUC-TW"     },
+	{ "zh_CN.euc",      "gb2312"     },
+	{ "zh_TW-big5",     "BIG5"       },
 	{ "big5-0",         "BIG5"       },
 	{ "big5.eten-0",    "BIG5"       },
 	{ "big5hkscs-0",    "BIG5HKSCS"  },
--- a/camel/camel-mime-filter-tohtml.c
+++ b/camel/camel-mime-filter-tohtml.c
@ -32,6 +32,14 @@
 #include "camel-mime-filter-tohtml.h"
 #include "camel-utf8.h"

+/**
+ * TODO: convert common text/plain 'markup' to html. eg.:
+ *
+ * _word_ -> <u>_word_</u>
+ * *word* -> <b>*word*</b>
+ * /word/ -> <i>/word/</i>
+ **/
+
 #define d(x)

 #define CONVERT_WEB_URLS  CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS
--- a/camel/camel-mime-utils.c
+++ b/camel/camel-mime-utils.c
@ -1772,36 +1772,18 @@ header_decode_atom(const char **in)
 }

 static char *
-header_decode_word(const char **in, const char *charset)
+header_decode_word (const char **in)
 {
-	char *out;
-
-	header_decode_lwsp(in);
-	if (**in == '"')
-		out = header_decode_quoted_string(in);
-	else
-		out = header_decode_atom(in);
-
-	/* FIXME: temporary workaround for non-ascii-data problem, see bug #42710 */
-	if (out) {
-		char *p;
-
-		for (p=out;*p;p++) {
-			if ((*p) & 0x80) {
-				GString *newstr = g_string_new("");
-
-				if (charset == NULL || !append_8bit(newstr, out, strlen(out), charset))
-					append_latin1(newstr, out, strlen(out));
-
-				g_free(out);
-				out = newstr->str;
-				g_string_free(newstr, FALSE);
-				break;
-			}
-		}
+	const char *inptr = *in;
+	
+	header_decode_lwsp (&inptr);
+	if (*inptr == '"') {
+		*in = inptr;
+		return header_decode_quoted_string (in);
+	} else {
+		*in = inptr;
+		return header_decode_atom (in);
 	}
-
-	return out;
 }

 static char *
@ -2295,7 +2277,7 @@ header_decode_addrspec(const char **in)
 	header_decode_lwsp(&inptr);

 	/* addr-spec */
-	word = header_decode_word(&inptr, NULL);
+	word = header_decode_word (&inptr);
 	if (word) {
 		addr = g_string_append(addr, word);
 		header_decode_lwsp(&inptr);
@ -2303,7 +2285,7 @@ header_decode_addrspec(const char **in)
 		while (*inptr == '.' && word) {
 			inptr++;
 			addr = g_string_append_c(addr, '.');
-			word = header_decode_word(&inptr, NULL);
+			word = header_decode_word (&inptr);
 			if (word) {
 				addr = g_string_append(addr, word);
 				header_decode_lwsp(&inptr);
@ -2364,7 +2346,7 @@ header_decode_mailbox(const char **in, const char *charset)
 	addr = g_string_new("");

 	/* for each address */
-	pre = header_decode_word(&inptr, charset);
+	pre = header_decode_word (&inptr);
 	header_decode_lwsp(&inptr);
 	if (!(*inptr == '.' || *inptr == '@' || *inptr==',' || *inptr=='\0')) {
 		/* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */
@ -2378,7 +2360,7 @@ header_decode_mailbox(const char **in, const char *charset)
 			last = pre;
 			g_free(text);

-			pre = header_decode_word(&inptr, charset);
+			pre = header_decode_word (&inptr);
 			if (pre) {
 				size_t l = strlen (last);
 				size_t p = strlen (pre);
@ -2396,7 +2378,7 @@ header_decode_mailbox(const char **in, const char *charset)
 				while (!pre && *inptr && *inptr != '<') {
 					w(g_warning("Working around stupid mailer bug #5: unescaped characters in names"));
 					name = g_string_append_c(name, *inptr++);
-					pre = header_decode_word(&inptr, charset);
+					pre = header_decode_word (&inptr);
 				}
 			}
 			g_free(last);
@ -2423,7 +2405,7 @@ header_decode_mailbox(const char **in, const char *charset)
 					w(g_warning("broken route-address, missing ':': %s", *in));
 				}
 			}
-			pre = header_decode_word(&inptr, charset);
+			pre = header_decode_word (&inptr);
 			header_decode_lwsp(&inptr);
 		} else {
 			w(g_warning("broken address? %s", *in));
@ -2440,7 +2422,7 @@ header_decode_mailbox(const char **in, const char *charset)
 	while (*inptr == '.' && pre) {
 		inptr++;
 		g_free(pre);
-		pre = header_decode_word(&inptr, charset);
+		pre = header_decode_word (&inptr);
 		addr = g_string_append_c(addr, '.');
 		if (pre)
 			addr = g_string_append(addr, pre);
@ -2542,14 +2524,32 @@ header_decode_mailbox(const char **in, const char *charset)
 	*in = inptr;
 	
 	if (addr->len > 0) {
+		if (!g_utf8_validate (addr->str, addr->len, NULL)) {
+			/* workaround for invalid addr-specs containing 8bit chars (see bug #42170 for details) */
+			const char *locale_charset;
+			GString *out;
+			
+			locale_charset = e_iconv_locale_charset ();
+			
+			out = g_string_new ("");
+			
+			if ((charset == NULL || !append_8bit (out, addr->str, addr->len, charset))
+			    && (locale_charset == NULL || !append_8bit (out, addr->str, addr->len, locale_charset)))
+				append_latin1 (out, addr->str, addr->len);
+			
+			g_string_free (addr, TRUE);
+			addr = out;
+		}
+		
 		address = header_address_new_name(name ? name->str : "", addr->str);
 	}
-
+	
+	d(printf("got mailbox: %s\n", addr->str));
+	
 	g_string_free(addr, TRUE);
 	if (name)
 		g_string_free(name, TRUE);
-
-	d(printf("got mailbox: %s\n", addr->str));
+	
 	return address;
 }

@ -2563,7 +2563,7 @@ header_decode_address(const char **in, const char *charset)

 	/* pre-scan, trying to work out format, discard results */
 	header_decode_lwsp(&inptr);
-	while ( (pre = header_decode_word(&inptr, charset)) ) {
+	while ((pre = header_decode_word (&inptr))) {
 		group = g_string_append(group, pre);
 		group = g_string_append(group, " ");
 		g_free(pre);
@ -2678,7 +2678,7 @@ header_contentid_decode (const char *in)
 	}
 	
 	/* Eudora has been known to use <.@> as a content-id */
-	if (!(buf = header_decode_word (&inptr, NULL)) && !strchr (".@", *inptr))
+	if (!(buf = header_decode_word (&inptr)) && !strchr (".@", *inptr))
 		return NULL;
 	
 	addr = g_string_new ("");
@ -2693,10 +2693,10 @@ header_contentid_decode (const char *in)
 		if (!at) {
 			if (*inptr == '.') {
 				g_string_append_c (addr, *inptr++);
-				buf = header_decode_word (&inptr, NULL);
+				buf = header_decode_word (&inptr);
 			} else if (*inptr == '@') {
 				g_string_append_c (addr, *inptr++);
-				buf = header_decode_word (&inptr, NULL);
+				buf = header_decode_word (&inptr);
 				at = TRUE;
 			}
 		} else if (strchr (".[]", *inptr)) {
@ -2769,7 +2769,7 @@ header_references_decode_single (const char **in, struct _header_references **he
 				break;
 			}
 		} else {
-			word = header_decode_word (&inptr, NULL);
+			word = header_decode_word (&inptr);
 			if (word)
 				g_free (word);
 			else if (*inptr != '\0')