Redo the BUILD_MAP code to not depend on libunicode. Now it only generates

* camel-charset-map.c: Redo the BUILD_MAP code to not depend on libunicode. Now it only generates a map of "popular" 8bit encodings. (It's not worthwhile to support obscure encodings, because any mailer that supports them will support UTF8 too. And Chinese and Japanese use mostly the same UTF8 characters so you need to decide between those encodings based on the locale or the charset of the message you're replying to or the input method you used. So this is sufficient for camel_charset_best's use.) * camel-charset-map-private.h: Regenerated. * camel.c (camel_shutdown): Move #ifdefs around to prevent a warning. svn path=/trunk/; revision=10055
2001-05-30 21:32:18 +00:00
parent c12a1660bd
commit 092b7e4449
4 changed files with 294 additions and 4111 deletions
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@ -1,3 +1,20 @@
+2001-05-30  Dan Winship  <danw@ximian.com>
+
+	* camel-charset-map.c: Redo the BUILD_MAP code to not depend on
+	libunicode. Now it only generates a map of "popular" 8bit
+	encodings. (It's not worthwhile to support obscure encodings,
+	because any mailer that supports them will support UTF8 too. And
+	Chinese and Japanese use mostly the same UTF8 characters so you
+	need to decide between those encodings based on the locale or
+	the charset of the message you're replying to or the input
+	method you used. So this is sufficient for camel_charset_best's
+	use.)
+
+	* camel-charset-map-private.h: Regenerated.
+
+	* camel.c (camel_shutdown): Move #ifdefs around to prevent a
+	warning.
+
 2001-05-29  Dan Winship  <danw@ximian.com>

 	* camel-disco-diary.c: Code for logging and replaying offline
--- a/camel/camel-charset-map-private.h
+++ b/camel/camel-charset-map-private.h
--- a/camel/camel-charset-map.c
+++ b/camel/camel-charset-map.c
@ -1,12 +1,42 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; -*- */
+
+/* 
+ * Authors:
+ *   Michael Zucchi <notzed@ximian.com>
+ *   Dan Winship <danw@ximian.com>
+ *
+ * Copyright 2000, 2001 Ximian, Inc. (http://www.ximian.com)
+ *
+ * This program is free software; you can redistribute it and/or 
+ * modify it under the terms of the GNU General Public License as 
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif

 #include <stdio.h>

 /*
-  if you want to build the charset map, add the root directory of
-  libunicode to the include path and define BUILD_MAP,
-  then run it as 
+  if you want to build the charset map, compile this with something like:
+    gcc -DBUILD_MAP camel-charset-map.c `glib-config --cflags`
+  (plus any -I/-L/-l flags you need for iconv), then run it as 
    ./a.out > camel-charset-map-private.h

+  Note that the big-endian variant isn't tested...
+
  The tables genereated work like this:

   An indirect array for each page of unicode character
@ -15,117 +45,83 @@
 */

 #ifdef BUILD_MAP
-#include "iso/iso8859-2.h"
-#include "iso/iso8859-3.h"
-#include "iso/iso8859-4.h"
-#include "iso/iso8859-5.h"
-#include "iso/iso8859-6.h"
-#include "iso/iso8859-7.h"
-#include "iso/iso8859-8.h"
-#include "iso/iso8859-9.h"
-#include "iso/iso8859-10.h"
-#include "iso/iso8859-13.h"
-#include "iso/iso8859-14.h"
-#include "iso/iso8859-15.h"
-#include "iso/windows-1250.h"
-#include "iso/windows-1252.h"
-#include "iso/windows-1257.h"
-#include "iso/koi8-r.h"
-#include "iso/koi8-u.h"
-#include "iso/tis620.2533-1.h"
-#include "iso/armscii-8.h"
-#include "iso/georgian-academy.h"
-#include "iso/georgian-ps.h"
-#include "msft/cp932.h"
-#include "jis/shiftjis.h"
+#include <iconv.h>
+#include <glib.h>

 static struct {
-	unsigned short *table;
 	char *name;
-	int type;		/* type of table */
 	unsigned int bit;	/* assigned bit */
 } tables[] = {
-	{ iso8859_2_table, "iso-8859-2", 0, 0} ,
-	{ iso8859_3_table, "iso-8859-3", 0, 0} ,
-	{ iso8859_4_table, "iso-8859-4", 0, 0},
-	{ iso8859_5_table, "iso-8859-5", 0, 0},
-/* apparently -6 has special digits? */
-	{ iso8859_6_table, "iso-8859-6", 0, 0},
-	{ iso8859_7_table, "iso-8859-7", 0, 0},
-	{ iso8859_8_table, "iso-8859-8", 0, 0},
-	{ iso8859_9_table, "iso-8859-9", 0, 0},
-	{ iso8859_10_table, "iso-8859-10", 0, 0},
-	{ iso8859_13_table, "iso-8859-13", 0, 0},
-	{ iso8859_14_table, "iso-8859-14", 0, 0},
-	{ iso8859_15_table, "iso-8859-15", 0, 0},
-	{ windows_1250_table, "windows-1250", 0, 0},
-	{ windows_1252_table, "windows-1252", 0, 0},
-	{ windows_1257_table, "windows-1257", 0, 0},
-	{ koi8_r_table, "koi8-r", 0, 0},
-	{ koi8_u_table, "koi8-u", 0, 0},
-	{ tis_620_table, "tis620.2533-1", 0, 0},
-	{ armscii_8_table, "armscii-8", 0, 0},
-	{ georgian_academy_table, "georgian-academy", 0, 0},
-	{ georgian_ps_table, "georgian-ps", 0, 0},
-	{ cp932_table, "CP932", 1, 0},
-	{ sjis_table, "Shift-JIS", 1, 0},
-	{ 0, 0}
+	/* These are the 8bit character sets (other than iso-8859-1,
+	 * which is special-cased) which are supported by both other
+	 * mailers and the GNOME environment. Note that the order
+	 * they're listed in is the order they'll be tried in, so put
+	 * the more-popular ones first.
+	 */
+	{ "iso-8859-2", 0 },	/* Central/Eastern European */
+	{ "iso-8859-4", 0 },	/* Baltic */
+	{ "koi8-r", 0 },	/* Russian */
+	{ "windows-1251", 0 },	/* Russian */
+	{ "koi8-u", 0 },	/* Ukranian */
+	{ "iso-8859-5", 0 },	/* Least-popular Russian encoding */
+	{ "iso-8859-7", 0 },	/* Greek */
+	{ "iso-8859-9", 0 },	/* Turkish */
+	{ "iso-8859-13", 0 },	/* Baltic again */
+	{ "iso-8859-15", 0 },	/* New-and-improved iso-8859-1, but most
+				 * programs that support this support UTF8
+				 */
+	{ 0, 0 }
 };

 unsigned int encoding_map[256 * 256];

-static void
-add_bigmap(unsigned short **table, int bit)
-{
-	int i;
-	int j;
-
-	for (i=0;i<256;i++) {
-		unsigned short *tab = table[i];
-		if (tab) {
-			for (j=0;j<256;j++) {
-				if (tab[j])
-					encoding_map[tab[j]] |= bit;
-			}
-		}
-	}
-}
+#if G_BYTE_ORDER == G_BIG_ENDIAN
+#define UCS "UCS-4BE"
+#else
+#define UCS "UCS-4LE"
+#endif

 void main(void)
 {
 	int i, j;
-	unsigned short *tab;
 	int max, min;
 	int bit = 0x01;
 	int k;
 	int bytes;
-
-#if 0
-	/* iso-latin-1 (not needed-detected in code) */
-	for (i=0;i<256;i++) {
-		encoding_map[i] |= bit;
-	}
-	bit <<= 1;
-#endif
+	iconv_t cd;
+	char in[128];
+	guint32 out[128];
+	char *inptr, *outptr;
+	size_t inlen, outlen;

 	/* dont count the terminator */
 	bytes = ((sizeof(tables)/sizeof(tables[0]))+7-1)/8;

-	/* the other latin charsets */
-	for (j=0;tables[j].table;j++) {
-		switch (tables[j].type) {
-		case 0:		/* table from 128-256 */
-			tab = tables[j].table;
-			for (i=0;i<128;i++) {
-				/* 0-127 is the common */
-				encoding_map[i] |= bit;
-				encoding_map[tab[i]] |= bit;
+	for (i = 0; i < 128; i++)
+		in[i] = i + 128;
+
+	for (j = 0; tables[j].name; j++) {
+		cd = iconv_open (UCS, tables[j].name);
+		inptr = in;
+		outptr = (char *)(out);
+		inlen = sizeof (in);
+		outlen = sizeof (out);
+		while (iconv (cd, &inptr, &inlen, &outptr, &outlen) == -1) {
+			if (errno == EILSEQ) {
+				inptr++;
+				inlen--;
+			} else {
+				printf ("%s\n", strerror (errno));
+				exit (1);
 			}
-			break;
-		case 1:		/* sparse table */
-			add_bigmap(tables[j].table, bit);
-			break;
 		}
+		iconv_close (cd);
+
+		for (i = 0; i < 128 - outlen / 4; i++) {
+			encoding_map[i] |= bit;
+			encoding_map[out[i]] |= bit;
+		}
+
 		tables[j].bit = bit;
 		bit <<= 1;
 	}
@ -178,7 +174,7 @@ void main(void)
 	printf("\n};\n\n");

 	printf("struct {\n\tconst char *name;\n\tunsigned int bit;\n} camel_charinfo[] = {\n");
-	for (j=0;tables[j].table;j++) {
+	for (j=0;tables[j].name;j++) {
 		printf("\t{ \"%s\", 0x%04x },\n", tables[j].name, tables[j].bit);
 	}
 	printf("};\n\n");
--- a/camel/camel.c
+++ b/camel/camel.c
@ -38,15 +38,15 @@

 gboolean camel_verbose_debug = FALSE;

+#ifdef HAVE_NSS
 static void
 camel_shutdown (void)
 {
-#ifdef HAVE_NSS
 	NSS_Shutdown ();
 	
 	PR_Cleanup ();
-#endif /* HAVE_NSS */
 }
+#endif /* HAVE_NSS */

 gint
 camel_init (const char *configdir, gboolean nss_init)