Fixes a crash on systems that don't have UTF-7 in iconv.

2002-08-28 Not Zed <NotZed@Ximian.com> * providers/imap/camel-imap-utils.c (imap_mailbox_encode): Changed to use camel_utf8_utf7 code. (imap_mailbox_decode): As above, using camel_utf7_utf8. 'UTF-7' isn't a widely supported iconv() codeset, and besides the new code is simpler. * camel-utf8.[ch]: robust utilities for working with utf8 and utf7. svn path=/trunk/; revision=17886
2002-08-28 07:45:17 +00:00
parent 2839143b0b
commit 76d4c1a98e
5 changed files with 300 additions and 251 deletions
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@ -1,3 +1,13 @@
+2002-08-28  Not Zed  <NotZed@Ximian.com>
+
+	* providers/imap/camel-imap-utils.c (imap_mailbox_encode): Changed
+	to use camel_utf8_utf7 code.
+	(imap_mailbox_decode): As above, using camel_utf7_utf8.  'UTF-7'
+	isn't a widely supported iconv() codeset, and besides the new code
+	is simpler.
+
+	* camel-utf8.[ch]: robust utilities for working with utf8 and utf7.
+
 2002-08-27  Jeffrey Stedfast  <fejj@ximian.com>

 	* camel-folder-thread.c (camel_folder_thread_messages_new): Now
--- a/camel/Makefile.am
+++ b/camel/Makefile.am
@ -109,6 +109,7 @@ libcamel_la_SOURCES = 				\
 	camel-transport.c			\
 	camel-uid-cache.c			\
 	camel-url.c				\
+	camel-utf8.c				\
 	camel-vee-folder.c			\
 	camel-vee-store.c			\
 	camel-vtrash-folder.c			\
@ -208,6 +209,7 @@ libcamelinclude_HEADERS =			\
 	camel-types.h				\
 	camel-uid-cache.h			\
 	camel-url.h				\
+	camel-utf8.h				\
 	camel-vee-folder.h			\
 	camel-vee-store.h			\
 	camel-vtrash-folder.h			\
--- a/camel/camel-utf8.c
+++ b/camel/camel-utf8.c
@ -0,0 +1,257 @@
+
+#include <glib.h>
+#include "camel-utf8.h"
+
+/**
+ * camel_utf8_putc:
+ * @ptr: in/out cursor into the destination buffer; on return it points
+ * just past the last octet written.
+ * @c: the Unicode code point to encode.
+ *
+ * Encode @c as utf8 and store it at *@ptr.  Between 1 and 4 octets are
+ * written, so the caller must have at least 4 octets of room.  No NUL
+ * terminator is written, and @c is not range checked: a value that needs
+ * more than 21 bits does not produce a valid 4 octet lead byte.
+ **/
+void
+camel_utf8_putc(unsigned char **ptr, guint32 c)
+{
+	unsigned char *out = *ptr;
+
+	if (c > 0xffff) {
+		/* see unicode standard 3.0, S 3.8, max 4 octets */
+		out[0] = 0xf0 | (c >> 18);
+		out[1] = 0x80 | ((c >> 12) & 0x3f);
+		out[2] = 0x80 | ((c >> 6) & 0x3f);
+		out[3] = 0x80 | (c & 0x3f);
+		out += 4;
+	} else if (c > 0x7ff) {
+		out[0] = 0xe0 | (c >> 12);
+		out[1] = 0x80 | ((c >> 6) & 0x3f);
+		out[2] = 0x80 | (c & 0x3f);
+		out += 3;
+	} else if (c > 0x7f) {
+		out[0] = 0xc0 | (c >> 6);
+		out[1] = 0x80 | (c & 0x3f);
+		out += 2;
+	} else {
+		/* plain 7 bit value, stored as-is */
+		out[0] = c;
+		out += 1;
+	}
+
+	*ptr = out;
+}
+
+/**
+ * camel_utf8_getc:
+ * @ptr: in/out cursor into a NUL terminated utf8 string; advanced past
+ * everything that was consumed.
+ *
+ * Get a Unicode character from a utf8 stream.  Invalid input is silently
+ * skipped: octets that cannot start a sequence (stray continuation
+ * octets 0x80-0xbf and the values 0xf8-0xff) are dropped, and a
+ * multi-octet sequence that is cut short is abandoned, with decoding
+ * restarting at the octet that interrupted it.  Overlong encodings are
+ * not detected.
+ *
+ * Return value: The next Unicode character, or 0 once the terminating
+ * NUL has been read.  Note that in the latter case @ptr has been
+ * advanced past the NUL, so the caller must stop on a 0 return.
+ **/
+guint32
+camel_utf8_getc(const unsigned char **ptr)
+{
+	register const unsigned char *p = *ptr;
+	register unsigned char c, r;
+	register guint32 v, m;
+
+again:
+	r = *p++;
+loop:
+	if (r < 0x80) {
+		/* 7 bit character (this includes the terminating NUL) */
+		*ptr = p;
+		v = r;
+	} else if (r >= 0xc0 && r < 0xf8) { /* valid start char? (max 4 octets) */
+		v = r;
+		m = 0x7f80;	/* used to mask out the length bits */
+		do {
+			c = *p++;
+			if ((c & 0xc0) != 0x80) {
+				/* sequence cut short: drop what we have and
+				   treat this octet as a fresh start octet */
+				r = c;
+				goto loop;
+			}
+			v = (v<<6) | (c & 0x3f);
+			/* each remaining length bit of the lead octet moves
+			   into bit 6 in turn; the mask follows the lead
+			   octet's bits as they are shifted up inside v */
+			r<<=1;
+			m<<=5;
+		} while (r & 0x40);
+		
+		*ptr = p;
+
+		v &= ~m;
+	} else {
+		/* a continuation octet with no lead octet, or 0xf8-0xff:
+		   neither can start a character, so skip it */
+		goto again;
+	}
+
+	return v;
+}
+
+/**
+ * g_string_append_u:
+ * @out: string to append to.
+ * @c: Unicode character to append.
+ *
+ * Append @c to @out, encoded as utf8.  The encoded octets are appended
+ * as a NUL terminated C string, so a @c of 0 appends nothing.
+ **/
+void
+g_string_append_u(GString *out, guint32 c)
+{
+	unsigned char buffer[8];	/* at most 4 octets + NUL are used */
+	unsigned char *p = buffer;
+
+	camel_utf8_putc(&p, c);
+	*p = 0;
+	/* g_string_append() takes a (const gchar *), not unsigned char */
+	g_string_append(out, (const char *)buffer);
+}
+
+/* The 64 digits of the modified base64 used by IMAP utf7: the usual
+   base64 alphabet except that ',' takes the place of '/'. */
+static const char *utf7_alphabet =
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+/* Reverse of utf7_alphabet: maps an octet to its 6 bit value, or 0xff
+   if the octet is not one of the 64 digits. */
+static const unsigned char utf7_rank[256] = {
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3e,0x3f,0xff,0xff,0xff,
+	0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
+	0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff,
+	0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,
+	0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+};
+
+/**
+ * camel_utf7_utf8:
+ * @ptr: NUL terminated string in IMAP modified utf7.
+ *
+ * Convert a modified utf7 string to utf8.  If the utf7 string
+ * contains 8 bit characters, they are treated as iso-8859-1.
+ *
+ * The IMAP rules [rfc2060] are used in the utf7 encoding.  The base64
+ * sections carry UTF-16, so a high/low surrogate pair is combined into
+ * a single character before it is written out; a surrogate without its
+ * partner is dropped.
+ *
+ * Return value: The converted string, newly allocated; free it with
+ * g_free().
+ **/
+char *
+camel_utf7_utf8(const char *ptr)
+{
+	const unsigned char *p = (const unsigned char *)ptr;
+	unsigned int c;
+	guint32 v=0, x;
+	guint32 hi=0;	/* pending UTF-16 high surrogate, 0 if none */
+	GString *out;
+	int i=0;	/* number of not yet consumed bits in v */
+	int state = 0;
+	char *ret;
+
+	out = g_string_new("");
+	/* the terminating NUL is run through the state machine as well, so
+	   that an unterminated '&' section is still flushed */
+	do {
+		c = *p++;
+		switch(state) {
+		case 0:
+			/* literal text */
+			if (c == '&')
+				state = 1;
+			else
+				g_string_append_u(out, c);
+			break;
+		case 1:
+			/* first octet after a '&' */
+			if (c == '-') {
+				/* "&-" is an escaped '&' */
+				g_string_append_c(out, '&');
+				state = 0;
+			} else if (utf7_rank[c] != 0xff) {
+				v = utf7_rank[c];
+				i = 6;
+				hi = 0;
+				state = 2;
+			} else {
+				/* invalid */
+				g_string_append(out, "&-");
+				state = 0;
+			}
+			break;
+		case 2:
+			/* inside a base64 section */
+			if (c == '-') {
+				state = 0;
+			} else if (utf7_rank[c] != 0xff) {
+				v = (v<<6) | utf7_rank[c];
+				i+=6;
+				if (i >= 16) {
+					/* a complete UTF-16 code unit is available */
+					x = (v >> (i-16)) & 0xffff;
+					i-=16;
+					if (x >= 0xd800 && x <= 0xdbff) {
+						/* high surrogate: wait for the low half */
+						hi = x;
+					} else if (x >= 0xdc00 && x <= 0xdfff) {
+						/* low surrogate: only valid after a high one */
+						if (hi != 0)
+							g_string_append_u(out, 0x10000 + ((hi - 0xd800) << 10) + (x - 0xdc00));
+						hi = 0;
+					} else {
+						hi = 0;
+						g_string_append_u(out, x);
+					}
+				}
+			} else {
+				/* not a base64 digit: the section ends here and
+				   the octet is taken as literal text */
+				g_string_append_u(out, c);
+				state = 0;
+			}
+			break;
+		}
+	} while (c);
+
+	ret = g_strdup(out->str);
+	g_string_free(out, TRUE);
+
+	return ret;
+}
+
+/* Finish a base64 section: if @i (less than 6) bits are still pending in
+   the low end of @v, write them out as one last digit, padded on the
+   right with zero bits, then write the closing '-'. */
+static void utf7_closeb64(GString *out, guint32 v, guint32 i)
+{
+	if (i > 0)
+		g_string_append_c(out, utf7_alphabet[(v << (6-i)) & 0x3f]);
+
+	g_string_append_c(out, '-');
+}
+
+/**
+ * camel_utf8_utf7:
+ * @ptr: NUL terminated utf8 string.
+ *
+ * Convert a utf8 string to a modified utf7 format.
+ *
+ * The IMAP rules [rfc2060] are used in the utf7 encoding: characters
+ * 0x20-0x7e are copied as they are, except '&' which becomes "&-";
+ * everything else is written as UTF-16 in a "&...-" modified base64
+ * section.  Characters above 0xffff are written as a surrogate pair.
+ *
+ * Return value: The converted string, newly allocated; free it with
+ * g_free().
+ **/
+char *
+camel_utf8_utf7(const char *ptr)
+{
+	const unsigned char *p = (const unsigned char *)ptr;
+	unsigned int c;
+	guint32 x, v = 0;
+	guint32 units[2];	/* UTF-16 code unit(s) for the current character */
+	int nunits, k;
+	int state = 0;		/* 1 while inside a base64 section */
+	GString *out;
+	int i = 0;		/* number of not yet written bits in v */
+	char *ret;
+
+	out = g_string_new("");
+
+	while ( (c = camel_utf8_getc(&p)) ) {
+		if (c >= 0x20 && c <= 0x7e) {
+			if (state == 1) {
+				utf7_closeb64(out, v, i);
+				state = 0;
+				i = 0;
+			}
+			if (c == '&')
+				g_string_append(out, "&-");
+			else
+				g_string_append_c(out, c);
+		} else {
+			if (state == 0) {
+				g_string_append_c(out, '&');
+				state = 1;
+			}
+
+			if (c > 0xffff) {
+				/* does not fit in one 16 bit unit: split it
+				   into a high/low surrogate pair */
+				c -= 0x10000;
+				units[0] = 0xd800 | ((c >> 10) & 0x3ff);
+				units[1] = 0xdc00 | (c & 0x3ff);
+				nunits = 2;
+			} else {
+				units[0] = c;
+				nunits = 1;
+			}
+
+			for (k = 0; k < nunits; k++) {
+				v = (v << 16) | units[k];
+				i += 16;
+				/* write out every complete 6 bit group */
+				while (i >= 6) {
+					x = (v >> (i-6)) & 0x3f;
+					g_string_append_c(out, utf7_alphabet[x]);
+					i -= 6;
+				}
+			}
+		}
+	}
+
+	if (state == 1)
+		utf7_closeb64(out, v, i);
+
+	ret = g_strdup(out->str);
+	g_string_free(out, TRUE);
+
+	return ret;
+}
--- a/camel/camel-utf8.h
+++ b/camel/camel-utf8.h
@ -0,0 +1,16 @@
+
+#ifndef _CAMEL_UTF8_H
+#define _CAMEL_UTF8_H
+
+/* for guint32 and GString, so this header can be included on its own */
+#include <glib.h>
+
+/* write/read one Unicode character as utf8; *ptr is advanced past it */
+void camel_utf8_putc(unsigned char **ptr, guint32 c);
+guint32 camel_utf8_getc(const unsigned char **ptr);
+
+/* utility func for utf8 gstrings */
+void g_string_append_u(GString *out, guint32 c);
+
+/* convert utf7 to/from utf8, actually this is modified IMAP utf7 */
+/* both return a newly allocated string */
+char *camel_utf7_utf8(const char *ptr);
+char *camel_utf8_utf7(const char *ptr);
+
+
+#endif /* ! _CAMEL_UTF8_H */
--- a/camel/providers/imap/camel-imap-utils.c
+++ b/camel/providers/imap/camel-imap-utils.c
@ -30,6 +30,7 @@
 #include "camel-imap-summary.h"
 #include "camel-imap-store.h"
 #include "camel-folder.h"
+#include "camel-utf8.h"

 #define d(x) x

@ -1119,263 +1120,26 @@ imap_concat (CamelImapStore *imap_store, const char *prefix, const char *suffix)
 		return g_strdup_printf ("%s%c%s", prefix, imap_store->dir_sep, suffix);
 }

-#define UTF8_TO_UTF7_LEN(len)  ((len * 3) + 8)
-#define UTF7_TO_UTF8_LEN(len)  (len)
-
-enum {
-	MODE_USASCII,
-	MODE_AMPERSAND,
-	MODE_MODUTF7
-};
-
-#define is_usascii(c)  (((c) >= 0x20 && (c) <= 0x25) || ((c) >= 0x27 && (c) <= 0x7e))
-#define encode_mode(c) (is_usascii (c) ? MODE_USASCII : (c) == '&' ? MODE_AMPERSAND : MODE_MODUTF7)
-
 char *
 imap_mailbox_encode (const unsigned char *in, size_t inlen)
 {
-	const unsigned char *start, *inptr, *inend;
-	unsigned char *mailbox, *m, *mend;
-	size_t inleft, outleft, conv;
-	char *inbuf, *outbuf;
-	iconv_t cd;
-	int mode;
-	
-	cd = (iconv_t) -1;
-	m = mailbox = g_malloc (UTF8_TO_UTF7_LEN (inlen) + 1);
-	mend = mailbox + UTF8_TO_UTF7_LEN (inlen);
-	
-	start = inptr = in;
-	inend = in + inlen;
-	mode = MODE_USASCII;
-	
-	while (inptr < inend) {
-		int new_mode;
-		
-		new_mode = encode_mode (*inptr);
-		
-		if (new_mode != mode) {
-			switch (mode) {
-			case MODE_USASCII:
-				memcpy (m, start, inptr - start);
-				m += (inptr - start);
-				break;
-			case MODE_AMPERSAND:
-				while (start < inptr) {
-					*m++ = '&';
-					*m++ = '-';
-					start++;
-				}
-				break;
-			case MODE_MODUTF7:
-				inbuf = (char *) start;
-				inleft = inptr - start;
-				outbuf = (char *) m;
-				outleft = mend - m;
-				
-				if (cd == (iconv_t) -1)
-					cd = iconv_open ("UTF-7", "UTF-8");
-				
-				conv = iconv (cd, &inbuf, &inleft, &outbuf, &outleft);
-				if (conv == (size_t) -1) {
-					g_warning ("error converting mailbox to UTF-7!");
-				}
-				iconv (cd, NULL, NULL, &outbuf, &outleft);
-				
-				/* shift into modified UTF-7 mode (overwrite UTF-7's '+' shift)... */
-				*m++ = '&';
-				
-				while (m < (unsigned char *) outbuf) {
-					/* replace '/' with ',' */
-					if (*m == '/')
-						*m = ',';
-					
-					m++;
-				}
-				
-				break;
-			}
-			
-			mode = new_mode;
-			start = inptr;
-		}
-		
-		inptr++;
-	}
-	
-	switch (mode) {
-	case MODE_USASCII:
-		memcpy (m, start, inptr - start);
-		m += (inptr - start);
-		break;
-	case MODE_AMPERSAND:
-		while (start < inptr) {
-			*m++ = '&';
-			*m++ = '-';
-			start++;
-		}
-		break;
-	case MODE_MODUTF7:
-		inbuf = (char *) start;
-		inleft = inptr - start;
-		outbuf = (char *) m;
-		outleft = mend - m;
-		
-		if (cd == (iconv_t) -1)
-			cd = iconv_open ("UTF-7", "UTF-8");
-		
-		conv = iconv (cd, &inbuf, &inleft, &outbuf, &outleft);
-		if (conv == (size_t) -1) {
-			g_warning ("error converting mailbox to UTF-7!");
-		}
-		iconv (cd, NULL, NULL, &outbuf, &outleft);
-		
-		/* shift into modified UTF-7 mode (overwrite UTF-7's '+' shift)... */
-		*m++ = '&';
-		
-		while (m < (unsigned char *) outbuf) {
-			/* replace '/' with ',' */
-			if (*m == '/')
-				*m = ',';
-			
-			m++;
-		}
-		
-		break;
-	}
-	
-	*m = '\0';
-	
-	if (cd != (iconv_t) -1)
-		iconv_close (cd);
-	
-	return mailbox;
-}
+	char *buf, *ret;

+	/* camel_utf8_utf7() wants a NUL terminated string, so take a
+	   terminated copy of the @inlen octets at @in.  The copy is made on
+	   the heap: the length is not bounded here, so alloca() could
+	   overrun the stack. */
+	buf = g_malloc(inlen+1);
+	memcpy(buf, in, inlen);
+	buf[inlen] = 0;
+
+	ret = camel_utf8_utf7(buf);
+	g_free(buf);
+
+	return ret;
+}

 char *
 imap_mailbox_decode (const unsigned char *in, size_t inlen)
 {
-	const unsigned char *start, *inptr, *inend;
-	unsigned char *mailbox, *m, *mend;
-	unsigned char mode_switch;
-	iconv_t cd;
-	
-	cd = (iconv_t) -1;
-	m = mailbox = g_malloc (UTF7_TO_UTF8_LEN (inlen) + 1);
-	mend = mailbox + UTF7_TO_UTF8_LEN (inlen);
-	
-	start = inptr = in;
-	inend = in + inlen;
-	mode_switch = '&';
-	
-	while (inptr < inend) {
-		if (*inptr == mode_switch) {
-			if (mode_switch == '&') {
-				/* mode switch from US-ASCII to UTF-7 */
-				mode_switch = '-';
-				memcpy (m, start, inptr - start);
-				m += (inptr - start);
-				start = inptr;
-			} else if (mode_switch == '-') {
-				/* mode switch from UTF-7 to US-ASCII or an ampersand (&) */
-				mode_switch = '&';
-				start++;
-				if (start == inptr) {
-					/* we had the sequence "&-" which becomes "&" when decoded */
-					*m++ = '&';
-				} else {
-					char *buffer, *inbuf, *outbuf;
-					size_t buflen, outleft, conv;
-					
-					buflen = (inptr - start) + 2;
-					inbuf = buffer = alloca (buflen);
-					*inbuf++ = '+';
-					while (start < inptr) {
-						*inbuf++ = *start == ',' ? '/' : *start;
-						start++;
-					}
-					*inbuf = '-';
-					
-					inbuf = buffer;
-					outbuf = (char *) m;
-					outleft = mend - m;
-					
-					if (cd == (iconv_t) -1)
-						cd = iconv_open ("UTF-8", "UTF-7");
-					
-					conv = iconv (cd, &inbuf, &buflen, &outbuf, &outleft);
-					if (conv == (size_t) -1) {
-						g_warning ("error decoding mailbox: %.*s", inlen, in);
-					}
-					iconv (cd, NULL, NULL, NULL, NULL);
-					
-					m = (unsigned char *) outbuf;
-				}
-				
-				/* point to the char after the '-' */
-				start = inptr + 1;
-			}
-		}
-		
-		inptr++;
-	}
-	
-	if (*inptr == mode_switch) {
-		if (mode_switch == '&') {
-			/* the remaining text is US-ASCII */
-			memcpy (m, start, inptr - start);
-			m += (inptr - start);
-			start = inptr;
-		} else if (mode_switch == '-') {
-			/* We've got encoded UTF-7 or else an ampersand */
-			start++;
-			if (start == inptr) {
-				/* we had the sequence "&-" which becomes "&" when decoded */
-				*m++ = '&';
-			} else {
-				char *buffer, *inbuf, *outbuf;
-				size_t buflen, outleft, conv;
-				
-				buflen = (inptr - start) + 2;
-				inbuf = buffer = alloca (buflen);
-				*inbuf++ = '+';
-				while (start < inptr) {
-					*inbuf++ = *start == ',' ? '/' : *start;
-					start++;
-				}
-				*inbuf = '-';
-				
-				inbuf = buffer;
-				outbuf = (char *) m;
-				outleft = mend - m;
-				
-				if (cd == (iconv_t) -1)
-					cd = iconv_open ("UTF-8", "UTF-7");
-				
-				conv = iconv (cd, &inbuf, &buflen, &outbuf, &outleft);
-				if (conv == (size_t) -1) {
-					g_warning ("error decoding mailbox: %.*s", inlen, in);
-				}
-				iconv (cd, NULL, NULL, NULL, NULL);
-				
-				m = (unsigned char *) outbuf;
-			}
-		}
-	} else {
-		if (mode_switch == '-') {
-			/* illegal encoded mailbox... */
-			g_warning ("illegal mailbox name encountered: %.*s", inlen, in);
-		}
-		
-		memcpy (m, start, inptr - start);
-		m += (inptr - start);
-	}
-	
-	*m = '\0';
-	
-	if (cd != (iconv_t) -1)
-		iconv_close (cd);
-	
-	return mailbox;
+	char *buf, *ret;
+
+	/* camel_utf7_utf8() wants a NUL terminated string, so take a
+	   terminated copy of the @inlen octets at @in.  The copy is made on
+	   the heap: the length is not bounded here, so alloca() could
+	   overrun the stack. */
+	buf = g_malloc(inlen+1);
+	memcpy(buf, in, inlen);
+	buf[inlen] = 0;
+
+	ret = camel_utf7_utf8(buf);
+	g_free(buf);
+
+	return ret;
 }