Fixes a crash on systems that don't have UTF-7 in iconv.

2002-08-28  Not Zed  <NotZed@Ximian.com>

        * providers/imap/camel-imap-utils.c (imap_mailbox_encode): Changed
        to use camel_utf8_utf7 code.
        (imap_mailbox_decode): As above, using camel_utf8_utf7.  'UTF-7'
        isn't a widely supported iconv() codeset, and besides the new code
        is simpler.

        * camel-utf8.[ch]: robust utilities for working with utf8 and utf7.

svn path=/trunk/; revision=17886
This commit is contained in:
Not Zed
2002-08-28 07:45:17 +00:00
committed by Michael Zucci
parent 2839143b0b
commit 76d4c1a98e
5 changed files with 300 additions and 251 deletions

View File

@ -1,3 +1,13 @@
2002-08-28 Not Zed <NotZed@Ximian.com>
* providers/imap/camel-imap-utils.c (imap_mailbox_encode): Changed
to use camel_utf8_utf7 code.
(imap_mailbox_decode): As above, using camel_utf8_utf7. 'UTF-7'
isn't a widely supported iconv() codeset, and besides the new code
is simpler.
* camel-utf8.[ch]: robust utilities for working with utf8 and utf7.
2002-08-27 Jeffrey Stedfast <fejj@ximian.com>
* camel-folder-thread.c (camel_folder_thread_messages_new): Now

View File

@ -109,6 +109,7 @@ libcamel_la_SOURCES = \
camel-transport.c \
camel-uid-cache.c \
camel-url.c \
camel-utf8.c \
camel-vee-folder.c \
camel-vee-store.c \
camel-vtrash-folder.c \
@ -208,6 +209,7 @@ libcamelinclude_HEADERS = \
camel-types.h \
camel-uid-cache.h \
camel-url.h \
camel-utf8.h \
camel-vee-folder.h \
camel-vee-store.h \
camel-vtrash-folder.h \

257
camel/camel-utf8.c Normal file
View File

@ -0,0 +1,257 @@
#include <glib.h>
#include "camel-utf8.h"
/**
 * camel_utf8_putc:
 * @ptr: in/out cursor into the output buffer; the octets are stored at
 *       *@ptr and *@ptr is left pointing just past the last one written.
 * @c: the character value to encode.
 *
 * Store @c at *@ptr as a UTF-8 sequence of 1 to 4 octets.  The sequence
 * length is chosen from the magnitude of @c (<= 0x7f, <= 0x7ff,
 * <= 0xffff, anything larger).  No terminating NUL is written and no
 * bounds checking is done: the caller must provide room for 4 octets.
 **/
void
camel_utf8_putc(unsigned char **ptr, guint32 c)
{
	unsigned char *out = *ptr;
	guint32 lead;
	int trail;

	/* pick the length marker of the lead octet and the number of
	 * continuation octets that follow it */
	if (c <= 0x7f) {
		lead = 0x00;
		trail = 0;
	} else if (c <= 0x7ff) {
		lead = 0xc0;
		trail = 1;
	} else if (c <= 0xffff) {
		lead = 0xe0;
		trail = 2;
	} else {
		/* see unicode standard 3.0, S 3.8, max 4 octets */
		lead = 0xf0;
		trail = 3;
	}

	/* lead octet: marker plus the bits above the continuation octets */
	*out++ = lead | (c >> (6 * trail));

	/* continuation octets, 6 payload bits each, most significant first */
	while (trail-- > 0)
		*out++ = 0x80 | ((c >> (6 * trail)) & 0x3f);

	*ptr = out;
}
/**
 * camel_utf8_getc:
 * @ptr: in/out cursor into a NUL terminated array of UTF-8 octets.
 *
 * Decode the next character from *@ptr and advance *@ptr past the
 * octets consumed.
 *
 * Octets that cannot start a sequence are skipped: stray continuation
 * octets (0x80-0xbf) and lead octets announcing more than 4 octets
 * (0xf8-0xff).  A multi-octet sequence that is cut short by a
 * non-continuation octet is dropped and decoding restarts at the octet
 * that interrupted it.
 *
 * Return value: The decoded character, or 0 once the terminating NUL
 * has been consumed (*@ptr then points just past the NUL).
 **/
guint32
camel_utf8_getc(const unsigned char **ptr)
{
	const unsigned char *p = *ptr;
	unsigned char c, r;
	guint32 v, m;

	r = *p++;
	for (;;) {
		if (r < 0x80) {
			/* plain ASCII, including the terminating NUL */
			*ptr = p;
			return r;
		}

		if (r < 0xc0 || r >= 0xf8) {
			/* Not a valid start octet.  Continuation octets used to
			 * be accepted here and decoded to garbage (possibly 0,
			 * which callers take as end of string); skip them just
			 * like the impossible 0xf8-0xff lead octets. */
			r = *p++;
			continue;
		}

		v = r;
		m = 0x7f80;	/* used to mask out the length bits */
		for (;;) {
			c = *p++;
			if ((c & 0xc0) != 0x80)
				break;	/* truncated sequence */

			v = (v << 6) | (c & 0x3f);
			/* each shift moves the next length bit of the lead
			 * octet into the 0x40 position tested below */
			r <<= 1;
			m <<= 5;
			if (!(r & 0x40)) {
				*ptr = p;
				return v & ~m;
			}
		}

		/* restart decoding with the octet that broke the sequence */
		r = c;
	}
}
/**
 * g_string_append_u:
 * @out: string to append to.
 * @c: character value to append.
 *
 * Append @c to @out encoded as UTF-8 (via camel_utf8_putc()).
 *
 * The encoded octets are appended as a NUL terminated C string, so
 * @c == 0 appends nothing.
 **/
void
g_string_append_u(GString *out, guint32 c)
{
	/* camel_utf8_putc() writes at most 4 octets, plus our NUL */
	unsigned char buffer[8];
	unsigned char *p = buffer;

	camel_utf8_putc(&p, c);
	*p = 0;
	/* g_string_append() takes a char string; the buffer is unsigned */
	g_string_append(out, (const char *)buffer);
}
/* The 64 digits of the modified base64 used by IMAP utf7, indexed by
 * 6-bit value.  Note ',' in the last position. */
static const char utf7_alphabet[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";

/* Inverse of utf7_alphabet: maps an octet to its 6-bit value, or 0xff
 * if the octet is not a digit of the alphabet. */
static const unsigned char utf7_rank[256] = {
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3e,0x3f,0xff,0xff,0xff,
	0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,
	0x0f,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0xff,0xff,0xff,0xff,0xff,
	0xff,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,
	0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x32,0x33,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
};
/**
 * camel_utf7_utf8:
 * @ptr: NUL terminated string in modified (IMAP) utf7.
 *
 * Convert a modified utf7 string to utf8.  If the utf7 string
 * contains 8 bit characters, they are treated as iso-8859-1.
 *
 * The IMAP rules [rfc2060] are used in the utf7 encoding.  The base64
 * runs carry UTF-16: a high surrogate followed by a low surrogate is
 * combined into a single character.  An unpaired surrogate is output
 * as it stands.
 *
 * Return value: The converted string, newly allocated with g_strdup().
 **/
char *
camel_utf7_utf8(const char *ptr)
{
	const unsigned char *p = (const unsigned char *)ptr;
	unsigned int c;
	guint32 v = 0, x;
	guint32 hi = 0;	/* pending high surrogate, 0 if none */
	GString *out;
	int i = 0;	/* number of not yet consumed bits in v */
	int state = 0;	/* 0: literal text, 1: just saw '&', 2: inside base64 */
	char *ret;

	out = g_string_new("");

	/* the terminating NUL is run through the state machine as well, so
	 * that an unterminated '&' run is closed like any other */
	do {
		c = *p++;
		switch (state) {
		case 0:
			if (c == '&')
				state = 1;
			else
				g_string_append_u(out, c);
			break;
		case 1:
			if (c == '-') {
				/* "&-" is the escape for a literal '&' */
				g_string_append_c(out, '&');
				state = 0;
			} else if (utf7_rank[c] != 0xff) {
				v = utf7_rank[c];
				i = 6;
				state = 2;
			} else {
				/* invalid: outputs the literal "&-"; the
				 * offending octet itself is dropped */
				g_string_append(out, "&-");
				state = 0;
			}
			break;
		case 2:
			if (utf7_rank[c] != 0xff) {
				v = (v << 6) | utf7_rank[c];
				i += 6;
				if (i >= 16) {
					/* a complete UTF-16 code unit is available */
					x = (v >> (i - 16)) & 0xffff;
					i -= 16;

					if (hi != 0) {
						if (x >= 0xdc00 && x <= 0xdfff)
							x = 0x10000 + ((hi - 0xd800) << 10) + (x - 0xdc00);
						else
							g_string_append_u(out, hi);	/* unpaired */
						hi = 0;
					}

					if (x >= 0xd800 && x <= 0xdbff)
						hi = x;	/* wait for the low half */
					else
						g_string_append_u(out, x);
				}
			} else {
				/* end of the base64 run; flush a dangling high
				 * surrogate before anything else is output */
				if (hi != 0) {
					g_string_append_u(out, hi);
					hi = 0;
				}
				/* '-' only terminates the run, any other octet
				 * terminates it and is output itself */
				if (c != '-')
					g_string_append_u(out, c);
				state = 0;
			}
			break;
		}
	} while (c);

	ret = g_strdup(out->str);
	g_string_free(out, TRUE);

	return ret;
}
/* Finish a base64 run in @out: if @i (< 6) bits are still pending in the
 * low end of @v, emit them as one final digit padded on the right with
 * zero bits, then emit the closing '-'. */
static void utf7_closeb64(GString *out, guint32 v, guint32 i)
{
	if (i != 0) {
		guint32 digit = (v << (6 - i)) & 0x3f;

		g_string_append_c(out, utf7_alphabet[digit]);
	}

	g_string_append_c(out, '-');
}
/**
 * camel_utf8_utf7:
 * @ptr: NUL terminated utf8 string.
 *
 * Convert a utf8 string to a modified utf7 format.
 *
 * The IMAP rules [rfc2060] are used in the utf7 encoding: characters
 * 0x20-0x7e are copied literally ('&' as "&-"), everything else is
 * written as UTF-16 in "&...-" modified base64 runs.  Characters above
 * 0xffff are written as a surrogate pair; values above 0x10ffff, which
 * UTF-16 cannot represent, are replaced by U+FFFD.
 *
 * Return value: The converted string, newly allocated with g_strdup().
 **/
char *
camel_utf8_utf7(const char *ptr)
{
	const unsigned char *p = (const unsigned char *)ptr;
	guint32 c;
	guint32 units[2];	/* UTF-16 code units of the current character */
	guint32 v = 0;		/* bit accumulator, only the low i bits are valid */
	int nunits, n;
	int state = 0;		/* 1 while inside a base64 run */
	GString *out;
	int i = 0;		/* number of not yet written bits in v */
	char *ret;

	out = g_string_new("");

	while ((c = camel_utf8_getc(&p)) != 0) {
		if (c >= 0x20 && c <= 0x7e) {
			/* printable ascii is literal; leave base64 first */
			if (state == 1) {
				utf7_closeb64(out, v, i);
				state = 0;
				i = 0;
			}
			if (c == '&')
				g_string_append(out, "&-");
			else
				g_string_append_c(out, c);
		} else {
			if (state == 0) {
				g_string_append_c(out, '&');
				state = 1;
			}

			if (c > 0x10ffff)
				c = 0xfffd;	/* not representable in UTF-16 */

			if (c >= 0x10000) {
				/* previously truncated to the low 16 bits */
				c -= 0x10000;
				units[0] = 0xd800 | (c >> 10);
				units[1] = 0xdc00 | (c & 0x3ff);
				nunits = 2;
			} else {
				units[0] = c;
				nunits = 1;
			}

			for (n = 0; n < nunits; n++) {
				v = (v << 16) | units[n];
				i += 16;
				/* write out every complete 6 bit group */
				while (i >= 6) {
					g_string_append_c(out, utf7_alphabet[(v >> (i - 6)) & 0x3f]);
					i -= 6;
				}
			}
		}
	}

	/* the string ended inside a base64 run */
	if (state == 1)
		utf7_closeb64(out, v, i);

	ret = g_strdup(out->str);
	g_string_free(out, TRUE);

	return ret;
}

16
camel/camel-utf8.h Normal file
View File

@ -0,0 +1,16 @@
/* Helpers for reading/writing utf8 and for converting between utf8 and
 * the modified utf7 used for IMAP mailbox names.
 *
 * NOTE(review): this header uses guint32 and GString but includes
 * nothing itself, so <glib.h> must already have been included by the
 * file that includes it. */
#ifndef _CAMEL_UTF8_H
#define _CAMEL_UTF8_H
/* write c at *ptr as utf8 octets and advance *ptr past them */
void camel_utf8_putc(unsigned char **ptr, guint32 c);
/* read the next character from the NUL terminated utf8 at *ptr and
 * advance *ptr past it */
guint32 camel_utf8_getc(const unsigned char **ptr);
/* utility func for utf8 gstrings: append c to out, encoded as utf8 */
void g_string_append_u(GString *out, guint32 c);
/* convert utf7 to/from utf8, actually this is modified IMAP utf7;
 * both return a string allocated with g_strdup() */
char *camel_utf7_utf8(const char *ptr);
char *camel_utf8_utf7(const char *ptr);
#endif /* ! _CAMEL_UTF8_H */

View File

@ -30,6 +30,7 @@
#include "camel-imap-summary.h"
#include "camel-imap-store.h"
#include "camel-folder.h"
#include "camel-utf8.h"
#define d(x) x
@ -1119,263 +1120,26 @@ imap_concat (CamelImapStore *imap_store, const char *prefix, const char *suffix)
return g_strdup_printf ("%s%c%s", prefix, imap_store->dir_sep, suffix);
}
#define UTF8_TO_UTF7_LEN(len) ((len * 3) + 8)
#define UTF7_TO_UTF8_LEN(len) (len)
enum {
MODE_USASCII,
MODE_AMPERSAND,
MODE_MODUTF7
};
#define is_usascii(c) (((c) >= 0x20 && (c) <= 0x25) || ((c) >= 0x27 && (c) <= 0x7e))
#define encode_mode(c) (is_usascii (c) ? MODE_USASCII : (c) == '&' ? MODE_AMPERSAND : MODE_MODUTF7)
char *
imap_mailbox_encode (const unsigned char *in, size_t inlen)
{
const unsigned char *start, *inptr, *inend;
unsigned char *mailbox, *m, *mend;
size_t inleft, outleft, conv;
char *inbuf, *outbuf;
iconv_t cd;
int mode;
cd = (iconv_t) -1;
m = mailbox = g_malloc (UTF8_TO_UTF7_LEN (inlen) + 1);
mend = mailbox + UTF8_TO_UTF7_LEN (inlen);
start = inptr = in;
inend = in + inlen;
mode = MODE_USASCII;
while (inptr < inend) {
int new_mode;
new_mode = encode_mode (*inptr);
if (new_mode != mode) {
switch (mode) {
case MODE_USASCII:
memcpy (m, start, inptr - start);
m += (inptr - start);
break;
case MODE_AMPERSAND:
while (start < inptr) {
*m++ = '&';
*m++ = '-';
start++;
}
break;
case MODE_MODUTF7:
inbuf = (char *) start;
inleft = inptr - start;
outbuf = (char *) m;
outleft = mend - m;
if (cd == (iconv_t) -1)
cd = iconv_open ("UTF-7", "UTF-8");
conv = iconv (cd, &inbuf, &inleft, &outbuf, &outleft);
if (conv == (size_t) -1) {
g_warning ("error converting mailbox to UTF-7!");
}
iconv (cd, NULL, NULL, &outbuf, &outleft);
/* shift into modified UTF-7 mode (overwrite UTF-7's '+' shift)... */
*m++ = '&';
while (m < (unsigned char *) outbuf) {
/* replace '/' with ',' */
if (*m == '/')
*m = ',';
m++;
}
break;
}
mode = new_mode;
start = inptr;
}
inptr++;
}
switch (mode) {
case MODE_USASCII:
memcpy (m, start, inptr - start);
m += (inptr - start);
break;
case MODE_AMPERSAND:
while (start < inptr) {
*m++ = '&';
*m++ = '-';
start++;
}
break;
case MODE_MODUTF7:
inbuf = (char *) start;
inleft = inptr - start;
outbuf = (char *) m;
outleft = mend - m;
if (cd == (iconv_t) -1)
cd = iconv_open ("UTF-7", "UTF-8");
conv = iconv (cd, &inbuf, &inleft, &outbuf, &outleft);
if (conv == (size_t) -1) {
g_warning ("error converting mailbox to UTF-7!");
}
iconv (cd, NULL, NULL, &outbuf, &outleft);
/* shift into modified UTF-7 mode (overwrite UTF-7's '+' shift)... */
*m++ = '&';
while (m < (unsigned char *) outbuf) {
/* replace '/' with ',' */
if (*m == '/')
*m = ',';
m++;
}
break;
}
*m = '\0';
if (cd != (iconv_t) -1)
iconv_close (cd);
return mailbox;
}
char *buf;
buf = alloca(inlen+1);
memcpy(buf, in, inlen);
buf[inlen] = 0;
return camel_utf8_utf7(buf);
}
char *
imap_mailbox_decode (const unsigned char *in, size_t inlen)
{
const unsigned char *start, *inptr, *inend;
unsigned char *mailbox, *m, *mend;
unsigned char mode_switch;
iconv_t cd;
cd = (iconv_t) -1;
m = mailbox = g_malloc (UTF7_TO_UTF8_LEN (inlen) + 1);
mend = mailbox + UTF7_TO_UTF8_LEN (inlen);
start = inptr = in;
inend = in + inlen;
mode_switch = '&';
while (inptr < inend) {
if (*inptr == mode_switch) {
if (mode_switch == '&') {
/* mode switch from US-ASCII to UTF-7 */
mode_switch = '-';
memcpy (m, start, inptr - start);
m += (inptr - start);
start = inptr;
} else if (mode_switch == '-') {
/* mode switch from UTF-7 to US-ASCII or an ampersand (&) */
mode_switch = '&';
start++;
if (start == inptr) {
/* we had the sequence "&-" which becomes "&" when decoded */
*m++ = '&';
} else {
char *buffer, *inbuf, *outbuf;
size_t buflen, outleft, conv;
buflen = (inptr - start) + 2;
inbuf = buffer = alloca (buflen);
*inbuf++ = '+';
while (start < inptr) {
*inbuf++ = *start == ',' ? '/' : *start;
start++;
}
*inbuf = '-';
inbuf = buffer;
outbuf = (char *) m;
outleft = mend - m;
if (cd == (iconv_t) -1)
cd = iconv_open ("UTF-8", "UTF-7");
conv = iconv (cd, &inbuf, &buflen, &outbuf, &outleft);
if (conv == (size_t) -1) {
g_warning ("error decoding mailbox: %.*s", inlen, in);
}
iconv (cd, NULL, NULL, NULL, NULL);
m = (unsigned char *) outbuf;
}
/* point to the char after the '-' */
start = inptr + 1;
}
}
inptr++;
}
if (*inptr == mode_switch) {
if (mode_switch == '&') {
/* the remaining text is US-ASCII */
memcpy (m, start, inptr - start);
m += (inptr - start);
start = inptr;
} else if (mode_switch == '-') {
/* We've got encoded UTF-7 or else an ampersand */
start++;
if (start == inptr) {
/* we had the sequence "&-" which becomes "&" when decoded */
*m++ = '&';
} else {
char *buffer, *inbuf, *outbuf;
size_t buflen, outleft, conv;
buflen = (inptr - start) + 2;
inbuf = buffer = alloca (buflen);
*inbuf++ = '+';
while (start < inptr) {
*inbuf++ = *start == ',' ? '/' : *start;
start++;
}
*inbuf = '-';
inbuf = buffer;
outbuf = (char *) m;
outleft = mend - m;
if (cd == (iconv_t) -1)
cd = iconv_open ("UTF-8", "UTF-7");
conv = iconv (cd, &inbuf, &buflen, &outbuf, &outleft);
if (conv == (size_t) -1) {
g_warning ("error decoding mailbox: %.*s", inlen, in);
}
iconv (cd, NULL, NULL, NULL, NULL);
m = (unsigned char *) outbuf;
}
}
} else {
if (mode_switch == '-') {
/* illegal encoded mailbox... */
g_warning ("illegal mailbox name encountered: %.*s", inlen, in);
}
memcpy (m, start, inptr - start);
m += (inptr - start);
}
*m = '\0';
if (cd != (iconv_t) -1)
iconv_close (cd);
return mailbox;
char *buf;
buf = alloca(inlen+1);
memcpy(buf, in, inlen);
buf[inlen] = 0;
return camel_utf7_utf8(buf);
}