Read the characters as utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it has no meaning.

2003-01-13  Not Zed  <NotZed@Ximian.com>

        * camel-mime-filter-tohtml.c (writeln): Read the characters as
        utf8, rather than as 8 bit bytes.  Remove the PRESERVE_8BIT as it
        has no meaning.  Also change the default logic slightly so that 8
        bit or greater characters are properly converted to entities.

        * camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8
        char, bounded by an end pointer.

svn path=/trunk/; revision=19421
This commit is contained in:
Not Zed
2003-01-13 05:46:35 +00:00
committed by Michael Zucci
parent 969b2c6b65
commit 9b60cad3dc
3 changed files with 87 additions and 17 deletions

View File

@ -1,3 +1,13 @@
2003-01-13 Not Zed <NotZed@Ximian.com>
* camel-mime-filter-tohtml.c (writeln): Read the characters as
utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it
has no meaning. Also change the default logic slightly so that 8
bit or greater characters are properly converted to entities.
* camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8
char, bounded by an end pointer.
2003-01-07 Dan Winship <danw@ximian.com> 2003-01-07 Dan Winship <danw@ximian.com>
* camel-provider.h (CamelProvider): add a "translation_domain" * camel-provider.h (CamelProvider): add a "translation_domain"

View File

@ -28,6 +28,7 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "camel-utf8.h"
#include "camel-url-scanner.h" #include "camel-url-scanner.h"
#include "camel-mime-filter-tohtml.h" #include "camel-mime-filter-tohtml.h"
@ -147,14 +148,18 @@ static char *
writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend) writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend)
{ {
CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter; CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
register const char *inptr = in; const char *inptr = in;
while (inptr < inend) { while (inptr < inend) {
unsigned char u; guint32 u;
outptr = check_size (filter, outptr, outend, 9); outptr = check_size (filter, outptr, outend, 16);
switch ((u = (unsigned char) *inptr++)) { u = camel_utf8_getc_limit(&inptr, inend);
switch (u) {
case 0xffff:
g_warning("Truncated utf8 buffer");
return outptr;
case '<': case '<':
outptr = g_stpcpy (outptr, "&lt;"); outptr = g_stpcpy (outptr, "&lt;");
html->column++; html->column++;
@ -182,22 +187,21 @@ writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outpt
} }
/* otherwise, FALL THROUGH */ /* otherwise, FALL THROUGH */
case ' ': case ' ':
if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES) { if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES
if (inptr == (in + 1) || *inptr == ' ' || *inptr == '\t') { && ((inptr == (in + 1) || *inptr == ' ' || *inptr == '\t'))) {
outptr = g_stpcpy (outptr, "&nbsp;"); outptr = g_stpcpy (outptr, "&nbsp;");
html->column++; html->column++;
break; break;
} }
}
/* otherwise, FALL THROUGH */ /* otherwise, FALL THROUGH */
default: default:
if (!(u >= 0x20 && u < 0x80) && !(html->flags & CAMEL_MIME_FILTER_TOHTML_PRESERVE_8BIT)) { if (u >= 20 && u <0x80)
*outptr++ = u;
else {
if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT) if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT)
*outptr++ = '?'; *outptr++ = '?';
else else
outptr += g_snprintf (outptr, 9, "&#%d;", (int) u); outptr += sprintf(outptr, "&#%u;", u);
} else {
*outptr++ = (char) u;
} }
html->column++; html->column++;
break; break;

View File

@ -83,6 +83,62 @@ loop:
return v; return v;
} }
/**
 * camel_utf8_getc_limit:
 * @ptr: in/out cursor into a utf8 buffer; on success it is moved past
 * every byte that was consumed (including skipped invalid bytes).
 * @end: one past the last readable byte; must not be NULL.
 *
 * Get the next utf8 char at @ptr, and return it, advancing @ptr to
 * the next character.
 *
 * Bytes that cannot start a sequence are skipped: stray continuation
 * bytes (0x80-0xbf) and anything from 0xf8 upwards.  If a multi-byte
 * sequence is interrupted by a byte that is not a continuation byte,
 * the partial sequence is dropped and decoding restarts at the
 * interrupting byte.  Overlong encodings and surrogate values are not
 * rejected.
 *
 * If @end is reached before a full utf8 character can be read, then
 * the invalid Unicode char 0xffff is returned as a sentinel (Unicode
 * 3.1, section 2.7), and @ptr is not advanced.
 *
 * Return value: The next utf8 char, or 0xffff.
 **/
guint32
camel_utf8_getc_limit(const unsigned char **ptr, const unsigned char *end)
{
	const unsigned char *p = *ptr;
	unsigned char c, r;
	guint32 v, m;

	while (p < end) {
		r = *p++;
	loop:
		if (r < 0x80) {
			/* plain ASCII */
			*ptr = p;
			return r;
		}

		/* 0x80-0xbf are continuation bytes and 0xf8+ would need more
		 * than 4 octets: neither can begin a character, skip them */
		if (r < 0xc0 || r >= 0xf8)
			continue;

		v = r;
		m = 0x7f80;	/* used to mask out the length bits */
		do {
			if (p >= end)
				return 0xffff;	/* truncated; *ptr left untouched */

			c = *p++;
			if ((c & 0xc0) != 0x80) {
				/* sequence cut short: resync on this byte */
				r = c;
				goto loop;
			}

			v = (v << 6) | (c & 0x3f);
			/* each remaining length bit of the lead byte walks up
			 * into bit 6; the mask follows the lead byte's new
			 * position inside v (6 bits up, one length bit less) */
			r <<= 1;
			m <<= 5;
		} while (r & 0x40);

		*ptr = p;
		return v & ~m;
	}

	return 0xffff;
}
void void
g_string_append_u(GString *out, guint32 c) g_string_append_u(GString *out, guint32 c)
{ {