Read the characters as utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it has no meaning.
2003-01-13 Not Zed <NotZed@Ximian.com> * camel-mime-filter-tohtml.c (writeln): Read the characters as utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it has no meaning. Also change the default logic slightly so that 8 bit or greater characters are properly converted to entities. * camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8 char, bounded by an end pointer. svn path=/trunk/; revision=19421
This commit is contained in:
@ -1,3 +1,13 @@
|
|||||||
|
2003-01-13 Not Zed <NotZed@Ximian.com>
|
||||||
|
|
||||||
|
* camel-mime-filter-tohtml.c (writeln): Read the characters as
|
||||||
|
utf8, rather than as 8 bit bytes. Remove the PRESERVE_8BIT as it
|
||||||
|
has no meaning. Also change the default logic slightly so that 8
|
||||||
|
bit or greater characters are properly converted to entities.
|
||||||
|
|
||||||
|
* camel-utf8.c (camel_utf8_getc_limit): new function, gets a utf8
|
||||||
|
char, bounded by an end pointer.
|
||||||
|
|
||||||
2003-01-07 Dan Winship <danw@ximian.com>
|
2003-01-07 Dan Winship <danw@ximian.com>
|
||||||
|
|
||||||
* camel-provider.h (CamelProvider): add a "translation_domain"
|
* camel-provider.h (CamelProvider): add a "translation_domain"
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "camel-utf8.h"
|
||||||
#include "camel-url-scanner.h"
|
#include "camel-url-scanner.h"
|
||||||
#include "camel-mime-filter-tohtml.h"
|
#include "camel-mime-filter-tohtml.h"
|
||||||
|
|
||||||
@ -147,14 +148,18 @@ static char *
|
|||||||
writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend)
|
writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outptr, char **outend)
|
||||||
{
|
{
|
||||||
CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
|
CamelMimeFilterToHTML *html = (CamelMimeFilterToHTML *) filter;
|
||||||
register const char *inptr = in;
|
const char *inptr = in;
|
||||||
|
|
||||||
while (inptr < inend) {
|
while (inptr < inend) {
|
||||||
unsigned char u;
|
guint32 u;
|
||||||
|
|
||||||
outptr = check_size (filter, outptr, outend, 9);
|
outptr = check_size (filter, outptr, outend, 16);
|
||||||
|
|
||||||
switch ((u = (unsigned char) *inptr++)) {
|
u = camel_utf8_getc_limit(&inptr, inend);
|
||||||
|
switch (u) {
|
||||||
|
case 0xffff:
|
||||||
|
g_warning("Truncated utf8 buffer");
|
||||||
|
return outptr;
|
||||||
case '<':
|
case '<':
|
||||||
outptr = g_stpcpy (outptr, "<");
|
outptr = g_stpcpy (outptr, "<");
|
||||||
html->column++;
|
html->column++;
|
||||||
@ -182,22 +187,21 @@ writeln (CamelMimeFilter *filter, const char *in, const char *inend, char *outpt
|
|||||||
}
|
}
|
||||||
/* otherwise, FALL THROUGH */
|
/* otherwise, FALL THROUGH */
|
||||||
case ' ':
|
case ' ':
|
||||||
if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES) {
|
if (html->flags & CAMEL_MIME_FILTER_TOHTML_CONVERT_SPACES
|
||||||
if (inptr == (in + 1) || *inptr == ' ' || *inptr == '\t') {
|
&& ((inptr == (in + 1) || *inptr == ' ' || *inptr == '\t'))) {
|
||||||
outptr = g_stpcpy (outptr, " ");
|
outptr = g_stpcpy (outptr, " ");
|
||||||
html->column++;
|
html->column++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
/* otherwise, FALL THROUGH */
|
/* otherwise, FALL THROUGH */
|
||||||
default:
|
default:
|
||||||
if (!(u >= 0x20 && u < 0x80) && !(html->flags & CAMEL_MIME_FILTER_TOHTML_PRESERVE_8BIT)) {
|
if (u >= 20 && u <0x80)
|
||||||
|
*outptr++ = u;
|
||||||
|
else {
|
||||||
if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT)
|
if (html->flags & CAMEL_MIME_FILTER_TOHTML_ESCAPE_8BIT)
|
||||||
*outptr++ = '?';
|
*outptr++ = '?';
|
||||||
else
|
else
|
||||||
outptr += g_snprintf (outptr, 9, "&#%d;", (int) u);
|
outptr += sprintf(outptr, "&#%u;", u);
|
||||||
} else {
|
|
||||||
*outptr++ = (char) u;
|
|
||||||
}
|
}
|
||||||
html->column++;
|
html->column++;
|
||||||
break;
|
break;
|
||||||
|
@ -83,6 +83,62 @@ loop:
|
|||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* camel_utf8_getc_limit:
|
||||||
|
* @ptr: address of the read pointer; on a successful decode it is updated to point just past the bytes consumed.
|
||||||
|
* @end: exclusive upper bound for reading (bytes are only read while the cursor is below @end); must not be NULL.
|
||||||
|
*
|
||||||
|
* Get the next utf8 char at @ptr, and return it, advancing @ptr to
|
||||||
|
* the next character. If @end is reached before a full utf8
|
||||||
|
* character can be read, then the invalid Unicode char 0xffff is
|
||||||
|
* returned as a sentinel (Unicode 3.1, section 2.7), and @ptr is not
|
||||||
|
* advanced. Bytes of 0xf8 or above are skipped, and a multi-byte sequence interrupted by a non-continuation byte is abandoned, with decoding restarting at that byte.
|
||||||
|
*
|
||||||
|
* Return value: The next utf8 char, or 0xffff.
|
||||||
|
**/
|
||||||
|
guint32
|
||||||
|
camel_utf8_getc_limit(const unsigned char **ptr, const unsigned char *end)
|
||||||
|
{
|
||||||
|
register unsigned char *p = (unsigned char *)*ptr; /* working cursor; *ptr is only written back on a successful decode */
|
||||||
|
register unsigned char c, r; /* r: current lead byte (shifted as its length bits are consumed); c: current trailing byte */
|
||||||
|
register guint32 v = 0xffff, m; /* v: code point being accumulated; m: mask for the lead byte's length bits inside v */
|
||||||
|
|
||||||
|
again: /* re-entry point after skipping a byte that cannot start a sequence */
|
||||||
|
while (p < end) {
|
||||||
|
r = *p++;
|
||||||
|
loop: /* re-entry point with r already holding the byte that cut the previous sequence short */
|
||||||
|
if (r < 0x80) { /* single-byte character: return it as is */
|
||||||
|
*ptr = p;
|
||||||
|
|
||||||
|
return r;
|
||||||
|
|
||||||
|
} else if (r < 0xf8) { /* valid start char? (max 4 octets) */ /* NOTE(review): bytes 0x80-0xbf also pass this test and are decoded as lead bytes - confirm this is intended */
|
||||||
|
v = r;
|
||||||
|
m = 0x7f80; /* used to mask out the length bits */
|
||||||
|
do {
|
||||||
|
if (p >= end) /* input ends mid-sequence: report truncation without touching *ptr */
|
||||||
|
return 0xffff;
|
||||||
|
|
||||||
|
c = *p++;
|
||||||
|
if ((c & 0xc0) != 0x80) { /* not a 10xxxxxx continuation byte: drop this sequence and restart from c */
|
||||||
|
r = c;
|
||||||
|
goto loop;
|
||||||
|
}
|
||||||
|
v = (v<<6) | (c & 0x3f); /* append the 6 payload bits of the continuation byte */
|
||||||
|
r<<=1; /* move the lead byte's next length bit into the 0x40 position */
|
||||||
|
m<<=5; /* v moved up 6 bits while the lead byte gained one more length bit, so the mask moves up 5 */
|
||||||
|
} while (r & 0x40); /* a further length bit set in the lead byte means one more trailing byte */
|
||||||
|
|
||||||
|
*ptr = p;
|
||||||
|
|
||||||
|
v &= ~m; /* clear the lead byte's length bits, leaving only the code point */
|
||||||
|
return v;
|
||||||
|
} else {
|
||||||
|
goto again; /* 0xf8 and above is not accepted as a start byte: skip it */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0xffff; /* input exhausted before any complete character was found */
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
g_string_append_u(GString *out, guint32 c)
|
g_string_append_u(GString *out, guint32 c)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user