New function to parse an HTML meta-tag.

2001-07-10  Jeffrey Stedfast  <fejj@ximian.com>

	* camel-mime-utils.c (html_meta_param_list_decode): New function
	to parse an HTML meta-tag.

	* camel-mime-part-utils.c
	(simple_data_wrapper_construct_from_parser): If the Content-Type
	did not contain a charset parameter and it's also a text/html
	part, we have 1 last place to look - in the META html tags. *sigh*

	* camel-mime-message.c (camel_mime_message_get_source):
	s/gint/unsigned since that's what it should be.

svn path=/trunk/; revision=10976
This commit is contained in:
Jeffrey Stedfast
2001-07-10 22:06:56 +00:00
committed by Jeffrey Stedfast
parent 456227c7b4
commit e39d94c5ef
7 changed files with 365 additions and 233 deletions

View File

@ -1,3 +1,16 @@
2001-07-10 Jeffrey Stedfast <fejj@ximian.com>
* camel-mime-utils.c (html_meta_param_list_decode): New function
to parse an HTML meta-tag.
* camel-mime-part-utils.c
(simple_data_wrapper_construct_from_parser): If the Content-Type
did not contain a charset parameter and it's also a text/html
part, we have 1 last place to look - in the META html tags. *sigh*
* camel-mime-message.c (camel_mime_message_get_source):
s/gint/unsigned since that's what it should be.
2001-07-09 Jeffrey Stedfast <fejj@ximian.com> 2001-07-09 Jeffrey Stedfast <fejj@ximian.com>
* camel-pgp-context.c (pgp_sign): Forget the passphrase if the * camel-pgp-context.c (pgp_sign): Forget the passphrase if the

View File

@ -171,7 +171,8 @@ camel_mime_message_get_type (void)
return camel_mime_message_type; return camel_mime_message_type;
} }
static void unref_recipient (gpointer key, gpointer value, gpointer user_data) static void
unref_recipient (gpointer key, gpointer value, gpointer user_data)
{ {
camel_object_unref (CAMEL_OBJECT (value)); camel_object_unref (CAMEL_OBJECT (value));
} }
@ -193,6 +194,7 @@ camel_mime_message_set_date(CamelMimeMessage *message, time_t date, int offset)
char *datestr; char *datestr;
g_assert(message); g_assert(message);
if (date == CAMEL_MESSAGE_DATE_CURRENT) { if (date == CAMEL_MESSAGE_DATE_CURRENT) {
struct tm *local; struct tm *local;
int tz; int tz;
@ -418,10 +420,12 @@ const char *
camel_mime_message_get_source (CamelMimeMessage *mime_message) camel_mime_message_get_source (CamelMimeMessage *mime_message)
{ {
const char *src; const char *src;
g_assert(mime_message); g_assert(mime_message);
src = camel_medium_get_header (CAMEL_MEDIUM (mime_message), "X-Evolution-Source"); src = camel_medium_get_header (CAMEL_MEDIUM (mime_message), "X-Evolution-Source");
if (src) { if (src) {
while (*src && isspace ((gint) *src)) while (*src && isspace ((unsigned) *src))
++src; ++src;
} }
return src; return src;
@ -514,15 +518,15 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
switch (header_type) { switch (header_type) {
case HEADER_FROM: case HEADER_FROM:
if (message->from) if (message->from)
camel_object_unref((CamelObject *)message->from); camel_object_unref (CAMEL_OBJECT (message->from));
message->from = camel_internet_address_new (); message->from = camel_internet_address_new ();
camel_address_decode((CamelAddress *)message->from, header_value); camel_address_decode (CAMEL_ADDRESS (message->from), header_value);
break; break;
case HEADER_REPLY_TO: case HEADER_REPLY_TO:
if (message->reply_to) if (message->reply_to)
camel_object_unref((CamelObject *)message->reply_to); camel_object_unref (CAMEL_OBJECT (message->reply_to));
message->reply_to = camel_internet_address_new (); message->reply_to = camel_internet_address_new ();
camel_address_decode((CamelAddress *)message->reply_to, header_value); camel_address_decode (CAMEL_ADDRESS (message->reply_to), header_value);
break; break;
case HEADER_SUBJECT: case HEADER_SUBJECT:
g_free(message->subject); g_free(message->subject);
@ -555,6 +559,7 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
default: default:
return FALSE; return FALSE;
} }
return TRUE; return TRUE;
} }
@ -733,11 +738,11 @@ find_best_encoding(CamelMimePart *part, CamelBestencRequired required, CamelBest
it as binary data (and take the result we have so far) */ it as binary data (and take the result we have so far) */
if (charenc != NULL) { if (charenc != NULL) {
/* otherwise, try another pass, converting to the real charset */ /* otherwise, try another pass, converting to the real charset */
camel_mime_filter_reset ((CamelMimeFilter *)bestenc); camel_mime_filter_reset ((CamelMimeFilter *)bestenc);
camel_mime_filter_bestenc_set_flags(bestenc, CAMEL_BESTENC_GET_ENCODING|CAMEL_BESTENC_LF_IS_CRLF|callerflags); camel_mime_filter_bestenc_set_flags (bestenc, CAMEL_BESTENC_GET_ENCODING |
CAMEL_BESTENC_LF_IS_CRLF | callerflags);
camel_stream_filter_add (filter, (CamelMimeFilter *)charenc); camel_stream_filter_add (filter, (CamelMimeFilter *)charenc);
camel_stream_filter_add (filter, (CamelMimeFilter *)bestenc); camel_stream_filter_add (filter, (CamelMimeFilter *)bestenc);
@ -790,7 +795,8 @@ best_encoding(CamelMimeMessage *msg, CamelMimePart *part, void *datap)
char *newct; char *newct;
/* FIXME: ick, the part content_type interface needs fixing bigtime */ /* FIXME: ick, the part content_type interface needs fixing bigtime */
header_content_type_set_param(part->content_type, "charset", charset?charset:"us-ascii"); header_content_type_set_param (part->content_type, "charset",
charset ? charset : "us-ascii");
newct = header_content_type_format (part->content_type); newct = header_content_type_format (part->content_type);
if (newct) { if (newct) {
d(printf("Setting content-type to %s\n", newct)); d(printf("Setting content-type to %s\n", newct));

View File

@ -89,6 +89,7 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
ct = camel_mime_parser_content_type (mp); ct = camel_mime_parser_content_type (mp);
if (header_content_type_is (ct, "text", "*")) { if (header_content_type_is (ct, "text", "*")) {
const char *charset = header_content_type_param (ct, "charset"); const char *charset = header_content_type_param (ct, "charset");
char *acharset; /* to be alloca'd if needed */
if (fdec) { if (fdec) {
d(printf("Adding CRLF conversion filter\n")); d(printf("Adding CRLF conversion filter\n"));
@ -97,9 +98,67 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
crlfid = camel_mime_parser_filter_add (mp, fcrlf); crlfid = camel_mime_parser_filter_add (mp, fcrlf);
} }
if (charset!=NULL /* Possible Lame Mailer Alert... check the META tag for a charset */
&& !(strcasecmp(charset, "us-ascii")==0 if (!charset && header_content_type_is (ct, "text", "html")) {
|| strcasecmp(charset, "utf-8")==0)) { /* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */
const char *data, *slashhead;
CamelStream *mem;
mem = camel_stream_mem_new ();
camel_data_wrapper_write_to_stream (dw, mem);
camel_stream_write (mem, "", 1);
data = CAMEL_STREAM_MEM (mem)->buffer->data;
slashhead = strstrcase (data, "</head");
if (!slashhead)
slashhead = data + CAMEL_STREAM_MEM (mem)->buffer->len;
/* Yea, this is ugly */
while (data < slashhead) {
struct _header_param *params;
char *meta, *metaend;
const char *val;
meta = strstrcase (data, "<meta");
if (!meta)
break;
metaend = strchr (meta, '>');
if (!metaend)
metaend = slashhead;
params = html_meta_param_list_decode (meta, metaend - meta);
if (params) {
val = header_param (params, "http-equiv");
if (val && !g_strcasecmp (val, "Content-Type")) {
struct _header_content_type *content_type;
content_type = header_content_type_decode (val);
charset = header_content_type_param (content_type, "charset");
if (charset) {
acharset = alloca (strlen (charset) + 1);
strcpy (acharset, charset);
charset = acharset;
}
header_content_type_unref (content_type);
}
header_param_list_free (params);
/* break as soon as we find a charset */
if (charset)
break;
}
data = metaend;
}
camel_object_unref (CAMEL_OBJECT (mem));
}
/* if the charset is not us-ascii or utf-8, then we need to convert to utf-8 */
if (charset && !(g_strcasecmp (charset, "us-ascii") == 0 || g_strcasecmp (charset, "utf-8") == 0)) {
d(printf("Adding conversion filter from %s to UTF-8\n", charset)); d(printf("Adding conversion filter from %s to UTF-8\n", charset));
fch = (CamelMimeFilter *)camel_mime_filter_charset_new_convert (charset, "UTF-8"); fch = (CamelMimeFilter *)camel_mime_filter_charset_new_convert (charset, "UTF-8");
if (fch) { if (fch) {
@ -108,7 +167,6 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
g_warning ("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset); g_warning ("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset);
} }
} }
} }
buffer = g_byte_array_new(); buffer = g_byte_array_new();

View File

@ -2719,6 +2719,58 @@ header_param_list_decode(const char *in)
return header_decode_param_list(&in); return header_decode_param_list(&in);
} }
/**
 * html_meta_param_list_decode:
 * @in: start of an HTML META tag; must begin with "<meta" (any case)
 * @inlen: number of bytes, starting at @in, that belong to the tag
 *
 * Parses the name=value attributes of a META tag, e.g.
 *   <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
 * into a singly linked list of struct _header_param, kept in the order
 * the attributes appear in the tag.
 *
 * Parsing stops at '>', at @in + @inlen, or at the first attribute that
 * has no name or is not followed by '='.
 *
 * Returns the head of the list, or NULL if @in is NULL, is too short to
 * hold "<meta", does not start with "<meta", or yields no attributes.
 * Each node is g_malloc'd here; the caller in camel-mime-part-utils.c
 * releases the list with header_param_list_free().
 **/
struct _header_param *
html_meta_param_list_decode (const char *in, int inlen)
{
	struct _header_param *params = NULL, *last = NULL;
	const char *inptr, *inend;

	/* "<meta" is 5 bytes; anything shorter cannot be a META tag */
	if (in == NULL || inlen < 5)
		return NULL;

	if (g_strncasecmp (in, "<meta", 5) != 0)
		return NULL;

	inptr = in + 5;
	inend = in + inlen;

	header_decode_lwsp (&inptr);

	while (inptr < inend && *inptr != '>') {
		struct _header_param *param;
		char *name, *value;

		name = decode_token (&inptr);
		header_decode_lwsp (&inptr);

		/* An attribute without a name or without '=' ends the parse.
		 * Rejecting a NULL name keeps unnamed nodes out of the list and,
		 * together with the '=' skip below, guarantees that every
		 * iteration advances inptr. */
		if (name == NULL || *inptr != '=') {
			g_free (name);
			break;
		}

		/* Step over the '=' so the value parser starts on the value.
		 * NOTE(review): assumes header_decode_value() does not itself
		 * skip a leading '=' - confirm against its definition. */
		inptr++;

		/* value may be NULL if nothing parsable follows the '=' */
		value = header_decode_value (&inptr);
		header_decode_lwsp (&inptr);

		param = g_malloc (sizeof (*param));
		param->next = NULL;
		param->name = name;
		param->value = value;

		/* append, preserving attribute order */
		if (last)
			last->next = param;
		else
			params = param;
		last = param;
	}

	return params;
}
/* FIXME: I wrote this in a quick & dirty fashion - it may not be 100% correct */ /* FIXME: I wrote this in a quick & dirty fashion - it may not be 100% correct */
static char * static char *
header_encode_param (const unsigned char *in, gboolean *encoded) header_encode_param (const unsigned char *in, gboolean *encoded)

View File

@ -109,6 +109,9 @@ void header_param_list_format_append(GString *out, struct _header_param *p);
char *header_param_list_format(struct _header_param *p); char *header_param_list_format(struct _header_param *p);
void header_param_list_free(struct _header_param *p); void header_param_list_free(struct _header_param *p);
/* for decoding META tags in text/html stuff */
struct _header_param *html_meta_param_list_decode (const char *in, int inlen);
/* Content-Type header */ /* Content-Type header */
struct _header_content_type *header_content_type_new(const char *type, const char *subtype); struct _header_content_type *header_content_type_new(const char *type, const char *subtype);
struct _header_content_type *header_content_type_decode(const char *in); struct _header_content_type *header_content_type_decode(const char *in);

View File

@ -209,7 +209,7 @@ strip (gchar *string, gchar c)
} }
char * char *
strstrcase (char *haystack, const char *needle) strstrcase (const char *haystack, const char *needle)
{ {
/* find the needle in the haystack neglecting case */ /* find the needle in the haystack neglecting case */
const char *ptr; const char *ptr;

View File

@ -61,7 +61,7 @@ void string_unquote (gchar *string);
gchar *strip (gchar *string, gchar c); gchar *strip (gchar *string, gchar c);
char *strstrcase (char *haystack, const char *needle); char *strstrcase (const char *haystack, const char *needle);
#ifdef __cplusplus #ifdef __cplusplus
} }