New function to parse an HTML meta-tag.

2001-07-10 Jeffrey Stedfast <fejj@ximian.com> * camel-mime-utils.c (html_meta_param_list_decode): New function to parse an HTML meta-tag. * camel-mime-part-utils.c (simple_data_wrapper_construct_from_parser): If the Content-Type did not contain a charset parameter and it's also a text/html part, we have 1 last place to look - in the META html tags. *sigh* * camel-mime-message.c (camel_mime_message_get_source): s/gint/unsigned since that's what it should be. svn path=/trunk/; revision=10976
2001-07-10 22:06:56 +00:00
parent 456227c7b4
commit e39d94c5ef
7 changed files with 365 additions and 233 deletions
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@ -1,3 +1,16 @@
 2001-07-10  Jeffrey Stedfast  <fejj@ximian.com>
 	* camel-mime-utils.c (html_meta_param_list_decode): New function
 	to parse an HTML meta-tag.
 	* camel-mime-part-utils.c
 	(simple_data_wrapper_construct_from_parser): If the Content-Type
 	did not contain a charset parameter and it's also a text/html
 	part, we have 1 last place to look - in the META html tags. *sigh*
 	* camel-mime-message.c (camel_mime_message_get_source):
 	s/gint/unsigned since that's what it should be.
 2001-07-09  Jeffrey Stedfast  <fejj@ximian.com>
 	* camel-pgp-context.c (pgp_sign): Forget the passphrase if the
--- a/camel/camel-mime-message.c
+++ b/camel/camel-mime-message.c
@ -171,7 +171,8 @@ camel_mime_message_get_type (void)
 	return camel_mime_message_type;
 }
-static void unref_recipient (gpointer key, gpointer value, gpointer user_data)
+static void
 unref_recipient (gpointer key, gpointer value, gpointer user_data)
 {
 	camel_object_unref (CAMEL_OBJECT (value));
 }
@ -193,6 +194,7 @@ camel_mime_message_set_date(CamelMimeMessage *message,  time_t date, int offset)
 	char *datestr;
 	g_assert(message);
 	if (date == CAMEL_MESSAGE_DATE_CURRENT) {
 		struct tm *local;
 		int tz;
@ -418,10 +420,12 @@ const char *
 camel_mime_message_get_source (CamelMimeMessage *mime_message)
 {
 	const char *src;
 	g_assert(mime_message);
 	src = camel_medium_get_header (CAMEL_MEDIUM (mime_message), "X-Evolution-Source");
 	if (src) {
-		while (*src && isspace ((gint) *src))
+		while (*src && isspace ((unsigned) *src))
 			++src;
 	}
 	return src;
@ -514,15 +518,15 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
 	switch (header_type) {
 	case HEADER_FROM:
 		if (message->from)
-			camel_object_unref((CamelObject *)message->from);
+			camel_object_unref (CAMEL_OBJECT (message->from));
 		message->from = camel_internet_address_new ();
-		camel_address_decode((CamelAddress *)message->from, header_value);
+		camel_address_decode (CAMEL_ADDRESS (message->from), header_value);
 		break;
 	case HEADER_REPLY_TO:
 		if (message->reply_to)
-			camel_object_unref((CamelObject *)message->reply_to);
+			camel_object_unref (CAMEL_OBJECT (message->reply_to));
 		message->reply_to = camel_internet_address_new ();
-		camel_address_decode((CamelAddress *)message->reply_to, header_value);
+		camel_address_decode (CAMEL_ADDRESS (message->reply_to), header_value);
 		break;
 	case HEADER_SUBJECT:
 		g_free(message->subject);
@ -555,6 +559,7 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
 	default:
 		return FALSE;
 	}
 	return TRUE;
 }
@ -733,11 +738,11 @@ find_best_encoding(CamelMimePart *part, CamelBestencRequired required, CamelBest
 		   it as binary data (and take the result we have so far) */
 		if (charenc != NULL) {
 			/* otherwise, try another pass, converting to the real charset */
 			camel_mime_filter_reset ((CamelMimeFilter *)bestenc);
-			camel_mime_filter_bestenc_set_flags(bestenc, CAMEL_BESTENC_GET_ENCODING|CAMEL_BESTENC_LF_IS_CRLF|callerflags);
+			camel_mime_filter_bestenc_set_flags (bestenc, CAMEL_BESTENC_GET_ENCODING |
 							     CAMEL_BESTENC_LF_IS_CRLF | callerflags);
 			camel_stream_filter_add (filter, (CamelMimeFilter *)charenc);
 			camel_stream_filter_add (filter, (CamelMimeFilter *)bestenc);
@ -790,7 +795,8 @@ best_encoding(CamelMimeMessage *msg, CamelMimePart *part, void *datap)
 				char *newct;
 				/* FIXME: ick, the part content_type interface needs fixing bigtime */
-				header_content_type_set_param(part->content_type, "charset", charset?charset:"us-ascii");
+				header_content_type_set_param (part->content_type, "charset",
 							       charset ? charset : "us-ascii");
 				newct = header_content_type_format (part->content_type);
 				if (newct) {
 					d(printf("Setting content-type to %s\n", newct));
--- a/camel/camel-mime-part-utils.c
+++ b/camel/camel-mime-part-utils.c
@ -89,6 +89,7 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
 	ct = camel_mime_parser_content_type (mp);
 	if (header_content_type_is (ct, "text", "*")) {
 		const char *charset = header_content_type_param (ct, "charset");
 		char *acharset; /* to be alloca'd if needed */
 		if (fdec) {
 			d(printf("Adding CRLF conversion filter\n"));
@ -97,9 +98,67 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
 			crlfid = camel_mime_parser_filter_add (mp, fcrlf);
 		}
-		if (charset!=NULL
+		/* Possible Lame Mailer Alert... check the META tag for a charset */
-		    && !(strcasecmp(charset, "us-ascii")==0
+		if (!charset && header_content_type_is (ct, "text", "html")) {
-			 || strcasecmp(charset, "utf-8")==0)) {
+			/* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */
 			const char *data, *slashhead;
 			CamelStream *mem;
 			mem = camel_stream_mem_new ();
 			camel_data_wrapper_write_to_stream (dw, mem);
 			camel_stream_write (mem, "", 1);
 			data = CAMEL_STREAM_MEM (mem)->buffer->data;
 			slashhead = strstrcase (data, "</head");
 			if (!slashhead)
 				slashhead = data + CAMEL_STREAM_MEM (mem)->buffer->len;
 			/* Yea, this is ugly */
 			while (data < slashhead) {
 				struct _header_param *params;
 				char *meta, *metaend;
 				const char *val;
 				meta = strstrcase (data, "<meta");
 				if (!meta)
 					break;
 				metaend = strchr (meta, '>');
 				if (!metaend)
 					metaend = slashhead;
 				params = html_meta_param_list_decode (meta, metaend - meta);
 				if (params) {
 					val = header_param (params, "http-equiv");
 					if (val && !g_strcasecmp (val, "Content-Type")) {
 						struct _header_content_type *content_type;
 						content_type = header_content_type_decode (val);
 						charset = header_content_type_param (content_type, "charset");
 						if (charset) {
 							acharset = alloca (strlen (charset) + 1);
 							strcpy (acharset, charset);
 							charset = acharset;
 						}
 						header_content_type_unref (content_type);
 					}
 					header_param_list_free (params);
 					/* break as soon as we find a charset */
 					if (charset)
 						break;
 				}
 				data = metaend;
 			}
 			camel_object_unref (CAMEL_OBJECT (mem));
 		}
 		/* if the charset is not us-ascii or utf-8, then we need to convert to utf-8 */
 		if (charset && !(g_strcasecmp (charset, "us-ascii") == 0 || g_strcasecmp (charset, "utf-8") == 0)) {
 			d(printf("Adding conversion filter from %s to UTF-8\n", charset));
 			fch = (CamelMimeFilter *)camel_mime_filter_charset_new_convert (charset, "UTF-8");
 			if (fch) {
@ -108,7 +167,6 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
 				g_warning ("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset);
 			}
 		}
 	}
 	buffer = g_byte_array_new();
--- a/camel/camel-mime-utils.c
+++ b/camel/camel-mime-utils.c
@ -2719,6 +2719,58 @@ header_param_list_decode(const char *in)
 	return header_decode_param_list(&in);
 }
 /*
  * html_meta_param_list_decode:
  * @in: pointer to the '<' that opens an HTML META tag
  * @inlen: number of bytes of @in that belong to the tag
  *
  * Parses the name=value attributes of a META tag, e.g.
  * <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
  * into a singly linked list of parameters, in the order they appear.
  *
  * Returns the head of the list, or NULL if @in is NULL, does not begin
  * with "<meta" (compared case-insensitively) or yields no attributes.
  * A parameter's value may be NULL when no value could be decoded after
  * the '='.  The caller owns the returned list.
  *
  * NOTE(review): the token/value helpers take no length, so they are
  * presumably bounded only by the NUL terminator and may scan past
  * @in + @inlen inside a single attribute - confirm against the helpers.
  */
 struct _header_param *
 html_meta_param_list_decode (const char *in, int inlen)
 {
 	struct _header_param *params = NULL, *last = NULL;
 	const char *inptr, *inend;
 	/* anything shorter than "<meta" cannot contain an attribute */
 	if (in == NULL || inlen < 5)
 		return NULL;
 	if (g_strncasecmp (in, "<meta", 5) != 0)
 		return NULL;
 	inptr = in + 5;
 	inend = in + inlen;
 	header_decode_lwsp (&inptr);
 	while (inptr < inend && *inptr != '>') {
 		struct _header_param *param;
 		char *name, *value;
 		const char *eq;
 		name = decode_token (&inptr);
 		header_decode_lwsp (&inptr);
 		/* a nameless attribute or a missing '=' means the tag is
 		 * malformed; keep what was parsed so far and stop */
 		if (name == NULL || *inptr != '=') {
 			g_free (name);
 			break;
 		}
 		eq = inptr;
 		value = header_decode_value (&inptr);
 		if (value == NULL && inptr == eq) {
 			/* the value decoder left us sitting on the '=': step
 			 * over it and retry.  Without this the loop would never
 			 * advance and would allocate parameters forever. */
 			inptr++;
 			value = header_decode_value (&inptr);
 		}
 		header_decode_lwsp (&inptr);
 		param = g_malloc (sizeof (*param));
 		param->next = NULL;
 		param->name = name;
 		param->value = value;
 		/* append to the tail so attribute order is preserved */
 		if (last)
 			last->next = param;
 		else
 			params = param;
 		last = param;
 	}
 	return params;
 }
 /* FIXME: I wrote this in a quick & dirty fashion - it may not be 100% correct */
 static char *
 header_encode_param (const unsigned char *in, gboolean *encoded)
--- a/camel/camel-mime-utils.h
+++ b/camel/camel-mime-utils.h
@ -109,6 +109,9 @@ void header_param_list_format_append(GString *out, struct _header_param *p);
 char *header_param_list_format(struct _header_param *p);
 void header_param_list_free(struct _header_param *p);
 /* For decoding META tags in text/html stuff.
  * @in must point at the "<meta" that opens the tag (matched
  * case-insensitively) and @inlen is the number of bytes of the tag to
  * consider.  Returns a list of the tag's name=value attributes, or NULL
  * when @in is NULL, does not start with "<meta", or no attribute could be
  * parsed.  The caller in camel-mime-part-utils.c releases the list with
  * header_param_list_free(). */
 struct _header_param *html_meta_param_list_decode (const char *in, int inlen);
 /* Content-Type header */
 struct _header_content_type *header_content_type_new(const char *type, const char *subtype);
 struct _header_content_type *header_content_type_decode(const char *in);
--- a/camel/string-utils.c
+++ b/camel/string-utils.c
@ -209,7 +209,7 @@ strip (gchar *string, gchar c)
 }
 char *
-strstrcase (char *haystack, const char *needle)
+strstrcase (const char *haystack, const char *needle)
 {
 	/* find the needle in the haystack neglecting case */
 	const char *ptr;
--- a/camel/string-utils.h
+++ b/camel/string-utils.h
@ -61,7 +61,7 @@ void    string_unquote          (gchar *string);
 gchar   *strip                  (gchar *string, gchar c);
-char    *strstrcase             (char *haystack, const char *needle);
+char    *strstrcase             (const char *haystack, const char *needle);
 #ifdef __cplusplus
 }