New function to parse an HTML meta-tag.

2001-07-10  Jeffrey Stedfast  <fejj@ximian.com>

	* camel-mime-utils.c (html_meta_param_list_decode): New function
	to parse an HTML meta-tag.

	* camel-mime-part-utils.c
	(simple_data_wrapper_construct_from_parser): If the Content-Type
	did not contain a charset parameter and it's also a text/html
	part, we have 1 last place to look - in the META html tags. *sigh*

	* camel-mime-message.c (camel_mime_message_get_source):
	s/gint/unsigned since that's what it should be.

svn path=/trunk/; revision=10976
This commit is contained in:
Jeffrey Stedfast
2001-07-10 22:06:56 +00:00
committed by Jeffrey Stedfast
parent 456227c7b4
commit e39d94c5ef
7 changed files with 365 additions and 233 deletions

View File

@ -1,3 +1,16 @@
2001-07-10 Jeffrey Stedfast <fejj@ximian.com>
* camel-mime-utils.c (html_meta_param_list_decode): New function
to parse an HTML meta-tag.
* camel-mime-part-utils.c
(simple_data_wrapper_construct_from_parser): If the Content-Type
did not contain a charset parameter and it's also a text/html
part, we have 1 last place to look - in the META html tags. *sigh*
* camel-mime-message.c (camel_mime_message_get_source):
s/gint/unsigned since that's what it should be.
2001-07-09 Jeffrey Stedfast <fejj@ximian.com>
* camel-pgp-context.c (pgp_sign): Forget the passphrase if the

View File

@ -171,7 +171,8 @@ camel_mime_message_get_type (void)
return camel_mime_message_type;
}
static void unref_recipient (gpointer key, gpointer value, gpointer user_data)
static void
unref_recipient (gpointer key, gpointer value, gpointer user_data)
{
camel_object_unref (CAMEL_OBJECT (value));
}
@ -193,6 +194,7 @@ camel_mime_message_set_date(CamelMimeMessage *message, time_t date, int offset)
char *datestr;
g_assert(message);
if (date == CAMEL_MESSAGE_DATE_CURRENT) {
struct tm *local;
int tz;
@ -418,10 +420,12 @@ const char *
camel_mime_message_get_source (CamelMimeMessage *mime_message)
{
const char *src;
g_assert(mime_message);
src = camel_medium_get_header (CAMEL_MEDIUM (mime_message), "X-Evolution-Source");
if (src) {
while (*src && isspace ((gint) *src))
while (*src && isspace ((unsigned) *src))
++src;
}
return src;
@ -514,15 +518,15 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
switch (header_type) {
case HEADER_FROM:
if (message->from)
camel_object_unref((CamelObject *)message->from);
camel_object_unref (CAMEL_OBJECT (message->from));
message->from = camel_internet_address_new ();
camel_address_decode((CamelAddress *)message->from, header_value);
camel_address_decode (CAMEL_ADDRESS (message->from), header_value);
break;
case HEADER_REPLY_TO:
if (message->reply_to)
camel_object_unref((CamelObject *)message->reply_to);
camel_object_unref (CAMEL_OBJECT (message->reply_to));
message->reply_to = camel_internet_address_new ();
camel_address_decode((CamelAddress *)message->reply_to, header_value);
camel_address_decode (CAMEL_ADDRESS (message->reply_to), header_value);
break;
case HEADER_SUBJECT:
g_free(message->subject);
@ -555,6 +559,7 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
default:
return FALSE;
}
return TRUE;
}
@ -733,11 +738,11 @@ find_best_encoding(CamelMimePart *part, CamelBestencRequired required, CamelBest
it as binary data (and take the result we have so far) */
if (charenc != NULL) {
/* otherwise, try another pass, converting to the real charset */
camel_mime_filter_reset ((CamelMimeFilter *)bestenc);
camel_mime_filter_bestenc_set_flags(bestenc, CAMEL_BESTENC_GET_ENCODING|CAMEL_BESTENC_LF_IS_CRLF|callerflags);
camel_mime_filter_bestenc_set_flags (bestenc, CAMEL_BESTENC_GET_ENCODING |
CAMEL_BESTENC_LF_IS_CRLF | callerflags);
camel_stream_filter_add (filter, (CamelMimeFilter *)charenc);
camel_stream_filter_add (filter, (CamelMimeFilter *)bestenc);
@ -790,7 +795,8 @@ best_encoding(CamelMimeMessage *msg, CamelMimePart *part, void *datap)
char *newct;
/* FIXME: ick, the part content_type interface needs fixing bigtime */
header_content_type_set_param(part->content_type, "charset", charset?charset:"us-ascii");
header_content_type_set_param (part->content_type, "charset",
charset ? charset : "us-ascii");
newct = header_content_type_format (part->content_type);
if (newct) {
d(printf("Setting content-type to %s\n", newct));

View File

@ -89,6 +89,7 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
ct = camel_mime_parser_content_type (mp);
if (header_content_type_is (ct, "text", "*")) {
const char *charset = header_content_type_param (ct, "charset");
char *acharset; /* to be alloca'd if needed */
if (fdec) {
d(printf("Adding CRLF conversion filter\n"));
@ -97,9 +98,67 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
crlfid = camel_mime_parser_filter_add (mp, fcrlf);
}
if (charset!=NULL
&& !(strcasecmp(charset, "us-ascii")==0
|| strcasecmp(charset, "utf-8")==0)) {
/* Possible Lame Mailer Alert... check the META tag for a charset */
if (!charset && header_content_type_is (ct, "text", "html")) {
/* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */
const char *data, *slashhead;
CamelStream *mem;
mem = camel_stream_mem_new ();
camel_data_wrapper_write_to_stream (dw, mem);
camel_stream_write (mem, "", 1);
data = CAMEL_STREAM_MEM (mem)->buffer->data;
slashhead = strstrcase (data, "</head");
if (!slashhead)
slashhead = data + CAMEL_STREAM_MEM (mem)->buffer->len;
/* Yea, this is ugly */
while (data < slashhead) {
struct _header_param *params;
char *meta, *metaend;
const char *val;
meta = strstrcase (data, "<meta");
if (!meta)
break;
metaend = strchr (meta, '>');
if (!metaend)
metaend = slashhead;
params = html_meta_param_list_decode (meta, metaend - meta);
if (params) {
val = header_param (params, "http-equiv");
if (val && !g_strcasecmp (val, "Content-Type")) {
struct _header_content_type *content_type;
content_type = header_content_type_decode (val);
charset = header_content_type_param (content_type, "charset");
if (charset) {
acharset = alloca (strlen (charset) + 1);
strcpy (acharset, charset);
charset = acharset;
}
header_content_type_unref (content_type);
}
header_param_list_free (params);
/* break as soon as we find a charset */
if (charset)
break;
}
data = metaend;
}
camel_object_unref (CAMEL_OBJECT (mem));
}
/* if the charset is not us-ascii or utf-8, then we need to convert to utf-8 */
if (charset && !(g_strcasecmp (charset, "us-ascii") == 0 || g_strcasecmp (charset, "utf-8") == 0)) {
d(printf("Adding conversion filter from %s to UTF-8\n", charset));
fch = (CamelMimeFilter *)camel_mime_filter_charset_new_convert (charset, "UTF-8");
if (fch) {
@ -108,7 +167,6 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
g_warning ("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset);
}
}
}
buffer = g_byte_array_new();

View File

@ -2719,6 +2719,58 @@ header_param_list_decode(const char *in)
return header_decode_param_list(&in);
}
/*
 * html_meta_param_list_decode:
 * @in: pointer to the start of an HTML META tag ("<meta", any case)
 * @inlen: number of bytes of @in that belong to the tag
 *
 * Parses the name=value attributes of a META tag into a singly-linked
 * list of struct _header_param, kept in the order they appear in the tag.
 * Parsing stops at the closing '>', at @in + @inlen, or at the first
 * attribute that is not followed by '='.
 *
 * Returns the head of a newly allocated list, or NULL if @in is NULL,
 * does not begin with "<meta", or no attribute could be parsed.  The
 * caller owns the list (callers in this tree release it with
 * header_param_list_free()).
 */
struct _header_param *
html_meta_param_list_decode (const char *in, int inlen)
{
	struct _header_param *params = NULL, *last = NULL;
	const char *inptr, *inend;

	if (in == NULL)
		return NULL;

	inptr = in;
	inend = inptr + inlen;

	/* the tag must open with "<meta" (case-insensitive) */
	if (g_strncasecmp (inptr, "<meta", 5) != 0)
		return NULL;
	inptr += 5;

	header_decode_lwsp (&inptr);

	while (inptr < inend && *inptr != '>') {
		const char *start = inptr;
		struct _header_param *param;
		char *name, *value;

		name = decode_token (&inptr);
		header_decode_lwsp (&inptr);

		/* never look at (or parse) bytes beyond the caller's bound */
		if (inptr >= inend || *inptr != '=') {
			g_free (name);
			break;
		}

		/* NOTE(review): this relies on header_decode_value() stepping
		 * over the '=' itself - its definition is not visible here;
		 * confirm, otherwise the '=' must be skipped before the call. */
		value = header_decode_value (&inptr);
		header_decode_lwsp (&inptr);

		/* If nothing at all was consumed, the next pass would see
		 * exactly the same input: bail out instead of looping forever
		 * and allocating a node on every iteration. */
		if (inptr == start) {
			g_free (name);
			g_free (value);
			break;
		}

		param = g_malloc (sizeof (*param));
		param->next = NULL;
		param->name = name;
		param->value = value;

		/* append so the list preserves attribute order */
		if (last)
			last->next = param;
		else
			params = param;
		last = param;
	}

	return params;
}
/* FIXME: I wrote this in a quick & dirty fashion - it may not be 100% correct */
static char *
header_encode_param (const unsigned char *in, gboolean *encoded)

View File

@ -109,6 +109,9 @@ void header_param_list_format_append(GString *out, struct _header_param *p);
char *header_param_list_format(struct _header_param *p);
void header_param_list_free(struct _header_param *p);
/* for decoding META tags in text/html stuff:
 * `in` points at the start of the tag ("<meta", matched case-insensitively)
 * and `inlen` is the number of bytes belonging to the tag.  Returns a list
 * of the tag's name=value attributes in tag order, or NULL if `in` is NULL,
 * does not start with "<meta", or no attribute was parsed.  The caller
 * releases the result with header_param_list_free(). */
struct _header_param *html_meta_param_list_decode (const char *in, int inlen);
/* Content-Type header */
struct _header_content_type *header_content_type_new(const char *type, const char *subtype);
struct _header_content_type *header_content_type_decode(const char *in);

View File

@ -209,7 +209,7 @@ strip (gchar *string, gchar c)
}
char *
strstrcase (char *haystack, const char *needle)
strstrcase (const char *haystack, const char *needle)
{
/* find the needle in the haystack neglecting case */
const char *ptr;

View File

@ -61,7 +61,7 @@ void string_unquote (gchar *string);
gchar *strip (gchar *string, gchar c);
char *strstrcase (char *haystack, const char *needle);
char *strstrcase (const char *haystack, const char *needle);
#ifdef __cplusplus
}