New function to parse an HTML meta-tag.
2001-07-10 Jeffrey Stedfast <fejj@ximian.com>
* camel-mime-utils.c (html_meta_param_list_decode): New function to parse an HTML meta-tag.
* camel-mime-part-utils.c (simple_data_wrapper_construct_from_parser): If the Content-Type did not contain a charset parameter and it's also a text/html part, we have one last place to look - in the META html tags. *sigh*
* camel-mime-message.c (camel_mime_message_get_source): s/gint/unsigned/ since that's what it should be.
svn path=/trunk/; revision=10976
This commit is contained in:

committed by
Jeffrey Stedfast

parent
456227c7b4
commit
e39d94c5ef
@ -1,3 +1,16 @@
|
||||
2001-07-10 Jeffrey Stedfast <fejj@ximian.com>
|
||||
|
||||
* camel-mime-utils.c (html_meta_param_list_decode): New function
|
||||
to parse an HTML meta-tag.
|
||||
|
||||
* camel-mime-part-utils.c
|
||||
(simple_data_wrapper_construct_from_parser): If the Content-Type
|
||||
did not contain a charset parameter and it's also a text/html
|
||||
part, we have 1 last place to look - in the META html tags. *sigh*
|
||||
|
||||
* camel-mime-message.c (camel_mime_message_get_source):
|
||||
s/gint/unsigned since that's what it should be.
|
||||
|
||||
2001-07-09 Jeffrey Stedfast <fejj@ximian.com>
|
||||
|
||||
* camel-pgp-context.c (pgp_sign): Forget the passphrase if the
|
||||
|
@ -171,7 +171,8 @@ camel_mime_message_get_type (void)
|
||||
return camel_mime_message_type;
|
||||
}
|
||||
|
||||
static void unref_recipient (gpointer key, gpointer value, gpointer user_data)
|
||||
static void
|
||||
unref_recipient (gpointer key, gpointer value, gpointer user_data)
|
||||
{
|
||||
camel_object_unref (CAMEL_OBJECT (value));
|
||||
}
|
||||
@ -193,6 +194,7 @@ camel_mime_message_set_date(CamelMimeMessage *message, time_t date, int offset)
|
||||
char *datestr;
|
||||
|
||||
g_assert(message);
|
||||
|
||||
if (date == CAMEL_MESSAGE_DATE_CURRENT) {
|
||||
struct tm *local;
|
||||
int tz;
|
||||
@ -418,10 +420,12 @@ const char *
|
||||
camel_mime_message_get_source (CamelMimeMessage *mime_message)
|
||||
{
|
||||
const char *src;
|
||||
|
||||
g_assert(mime_message);
|
||||
|
||||
src = camel_medium_get_header (CAMEL_MEDIUM (mime_message), "X-Evolution-Source");
|
||||
if (src) {
|
||||
while (*src && isspace ((gint) *src))
|
||||
while (*src && isspace ((unsigned) *src))
|
||||
++src;
|
||||
}
|
||||
return src;
|
||||
@ -514,15 +518,15 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
|
||||
switch (header_type) {
|
||||
case HEADER_FROM:
|
||||
if (message->from)
|
||||
camel_object_unref((CamelObject *)message->from);
|
||||
camel_object_unref (CAMEL_OBJECT (message->from));
|
||||
message->from = camel_internet_address_new ();
|
||||
camel_address_decode((CamelAddress *)message->from, header_value);
|
||||
camel_address_decode (CAMEL_ADDRESS (message->from), header_value);
|
||||
break;
|
||||
case HEADER_REPLY_TO:
|
||||
if (message->reply_to)
|
||||
camel_object_unref((CamelObject *)message->reply_to);
|
||||
camel_object_unref (CAMEL_OBJECT (message->reply_to));
|
||||
message->reply_to = camel_internet_address_new ();
|
||||
camel_address_decode((CamelAddress *)message->reply_to, header_value);
|
||||
camel_address_decode (CAMEL_ADDRESS (message->reply_to), header_value);
|
||||
break;
|
||||
case HEADER_SUBJECT:
|
||||
g_free(message->subject);
|
||||
@ -555,6 +559,7 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
@ -733,11 +738,11 @@ find_best_encoding(CamelMimePart *part, CamelBestencRequired required, CamelBest
|
||||
it as binary data (and take the result we have so far) */
|
||||
|
||||
if (charenc != NULL) {
|
||||
|
||||
/* otherwise, try another pass, converting to the real charset */
|
||||
|
||||
camel_mime_filter_reset ((CamelMimeFilter *)bestenc);
|
||||
camel_mime_filter_bestenc_set_flags(bestenc, CAMEL_BESTENC_GET_ENCODING|CAMEL_BESTENC_LF_IS_CRLF|callerflags);
|
||||
camel_mime_filter_bestenc_set_flags (bestenc, CAMEL_BESTENC_GET_ENCODING |
|
||||
CAMEL_BESTENC_LF_IS_CRLF | callerflags);
|
||||
|
||||
camel_stream_filter_add (filter, (CamelMimeFilter *)charenc);
|
||||
camel_stream_filter_add (filter, (CamelMimeFilter *)bestenc);
|
||||
@ -790,7 +795,8 @@ best_encoding(CamelMimeMessage *msg, CamelMimePart *part, void *datap)
|
||||
char *newct;
|
||||
|
||||
/* FIXME: ick, the part content_type interface needs fixing bigtime */
|
||||
header_content_type_set_param(part->content_type, "charset", charset?charset:"us-ascii");
|
||||
header_content_type_set_param (part->content_type, "charset",
|
||||
charset ? charset : "us-ascii");
|
||||
newct = header_content_type_format (part->content_type);
|
||||
if (newct) {
|
||||
d(printf("Setting content-type to %s\n", newct));
|
||||
|
@ -89,6 +89,7 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
|
||||
ct = camel_mime_parser_content_type (mp);
|
||||
if (header_content_type_is (ct, "text", "*")) {
|
||||
const char *charset = header_content_type_param (ct, "charset");
|
||||
char *acharset; /* to be alloca'd if needed */
|
||||
|
||||
if (fdec) {
|
||||
d(printf("Adding CRLF conversion filter\n"));
|
||||
@ -97,9 +98,67 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
|
||||
crlfid = camel_mime_parser_filter_add (mp, fcrlf);
|
||||
}
|
||||
|
||||
if (charset!=NULL
|
||||
&& !(strcasecmp(charset, "us-ascii")==0
|
||||
|| strcasecmp(charset, "utf-8")==0)) {
|
||||
/* Possible Lame Mailer Alert... check the META tag for a charset */
|
||||
if (!charset && header_content_type_is (ct, "text", "html")) {
|
||||
/* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */
|
||||
const char *data, *slashhead;
|
||||
CamelStream *mem;
|
||||
|
||||
mem = camel_stream_mem_new ();
|
||||
camel_data_wrapper_write_to_stream (dw, mem);
|
||||
camel_stream_write (mem, "", 1);
|
||||
|
||||
data = CAMEL_STREAM_MEM (mem)->buffer->data;
|
||||
slashhead = strstrcase (data, "</head");
|
||||
if (!slashhead)
|
||||
slashhead = data + CAMEL_STREAM_MEM (mem)->buffer->len;
|
||||
|
||||
/* Yea, this is ugly */
|
||||
while (data < slashhead) {
|
||||
struct _header_param *params;
|
||||
char *meta, *metaend;
|
||||
const char *val;
|
||||
|
||||
meta = strstrcase (data, "<meta");
|
||||
if (!meta)
|
||||
break;
|
||||
|
||||
metaend = strchr (meta, '>');
|
||||
if (!metaend)
|
||||
metaend = slashhead;
|
||||
|
||||
params = html_meta_param_list_decode (meta, metaend - meta);
|
||||
if (params) {
|
||||
val = header_param (params, "http-equiv");
|
||||
if (val && !g_strcasecmp (val, "Content-Type")) {
|
||||
struct _header_content_type *content_type;
|
||||
|
||||
content_type = header_content_type_decode (val);
|
||||
charset = header_content_type_param (content_type, "charset");
|
||||
if (charset) {
|
||||
acharset = alloca (strlen (charset) + 1);
|
||||
strcpy (acharset, charset);
|
||||
charset = acharset;
|
||||
}
|
||||
|
||||
header_content_type_unref (content_type);
|
||||
}
|
||||
|
||||
header_param_list_free (params);
|
||||
|
||||
/* break as soon as we find a charset */
|
||||
if (charset)
|
||||
break;
|
||||
}
|
||||
|
||||
data = metaend;
|
||||
}
|
||||
|
||||
camel_object_unref (CAMEL_OBJECT (mem));
|
||||
}
|
||||
|
||||
/* if the charset is not us-ascii or utf-8, then we need to convert to utf-8 */
|
||||
if (charset && !(g_strcasecmp (charset, "us-ascii") == 0 || g_strcasecmp (charset, "utf-8") == 0)) {
|
||||
d(printf("Adding conversion filter from %s to UTF-8\n", charset));
|
||||
fch = (CamelMimeFilter *)camel_mime_filter_charset_new_convert (charset, "UTF-8");
|
||||
if (fch) {
|
||||
@ -108,7 +167,6 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
|
||||
g_warning ("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
buffer = g_byte_array_new();
|
||||
|
@ -2719,6 +2719,58 @@ header_param_list_decode(const char *in)
|
||||
return header_decode_param_list(&in);
|
||||
}
|
||||
|
||||
struct _header_param *
|
||||
html_meta_param_list_decode (const char *in, int inlen)
|
||||
{
|
||||
struct _header_param *params = NULL, *last = NULL;
|
||||
const char *inptr, *inend;
|
||||
|
||||
if (in == NULL)
|
||||
return NULL;
|
||||
|
||||
inptr = in;
|
||||
inend = inptr + inlen;
|
||||
|
||||
if (*inptr != '<')
|
||||
return NULL;
|
||||
|
||||
if (!g_strncasecmp (inptr, "<meta", 5))
|
||||
inptr += 5;
|
||||
else
|
||||
return NULL;
|
||||
|
||||
header_decode_lwsp (&inptr);
|
||||
|
||||
while (inptr < inend && *inptr != '>') {
|
||||
char *name = NULL, *value = NULL;
|
||||
struct _header_param *param;
|
||||
|
||||
name = decode_token (&inptr);
|
||||
header_decode_lwsp (&inptr);
|
||||
if (*inptr != '=') {
|
||||
g_free (name);
|
||||
break;
|
||||
}
|
||||
|
||||
value = header_decode_value (&inptr);
|
||||
header_decode_lwsp (&inptr);
|
||||
|
||||
param = g_malloc (sizeof (struct _header_param));
|
||||
param->next = NULL;
|
||||
param->name = name;
|
||||
param->value = value;
|
||||
|
||||
if (last) {
|
||||
last->next = param;
|
||||
last = param;
|
||||
} else {
|
||||
last = params = param;
|
||||
}
|
||||
}
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
/* FIXME: I wrote this in a quick & dirty fashion - it may not be 100% correct */
|
||||
static char *
|
||||
header_encode_param (const unsigned char *in, gboolean *encoded)
|
||||
|
@ -109,6 +109,9 @@ void header_param_list_format_append(GString *out, struct _header_param *p);
|
||||
char *header_param_list_format(struct _header_param *p);
|
||||
void header_param_list_free(struct _header_param *p);
|
||||
|
||||
/* for decoding META tags in text/html stuff */
|
||||
struct _header_param *html_meta_param_list_decode (const char *in, int inlen);
|
||||
|
||||
/* Content-Type header */
|
||||
struct _header_content_type *header_content_type_new(const char *type, const char *subtype);
|
||||
struct _header_content_type *header_content_type_decode(const char *in);
|
||||
|
@ -209,7 +209,7 @@ strip (gchar *string, gchar c)
|
||||
}
|
||||
|
||||
char *
|
||||
strstrcase (char *haystack, const char *needle)
|
||||
strstrcase (const char *haystack, const char *needle)
|
||||
{
|
||||
/* find the needle in the haystack neglecting case */
|
||||
const char *ptr;
|
||||
|
@ -61,7 +61,7 @@ void string_unquote (gchar *string);
|
||||
|
||||
gchar *strip (gchar *string, gchar c);
|
||||
|
||||
char *strstrcase (char *haystack, const char *needle);
|
||||
char *strstrcase (const char *haystack, const char *needle);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
Reference in New Issue
Block a user