New function to parse an HTML meta-tag.

2001-07-10  Jeffrey Stedfast  <fejj@ximian.com>

	* camel-mime-utils.c (html_meta_param_list_decode): New function
	to parse an HTML meta-tag.

	* camel-mime-part-utils.c
	(simple_data_wrapper_construct_from_parser): If the Content-Type
	did not contain a charset parameter and it's also a text/html
	part, we have one last place to look - in the META html tags. *sigh*

	* camel-mime-message.c (camel_mime_message_get_source):
	s/gint/unsigned since that's what it should be.

svn path=/trunk/; revision=10976
This commit is contained in:
Jeffrey Stedfast
2001-07-10 22:06:56 +00:00
committed by Jeffrey Stedfast
parent 456227c7b4
commit e39d94c5ef
7 changed files with 365 additions and 233 deletions

View File

@ -1,3 +1,16 @@
2001-07-10 Jeffrey Stedfast <fejj@ximian.com>
* camel-mime-utils.c (html_meta_param_list_decode): New function
to parse an HTML meta-tag.
* camel-mime-part-utils.c
(simple_data_wrapper_construct_from_parser): If the Content-Type
did not contain a charset parameter and it's also a text/html
part, we have one last place to look - in the META html tags. *sigh*
* camel-mime-message.c (camel_mime_message_get_source):
s/gint/unsigned since that's what it should be.
2001-07-09 Jeffrey Stedfast <fejj@ximian.com>
* camel-pgp-context.c (pgp_sign): Forget the passphrase if the

View File

@ -138,18 +138,18 @@ camel_mime_message_finalize (CamelObject *object)
{
CamelMimeMessage *message = CAMEL_MIME_MESSAGE (object);
g_free(message->subject);
g_free(message->message_id);
g_free (message->subject);
g_free (message->message_id);
if (message->reply_to)
camel_object_unref((CamelObject *)message->reply_to);
camel_object_unref ((CamelObject *)message->reply_to);
if (message->from)
camel_object_unref((CamelObject *)message->from);
g_hash_table_foreach(message->recipients, unref_recipient, NULL);
g_hash_table_destroy(message->recipients);
camel_object_unref ((CamelObject *)message->from);
g_hash_table_foreach (message->recipients, unref_recipient, NULL);
g_hash_table_destroy (message->recipients);
}
@ -171,7 +171,8 @@ camel_mime_message_get_type (void)
return camel_mime_message_type;
}
static void unref_recipient (gpointer key, gpointer value, gpointer user_data)
static void
unref_recipient (gpointer key, gpointer value, gpointer user_data)
{
camel_object_unref (CAMEL_OBJECT (value));
}
@ -180,7 +181,7 @@ CamelMimeMessage *
camel_mime_message_new (void)
{
CamelMimeMessage *mime_message;
mime_message = CAMEL_MIME_MESSAGE(camel_object_new (CAMEL_MIME_MESSAGE_TYPE));
mime_message = CAMEL_MIME_MESSAGE (camel_object_new (CAMEL_MIME_MESSAGE_TYPE));
return mime_message;
}
@ -188,15 +189,16 @@ camel_mime_message_new (void)
/* **** Date: */
void
camel_mime_message_set_date(CamelMimeMessage *message, time_t date, int offset)
camel_mime_message_set_date (CamelMimeMessage *message, time_t date, int offset)
{
char *datestr;
g_assert(message);
if (date == CAMEL_MESSAGE_DATE_CURRENT) {
struct tm *local;
int tz;
date = time(0);
local = localtime(&date);
#if defined(HAVE_TIMEZONE)
@ -210,37 +212,37 @@ camel_mime_message_set_date(CamelMimeMessage *message, time_t date, int offset)
}
message->date = date;
message->date_offset = offset;
datestr = header_format_date(date, offset);
CAMEL_MEDIUM_CLASS(parent_class)->set_header((CamelMedium *)message, "Date", datestr);
g_free(datestr);
datestr = header_format_date (date, offset);
CAMEL_MEDIUM_CLASS (parent_class)->set_header ((CamelMedium *)message, "Date", datestr);
g_free (datestr);
}
time_t
camel_mime_message_get_date(CamelMimeMessage *msg, int *offset)
camel_mime_message_get_date (CamelMimeMessage *msg, int *offset)
{
if (offset)
*offset = msg->date_offset;
return msg->date;
}
time_t
camel_mime_message_get_date_received(CamelMimeMessage *msg, int *offset)
camel_mime_message_get_date_received (CamelMimeMessage *msg, int *offset)
{
if (msg->date_received == CAMEL_MESSAGE_DATE_CURRENT) {
const char *received;
received = camel_medium_get_header((CamelMedium *)msg, "received");
received = camel_medium_get_header ((CamelMedium *)msg, "received");
if (received)
received = strrchr(received, ';');
received = strrchr (received, ';');
if (received)
msg->date_received = header_decode_date(received + 1, &msg->date_received_offset);
msg->date_received = header_decode_date (received + 1, &msg->date_received_offset);
}
if (offset)
*offset = msg->date_received_offset;
return msg->date_received;
}
@ -283,43 +285,43 @@ camel_mime_message_set_reply_to (CamelMimeMessage *msg, const CamelInternetAddre
char *addr;
g_assert(msg);
if (msg->reply_to) {
camel_object_unref((CamelObject *)msg->reply_to);
camel_object_unref ((CamelObject *)msg->reply_to);
msg->reply_to = NULL;
}
if (reply_to == NULL) {
CAMEL_MEDIUM_CLASS(parent_class)->remove_header(CAMEL_MEDIUM(msg), "Reply-To");
CAMEL_MEDIUM_CLASS (parent_class)->remove_header (CAMEL_MEDIUM (msg), "Reply-To");
return;
}
msg->reply_to = (CamelInternetAddress *)camel_address_new_clone((CamelAddress *)reply_to);
addr = camel_address_encode((CamelAddress *)msg->reply_to);
CAMEL_MEDIUM_CLASS(parent_class)->set_header(CAMEL_MEDIUM(msg), "Reply-To", addr);
g_free(addr);
msg->reply_to = (CamelInternetAddress *)camel_address_new_clone ((CamelAddress *)reply_to);
addr = camel_address_encode ((CamelAddress *)msg->reply_to);
CAMEL_MEDIUM_CLASS (parent_class)->set_header (CAMEL_MEDIUM (msg), "Reply-To", addr);
g_free (addr);
}
const CamelInternetAddress *
camel_mime_message_get_reply_to(CamelMimeMessage *mime_message)
camel_mime_message_get_reply_to (CamelMimeMessage *mime_message)
{
g_assert (mime_message);
/* TODO: ref for threading? */
return mime_message->reply_to;
}
/* **** Subject: */
void
camel_mime_message_set_subject(CamelMimeMessage *mime_message, const char *subject)
camel_mime_message_set_subject (CamelMimeMessage *mime_message, const char *subject)
{
char *text;
g_assert(mime_message);
g_free(mime_message->subject);
g_free (mime_message->subject);
mime_message->subject = g_strstrip (g_strdup (subject));
text = header_encode_string((unsigned char *)mime_message->subject);
CAMEL_MEDIUM_CLASS(parent_class)->set_header(CAMEL_MEDIUM (mime_message), "Subject", text);
@ -327,10 +329,10 @@ camel_mime_message_set_subject(CamelMimeMessage *mime_message, const char *subje
}
const char *
camel_mime_message_get_subject(CamelMimeMessage *mime_message)
camel_mime_message_get_subject (CamelMimeMessage *mime_message)
{
g_assert(mime_message);
return mime_message->subject;
}
@ -340,22 +342,22 @@ camel_mime_message_get_subject(CamelMimeMessage *mime_message)
to cache the from (and reply_to) addresses as InternetAddresses internally, we
could just get it from the headers and reprocess every time. */
void
camel_mime_message_set_from(CamelMimeMessage *msg, const CamelInternetAddress *from)
camel_mime_message_set_from (CamelMimeMessage *msg, const CamelInternetAddress *from)
{
char *addr;
g_assert(msg);
if (msg->from) {
camel_object_unref((CamelObject *)msg->from);
msg->from = NULL;
}
if (from == NULL || camel_address_length((CamelAddress *)from) == 0) {
CAMEL_MEDIUM_CLASS(parent_class)->remove_header(CAMEL_MEDIUM(msg), "From");
return;
}
msg->from = (CamelInternetAddress *)camel_address_new_clone((CamelAddress *)from);
addr = camel_address_encode((CamelAddress *)msg->from);
CAMEL_MEDIUM_CLASS (parent_class)->set_header(CAMEL_MEDIUM(msg), "From", addr);
@ -363,12 +365,12 @@ camel_mime_message_set_from(CamelMimeMessage *msg, const CamelInternetAddress *f
}
const CamelInternetAddress *
camel_mime_message_get_from(CamelMimeMessage *mime_message)
camel_mime_message_get_from (CamelMimeMessage *mime_message)
{
g_assert (mime_message);
/* TODO: we should really ref this for multi-threading to work */
return mime_message->from;
}
@ -379,65 +381,67 @@ camel_mime_message_set_recipients(CamelMimeMessage *mime_message, const char *ty
{
char *text;
CamelInternetAddress *addr;
g_assert(mime_message);
addr = g_hash_table_lookup(mime_message->recipients, type);
addr = g_hash_table_lookup (mime_message->recipients, type);
if (addr == NULL) {
g_warning("trying to set a non-valid receipient type: %s", type);
g_warning ("trying to set a non-valid receipient type: %s", type);
return;
}
if (r == NULL || camel_address_length((CamelAddress *)r) == 0) {
camel_address_remove((CamelAddress *)addr, -1);
CAMEL_MEDIUM_CLASS(parent_class)->remove_header(CAMEL_MEDIUM(mime_message), type);
if (r == NULL || camel_address_length ((CamelAddress *)r) == 0) {
camel_address_remove ((CamelAddress *)addr, -1);
CAMEL_MEDIUM_CLASS (parent_class)->remove_header (CAMEL_MEDIUM (mime_message), type);
return;
}
/* note this does copy, and not append (cat) */
camel_address_copy((CamelAddress *)addr, (const CamelAddress *)r);
camel_address_copy ((CamelAddress *)addr, (const CamelAddress *)r);
/* and sync our headers */
text = camel_address_encode(CAMEL_ADDRESS(addr));
CAMEL_MEDIUM_CLASS(parent_class)->set_header(CAMEL_MEDIUM(mime_message), type, text);
text = camel_address_encode (CAMEL_ADDRESS (addr));
CAMEL_MEDIUM_CLASS (parent_class)->set_header (CAMEL_MEDIUM (mime_message), type, text);
g_free(text);
}
void
camel_mime_message_set_source(CamelMimeMessage *mime_message, const char *src)
camel_mime_message_set_source (CamelMimeMessage *mime_message, const char *src)
{
char *shrouded_src;
g_assert (mime_message);
shrouded_src = e_url_shroud (src);
camel_medium_add_header (CAMEL_MEDIUM (mime_message), "X-Evolution-Source", shrouded_src);
g_free (shrouded_src);
}
const char *
camel_mime_message_get_source(CamelMimeMessage *mime_message)
camel_mime_message_get_source (CamelMimeMessage *mime_message)
{
const char *src;
g_assert(mime_message);
src = camel_medium_get_header (CAMEL_MEDIUM (mime_message), "X-Evolution-Source");
if (src) {
while (*src && isspace ((gint) *src))
while (*src && isspace ((unsigned) *src))
++src;
}
return src;
}
const CamelInternetAddress *
camel_mime_message_get_recipients(CamelMimeMessage *mime_message, const char *type)
camel_mime_message_get_recipients (CamelMimeMessage *mime_message, const char *type)
{
g_assert(mime_message);
return g_hash_table_lookup(mime_message->recipients, type);
return g_hash_table_lookup (mime_message->recipients, type);
}
/* mime_message */
static int
construct_from_parser(CamelMimePart *dw, CamelMimeParser *mp)
construct_from_parser (CamelMimePart *dw, CamelMimeParser *mp)
{
char *buf;
int len;
@ -455,15 +459,15 @@ construct_from_parser(CamelMimePart *dw, CamelMimeParser *mp)
return -1;
/* ... then clean up the follow-on state */
state = camel_mime_parser_step(mp, &buf, &len);
state = camel_mime_parser_step (mp, &buf, &len);
switch (state) {
case HSCAN_EOF: case HSCAN_FROM_END: /* these doesn't belong to us */
camel_mime_parser_unstep(mp);
camel_mime_parser_unstep (mp);
case HSCAN_MESSAGE_END:
break;
default:
g_error("Bad parser state: Expecing MESSAGE_END or EOF or EOM, got: %d", camel_mime_parser_state(mp));
camel_mime_parser_unstep(mp);
g_error ("Bad parser state: Expecing MESSAGE_END or EOF or EOM, got: %d", camel_mime_parser_state (mp));
camel_mime_parser_unstep (mp);
return -1;
}
@ -478,27 +482,27 @@ static int
write_to_stream (CamelDataWrapper *data_wrapper, CamelStream *stream)
{
CamelMimeMessage *mm = CAMEL_MIME_MESSAGE (data_wrapper);
/* force mandatory headers ... */
if (mm->from == NULL) {
/* FIXME: should we just abort? Should we make one up? */
g_warning("No from set for message");
camel_medium_set_header((CamelMedium *)mm, "From", "");
g_warning ("No from set for message");
camel_medium_set_header ((CamelMedium *)mm, "From", "");
}
if (!camel_medium_get_header((CamelMedium *)mm, "Date"))
camel_mime_message_set_date(mm, CAMEL_MESSAGE_DATE_CURRENT, 0);
if (!camel_medium_get_header ((CamelMedium *)mm, "Date"))
camel_mime_message_set_date (mm, CAMEL_MESSAGE_DATE_CURRENT, 0);
if (mm->subject == NULL)
camel_mime_message_set_subject(mm, "No Subject");
camel_mime_message_set_subject (mm, "No Subject");
if (mm->message_id == NULL)
camel_mime_message_set_message_id (mm, NULL);
/* FIXME: "To" header needs to be set explicitly as well ... */
if (!camel_medium_get_header ((CamelMedium *)mm, "Mime-Version"))
camel_medium_set_header((CamelMedium *)mm, "Mime-Version", "1.0");
camel_medium_set_header ((CamelMedium *)mm, "Mime-Version", "1.0");
return CAMEL_DATA_WRAPPER_CLASS (parent_class)->write_to_stream (data_wrapper, stream);
}
@ -509,37 +513,37 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
CamelHeaderType header_type;
CamelMimeMessage *message = CAMEL_MIME_MESSAGE (medium);
CamelInternetAddress *addr;
header_type = (CamelHeaderType)g_hash_table_lookup(header_name_table, header_name);
header_type = (CamelHeaderType)g_hash_table_lookup (header_name_table, header_name);
switch (header_type) {
case HEADER_FROM:
if (message->from)
camel_object_unref((CamelObject *)message->from);
message->from = camel_internet_address_new();
camel_address_decode((CamelAddress *)message->from, header_value);
camel_object_unref (CAMEL_OBJECT (message->from));
message->from = camel_internet_address_new ();
camel_address_decode (CAMEL_ADDRESS (message->from), header_value);
break;
case HEADER_REPLY_TO:
if (message->reply_to)
camel_object_unref((CamelObject *)message->reply_to);
message->reply_to = camel_internet_address_new();
camel_address_decode((CamelAddress *)message->reply_to, header_value);
camel_object_unref (CAMEL_OBJECT (message->reply_to));
message->reply_to = camel_internet_address_new ();
camel_address_decode (CAMEL_ADDRESS (message->reply_to), header_value);
break;
case HEADER_SUBJECT:
g_free(message->subject);
message->subject = g_strstrip(header_decode_string(header_value));
message->subject = g_strstrip (header_decode_string (header_value));
break;
case HEADER_TO:
case HEADER_CC:
case HEADER_BCC:
addr = g_hash_table_lookup (message->recipients, header_name);
if (header_value)
camel_address_decode(CAMEL_ADDRESS (addr), header_value);
camel_address_decode (CAMEL_ADDRESS (addr), header_value);
else
camel_address_remove(CAMEL_ADDRESS (addr), -1);
camel_address_remove (CAMEL_ADDRESS (addr), -1);
break;
case HEADER_DATE:
if (header_value) {
message->date = header_decode_date(header_value, &message->date_offset);
message->date = header_decode_date (header_value, &message->date_offset);
} else {
message->date = CAMEL_MESSAGE_DATE_CURRENT;
message->date_offset = 0;
@ -555,96 +559,97 @@ process_header (CamelMedium *medium, const char *header_name, const char *header
default:
return FALSE;
}
return TRUE;
}
static void
set_header(CamelMedium *medium, const char *header_name, const void *header_value)
set_header (CamelMedium *medium, const char *header_name, const void *header_value)
{
process_header(medium, header_name, header_value);
process_header (medium, header_name, header_value);
parent_class->parent_class.set_header (medium, header_name, header_value);
}
static void
add_header(CamelMedium *medium, const char *header_name, const void *header_value)
add_header (CamelMedium *medium, const char *header_name, const void *header_value)
{
/* if we process it, then it must be forced unique as well ... */
if (process_header(medium, header_name, header_value))
if (process_header (medium, header_name, header_value))
parent_class->parent_class.set_header (medium, header_name, header_value);
else
parent_class->parent_class.add_header (medium, header_name, header_value);
}
static void
remove_header(CamelMedium *medium, const char *header_name)
remove_header (CamelMedium *medium, const char *header_name)
{
process_header(medium, header_name, NULL);
process_header (medium, header_name, NULL);
parent_class->parent_class.remove_header (medium, header_name);
}
typedef gboolean (*CamelPartFunc)(CamelMimeMessage *, CamelMimePart *, void *data);
static gboolean
message_foreach_part_rec(CamelMimeMessage *msg, CamelMimePart *part, CamelPartFunc callback, void *data)
message_foreach_part_rec (CamelMimeMessage *msg, CamelMimePart *part, CamelPartFunc callback, void *data)
{
CamelDataWrapper *containee;
int parts, i;
int go = TRUE;
if (callback(msg, part, data) == FALSE)
if (callback (msg, part, data) == FALSE)
return FALSE;
containee = camel_medium_get_content_object(CAMEL_MEDIUM(part));
containee = camel_medium_get_content_object (CAMEL_MEDIUM (part));
if (containee == NULL)
return go;
/* using the object types is more accurate than using the mime/types */
if (CAMEL_IS_MULTIPART(containee)) {
parts = camel_multipart_get_number(CAMEL_MULTIPART(containee));
for (i=0;go && i<parts;i++) {
CamelMimePart *part = camel_multipart_get_part(CAMEL_MULTIPART(containee), i);
go = message_foreach_part_rec(msg, part, callback, data);
if (CAMEL_IS_MULTIPART (containee)) {
parts = camel_multipart_get_number (CAMEL_MULTIPART (containee));
for (i = 0; go && i < parts; i++) {
CamelMimePart *part = camel_multipart_get_part (CAMEL_MULTIPART (containee), i);
go = message_foreach_part_rec (msg, part, callback, data);
}
} else if (CAMEL_IS_MIME_MESSAGE(containee)) {
go = message_foreach_part_rec(msg, (CamelMimePart *)containee, callback, data);
} else if (CAMEL_IS_MIME_MESSAGE (containee)) {
go = message_foreach_part_rec (msg, (CamelMimePart *)containee, callback, data);
}
return go;
}
/* don't make this public yet, it might need some more thinking ... */
/* MPZ */
static void
camel_mime_message_foreach_part(CamelMimeMessage *msg, CamelPartFunc callback, void *data)
camel_mime_message_foreach_part (CamelMimeMessage *msg, CamelPartFunc callback, void *data)
{
message_foreach_part_rec(msg, (CamelMimePart *)msg, callback, data);
message_foreach_part_rec (msg, (CamelMimePart *)msg, callback, data);
}
static gboolean
check_8bit(CamelMimeMessage *msg, CamelMimePart *part, void *data)
check_8bit (CamelMimeMessage *msg, CamelMimePart *part, void *data)
{
int *has8bit = data;
/* check this part, and stop as soon as we are done */
*has8bit = camel_mime_part_get_encoding(part) == CAMEL_MIME_PART_ENCODING_8BIT;
*has8bit = camel_mime_part_get_encoding (part) == CAMEL_MIME_PART_ENCODING_8BIT;
return !(*has8bit);
}
gboolean
camel_mime_message_has_8bit_parts(CamelMimeMessage *msg)
camel_mime_message_has_8bit_parts (CamelMimeMessage *msg)
{
int has8bit = FALSE;
camel_mime_message_foreach_part(msg, check_8bit, &has8bit);
camel_mime_message_foreach_part (msg, check_8bit, &has8bit);
return has8bit;
}
/* finds the best charset and transfer encoding for a given part */
static CamelMimePartEncodingType
find_best_encoding(CamelMimePart *part, CamelBestencRequired required, CamelBestencEncoding enctype, char **charsetp)
find_best_encoding (CamelMimePart *part, CamelBestencRequired required, CamelBestencEncoding enctype, char **charsetp)
{
const char *charsetin = NULL;
char *charset = NULL;
@ -657,111 +662,111 @@ find_best_encoding(CamelMimePart *part, CamelBestencRequired required, CamelBest
unsigned int flags, callerflags;
CamelMimePartEncodingType encoding;
CamelDataWrapper *content;
/* we use all these weird stream things so we can do it with streams, and
not have to read the whole lot into memory - although i have a feeling
it would make things a fair bit simpler to do so ... */
d(printf("starting to check part\n"));
content = camel_medium_get_content_object((CamelMedium *)part);
content = camel_medium_get_content_object ((CamelMedium *)part);
if (content == NULL) {
/* charset might not be right here, but it'll get the right stuff
if it is ever set */
*charsetp = NULL;
return CAMEL_MIME_PART_ENCODING_DEFAULT;
}
istext = header_content_type_is(part->content_type, "text", "*");
istext = header_content_type_is (part->content_type, "text", "*");
if (istext) {
flags = CAMEL_BESTENC_GET_CHARSET|CAMEL_BESTENC_GET_ENCODING;
flags = CAMEL_BESTENC_GET_CHARSET | CAMEL_BESTENC_GET_ENCODING;
} else {
flags = CAMEL_BESTENC_GET_ENCODING;
}
/* when building the message, any encoded parts are translated already */
flags |= CAMEL_BESTENC_LF_IS_CRLF;
/* and get any flags the caller passed in */
callerflags = (required & CAMEL_BESTENC_NO_FROM);
flags |= callerflags;
/* first a null stream, so any filtering is thrown away; we only want the sideeffects */
null = (CamelStream *)camel_stream_null_new();
filter = camel_stream_filter_new_with_stream(null);
null = (CamelStream *)camel_stream_null_new ();
filter = camel_stream_filter_new_with_stream (null);
/* if we're not looking for the best charset, then use the one we have */
if (istext && (required & CAMEL_BESTENC_GET_CHARSET) == 0
&& (charsetin = header_content_type_param(part->content_type, "charset"))) {
&& (charsetin = header_content_type_param (part->content_type, "charset"))) {
/* if libunicode doesn't support it, we dont really have utf8 anyway, so
we dont need a converter */
charenc = camel_mime_filter_charset_new_convert("UTF-8", charsetin);
charenc = camel_mime_filter_charset_new_convert ("UTF-8", charsetin);
if (charenc != NULL)
idc = camel_stream_filter_add(filter, (CamelMimeFilter *)charenc);
idc = camel_stream_filter_add (filter, (CamelMimeFilter *)charenc);
charsetin = NULL;
}
bestenc = camel_mime_filter_bestenc_new(flags);
idb = camel_stream_filter_add(filter, (CamelMimeFilter *)bestenc);
bestenc = camel_mime_filter_bestenc_new (flags);
idb = camel_stream_filter_add (filter, (CamelMimeFilter *)bestenc);
d(printf("writing to checking stream\n"));
camel_data_wrapper_write_to_stream(content, (CamelStream *)filter);
camel_stream_filter_remove(filter, idb);
camel_data_wrapper_write_to_stream (content, (CamelStream *)filter);
camel_stream_filter_remove (filter, idb);
if (idc != -1) {
camel_stream_filter_remove(filter, idc);
camel_object_unref((CamelObject *)charenc);
camel_stream_filter_remove (filter, idc);
camel_object_unref ((CamelObject *)charenc);
charenc = NULL;
}
if (istext)
charsetin = camel_mime_filter_bestenc_get_best_charset(bestenc);
charsetin = camel_mime_filter_bestenc_get_best_charset (bestenc);
d(printf("charsetin = %s\n", charsetin));
/* if we have US-ASCII, or we're not doing text, we dont need to bother with the rest */
if (charsetin != NULL && (required & CAMEL_BESTENC_GET_CHARSET) != 0) {
charset = g_strdup(charsetin);
charset = g_strdup (charsetin);
d(printf("have charset, trying conversion/etc\n"));
/* now the 'bestenc' can has told us what the best encoding is, we can use that to create
a charset conversion filter as well, and then re-add the bestenc to filter the
result to find the best encoding to use as well */
charenc = camel_mime_filter_charset_new_convert("UTF-8", charset);
charenc = camel_mime_filter_charset_new_convert ("UTF-8", charset);
/* eek, libunicode doesn't undertand this charset anyway, then the 'utf8' we
thought we had is really the native format, in which case, we just treat
it as binary data (and take the result we have so far) */
if (charenc != NULL) {
/* otherwise, try another pass, converting to the real charset */
camel_mime_filter_reset((CamelMimeFilter *)bestenc);
camel_mime_filter_bestenc_set_flags(bestenc, CAMEL_BESTENC_GET_ENCODING|CAMEL_BESTENC_LF_IS_CRLF|callerflags);
camel_stream_filter_add(filter, (CamelMimeFilter *)charenc);
camel_stream_filter_add(filter, (CamelMimeFilter *)bestenc);
camel_mime_filter_reset ((CamelMimeFilter *)bestenc);
camel_mime_filter_bestenc_set_flags (bestenc, CAMEL_BESTENC_GET_ENCODING |
CAMEL_BESTENC_LF_IS_CRLF | callerflags);
camel_stream_filter_add (filter, (CamelMimeFilter *)charenc);
camel_stream_filter_add (filter, (CamelMimeFilter *)bestenc);
/* and write it to the new stream */
camel_data_wrapper_write_to_stream(content, (CamelStream *)filter);
camel_object_unref((CamelObject *)charenc);
camel_data_wrapper_write_to_stream (content, (CamelStream *)filter);
camel_object_unref ((CamelObject *)charenc);
}
}
encoding = camel_mime_filter_bestenc_get_best_encoding(bestenc, enctype);
camel_object_unref((CamelObject *)filter);
camel_object_unref((CamelObject *)bestenc);
camel_object_unref((CamelObject *)null);
encoding = camel_mime_filter_bestenc_get_best_encoding (bestenc, enctype);
camel_object_unref ((CamelObject *)filter);
camel_object_unref ((CamelObject *)bestenc);
camel_object_unref ((CamelObject *)null);
d(printf("done, best encoding = %d\n", encoding));
if (charsetp)
*charsetp = charset;
else
g_free(charset);
g_free (charset);
return encoding;
}
@ -771,57 +776,58 @@ struct _enc_data {
};
static gboolean
best_encoding(CamelMimeMessage *msg, CamelMimePart *part, void *datap)
best_encoding (CamelMimeMessage *msg, CamelMimePart *part, void *datap)
{
struct _enc_data *data = datap;
char *charset;
CamelMimePartEncodingType encoding;
/* we only care about actual content objects */
if (!CAMEL_IS_MULTIPART(part) && !CAMEL_IS_MIME_MESSAGE(part)) {
encoding = find_best_encoding(part, data->required, data->enctype, &charset);
if (!CAMEL_IS_MULTIPART (part) && !CAMEL_IS_MIME_MESSAGE (part)) {
encoding = find_best_encoding (part, data->required, data->enctype, &charset);
/* we always set the encoding, if we got this far. GET_CHARSET implies
also GET_ENCODING */
camel_mime_part_set_encoding(part, encoding);
camel_mime_part_set_encoding (part, encoding);
if ((data->required & CAMEL_BESTENC_GET_CHARSET) != 0) {
if (header_content_type_is(part->content_type, "text", "*")) {
if (header_content_type_is (part->content_type, "text", "*")) {
char *newct;
/* FIXME: ick, the part content_type interface needs fixing bigtime */
header_content_type_set_param(part->content_type, "charset", charset?charset:"us-ascii");
newct = header_content_type_format(part->content_type);
header_content_type_set_param (part->content_type, "charset",
charset ? charset : "us-ascii");
newct = header_content_type_format (part->content_type);
if (newct) {
d(printf("Setting content-type to %s\n", newct));
camel_mime_part_set_content_type(part, newct);
g_free(newct);
camel_mime_part_set_content_type (part, newct);
g_free (newct);
}
}
}
}
return TRUE;
}
void
camel_mime_message_set_best_encoding(CamelMimeMessage *msg, CamelBestencRequired required, CamelBestencEncoding enctype)
camel_mime_message_set_best_encoding (CamelMimeMessage *msg, CamelBestencRequired required, CamelBestencEncoding enctype)
{
struct _enc_data data;
if ((required & (CAMEL_BESTENC_GET_ENCODING|CAMEL_BESTENC_GET_CHARSET)) == 0)
return;
data.required = required;
data.enctype = enctype;
camel_mime_message_foreach_part(msg, best_encoding, &data);
camel_mime_message_foreach_part (msg, best_encoding, &data);
}
void
camel_mime_message_encode_8bit_parts (CamelMimeMessage *mime_message)
{
camel_mime_message_set_best_encoding(mime_message, CAMEL_BESTENC_GET_ENCODING, CAMEL_BESTENC_7BIT);
camel_mime_message_set_best_encoding (mime_message, CAMEL_BESTENC_GET_ENCODING, CAMEL_BESTENC_7BIT);
}

View File

@ -54,14 +54,14 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
CamelStream *source;
CamelSeekableStream *seekable_source = NULL;
char *encoding;
d(printf("constructing data-wrapper\n"));
/* Ok, try and be smart. If we're storing a small message (typical) convert it,
and store it in memory as we parse it ... if not, throw away the conversion
and scan till the end ... */
/* if we can't seek, dont have a stream/etc, then we must cache it */
/* Ok, try and be smart. If we're storing a small message (typical) convert it,
and store it in memory as we parse it ... if not, throw away the conversion
and scan till the end ... */
/* if we can't seek, dont have a stream/etc, then we must cache it */
source = camel_mime_parser_stream(mp);
if (source) {
camel_object_ref((CamelObject *)source);
@ -69,7 +69,7 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
seekable_source = CAMEL_SEEKABLE_STREAM (source);
}
}
/* first, work out conversion, if any, required, we dont care about what we dont know about */
encoding = header_content_encoding_decode(camel_mime_parser_header(mp, "content-transfer-encoding", NULL));
if (encoding) {
@ -84,39 +84,97 @@ simple_data_wrapper_construct_from_parser(CamelDataWrapper *dw, CamelMimeParser
}
g_free(encoding);
}
/* If we're doing text, we also need to do CRLF->LF and may have to convert it to UTF8 as well. */
ct = camel_mime_parser_content_type(mp);
if (header_content_type_is(ct, "text", "*")) {
const char *charset = header_content_type_param(ct, "charset");
ct = camel_mime_parser_content_type (mp);
if (header_content_type_is (ct, "text", "*")) {
const char *charset = header_content_type_param (ct, "charset");
char *acharset; /* to be alloca'd if needed */
if (fdec) {
d(printf("Adding CRLF conversion filter\n"));
fcrlf = (CamelMimeFilter *)camel_mime_filter_crlf_new(CAMEL_MIME_FILTER_CRLF_DECODE,
CAMEL_MIME_FILTER_CRLF_MODE_CRLF_ONLY);
crlfid = camel_mime_parser_filter_add(mp, fcrlf);
fcrlf = (CamelMimeFilter *)camel_mime_filter_crlf_new (CAMEL_MIME_FILTER_CRLF_DECODE,
CAMEL_MIME_FILTER_CRLF_MODE_CRLF_ONLY);
crlfid = camel_mime_parser_filter_add (mp, fcrlf);
}
if (charset!=NULL
&& !(strcasecmp(charset, "us-ascii")==0
|| strcasecmp(charset, "utf-8")==0)) {
/* Possible Lame Mailer Alert... check the META tag for a charset */
if (!charset && header_content_type_is (ct, "text", "html")) {
/* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */
const char *data, *slashhead;
CamelStream *mem;
mem = camel_stream_mem_new ();
camel_data_wrapper_write_to_stream (dw, mem);
camel_stream_write (mem, "", 1);
data = CAMEL_STREAM_MEM (mem)->buffer->data;
slashhead = strstrcase (data, "</head");
if (!slashhead)
slashhead = data + CAMEL_STREAM_MEM (mem)->buffer->len;
/* Yea, this is ugly */
while (data < slashhead) {
struct _header_param *params;
char *meta, *metaend;
const char *val;
meta = strstrcase (data, "<meta");
if (!meta)
break;
metaend = strchr (meta, '>');
if (!metaend)
metaend = slashhead;
params = html_meta_param_list_decode (meta, metaend - meta);
if (params) {
val = header_param (params, "http-equiv");
if (val && !g_strcasecmp (val, "Content-Type")) {
struct _header_content_type *content_type;
content_type = header_content_type_decode (val);
charset = header_content_type_param (content_type, "charset");
if (charset) {
acharset = alloca (strlen (charset) + 1);
strcpy (acharset, charset);
charset = acharset;
}
header_content_type_unref (content_type);
}
header_param_list_free (params);
/* break as soon as we find a charset */
if (charset)
break;
}
data = metaend;
}
camel_object_unref (CAMEL_OBJECT (mem));
}
/* if the charset is not us-ascii or utf-8, then we need to convert to utf-8 */
if (charset && !(g_strcasecmp (charset, "us-ascii") == 0 || g_strcasecmp (charset, "utf-8") == 0)) {
d(printf("Adding conversion filter from %s to UTF-8\n", charset));
fch = (CamelMimeFilter *)camel_mime_filter_charset_new_convert(charset, "UTF-8");
fch = (CamelMimeFilter *)camel_mime_filter_charset_new_convert (charset, "UTF-8");
if (fch) {
chrid = camel_mime_parser_filter_add(mp, (CamelMimeFilter *)fch);
chrid = camel_mime_parser_filter_add (mp, (CamelMimeFilter *)fch);
} else {
g_warning("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset);
g_warning ("Cannot convert '%s' to 'UTF-8', message display may be corrupt", charset);
}
}
}
buffer = g_byte_array_new();
if (seekable_source /* !cache */) {
start = camel_mime_parser_tell(mp) + seekable_source->bound_start;
}
while ( camel_mime_parser_step(mp, &buf, &len) != HSCAN_BODY_END ) {
while (camel_mime_parser_step (mp, &buf, &len) != HSCAN_BODY_END) {
d(printf("appending o/p data: %d: %.*s\n", len, len, buf));
if (buffer) {
if (buffer->len > 20480 && seekable_source) {

View File

@ -1626,18 +1626,18 @@ header_encode_phrase (const unsigned char *in)
/* these are all internal parser functions */
static char *
decode_token(const char **in)
decode_token (const char **in)
{
const char *inptr = *in;
const char *start;
header_decode_lwsp(&inptr);
header_decode_lwsp (&inptr);
start = inptr;
while (is_ttoken(*inptr))
while (is_ttoken (*inptr))
inptr++;
if (inptr>start) {
if (inptr > start) {
*in = inptr;
return g_strndup(start, inptr-start);
return g_strndup (start, inptr - start);
} else {
return NULL;
}
@ -2719,6 +2719,58 @@ header_param_list_decode(const char *in)
return header_decode_param_list(&in);
}
/**
 * html_meta_param_list_decode:
 * @in: start of an HTML META tag; must point at the '<' of "<meta"
 * @inlen: number of bytes of @in that belong to the tag
 *
 * Parses the name=value attributes of an HTML META tag, e.g.
 * <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">,
 * into a linked list of params, in the order they appear in the tag.
 *
 * Returns: a newly allocated param list (release it with
 * header_param_list_free()), or NULL if @in is NULL, does not start with
 * "<meta" (case-insensitive), or carries no parsable attributes.
 **/
struct _header_param *
html_meta_param_list_decode (const char *in, int inlen)
{
	struct _header_param *params = NULL, *last = NULL;
	const char *inptr, *inend;

	/* "<meta" alone is 5 bytes; anything shorter cannot hold an attribute */
	if (in == NULL || inlen < 5)
		return NULL;

	inptr = in;
	inend = inptr + inlen;

	if (g_strncasecmp (inptr, "<meta", 5) != 0)
		return NULL;
	inptr += 5;

	header_decode_lwsp (&inptr);

	while (inptr < inend && *inptr != '>') {
		struct _header_param *param;
		char *name, *value;

		name = decode_token (&inptr);
		header_decode_lwsp (&inptr);

		/* an attribute needs both a name and an '='; anything else
		   (the '/' of "/>", stray junk, end of tag) ends the list */
		if (name == NULL || inptr >= inend || *inptr != '=') {
			g_free (name);
			break;
		}

		/* step over the '=' so the value decoder starts on the value
		   itself; this also guarantees the cursor advances on every
		   iteration, so a malformed tag can never spin the loop.
		   NOTE(review): header_decode_value is defined elsewhere -
		   confirm it does not expect to consume the '=' itself */
		inptr++;
		value = header_decode_value (&inptr);
		header_decode_lwsp (&inptr);

		param = g_malloc (sizeof (*param));
		param->next = NULL;
		param->name = name;
		param->value = value;	/* may be NULL for an empty value ("name=") */

		/* append so the list preserves attribute order */
		if (last)
			last->next = param;
		else
			params = param;
		last = param;
	}

	return params;
}
/* FIXME: I wrote this in a quick & dirty fashion - it may not be 100% correct */
static char *
header_encode_param (const unsigned char *in, gboolean *encoded)

View File

@ -109,6 +109,9 @@ void header_param_list_format_append(GString *out, struct _header_param *p);
char *header_param_list_format(struct _header_param *p);
void header_param_list_free(struct _header_param *p);
/* for decoding META tags in text/html stuff */
struct _header_param *html_meta_param_list_decode (const char *in, int inlen);
/* Content-Type header */
struct _header_content_type *header_content_type_new(const char *type, const char *subtype);
struct _header_content_type *header_content_type_decode(const char *in);

View File

@ -209,7 +209,7 @@ strip (gchar *string, gchar c)
}
char *
strstrcase (char *haystack, const char *needle)
strstrcase (const char *haystack, const char *needle)
{
/* find the needle in the haystack neglecting case */
const char *ptr;

View File

@ -61,7 +61,7 @@ void string_unquote (gchar *string);
gchar *strip (gchar *string, gchar c);
char *strstrcase (char *haystack, const char *needle);
char *strstrcase (const char *haystack, const char *needle);
#ifdef __cplusplus
}