From 4f2121cdcf59b27be833ebe6041bbdeec717bf23 Mon Sep 17 00:00:00 2001 From: Milan Crha Date: Fri, 15 Sep 2023 10:44:35 +0200 Subject: [PATCH] I#1436 - Mail: Correct conversion of UTF-16 encoded text files Closes https://gitlab.gnome.org/GNOME/evolution/-/issues/1436 --- src/e-util/e-attachment.c | 16 ++++++++++---- src/em-format/e-mail-formatter.c | 37 +++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/src/e-util/e-attachment.c b/src/e-util/e-attachment.c index 97b7a490e0..43776b04b2 100644 --- a/src/e-util/e-attachment.c +++ b/src/e-util/e-attachment.c @@ -1222,7 +1222,7 @@ e_attachment_add_to_multipart (EAttachment *attachment, stream = camel_stream_null_new (); filtered_stream = camel_stream_filter_new (stream); filter = camel_mime_filter_bestenc_new ( - CAMEL_BESTENC_GET_ENCODING); + CAMEL_BESTENC_GET_ENCODING | CAMEL_BESTENC_GET_CHARSET); camel_stream_filter_add ( CAMEL_STREAM_FILTER (filtered_stream), CAMEL_MIME_FILTER (filter)); @@ -1236,7 +1236,6 @@ e_attachment_add_to_multipart (EAttachment *attachment, CAMEL_MIME_FILTER_BESTENC (filter), CAMEL_BESTENC_8BIT); camel_mime_part_set_encoding (mime_part, encoding); - g_object_unref (filter); if (encoding == CAMEL_TRANSFER_ENCODING_7BIT) { /* The text fits within us-ascii, so this is safe. @@ -1254,13 +1253,22 @@ e_attachment_add_to_multipart (EAttachment *attachment, if (charset == NULL) { gchar *type; - camel_content_type_set_param ( - content_type, "charset", default_charset); + if (encoding != CAMEL_TRANSFER_ENCODING_7BIT) { + charset = camel_mime_filter_bestenc_get_best_charset (CAMEL_MIME_FILTER_BESTENC (filter)); + if (charset) + camel_content_type_set_param (content_type, "charset", charset); + } + + if (!charset) + camel_content_type_set_param (content_type, "charset", default_charset); + type = camel_content_type_format (content_type); camel_mime_part_set_content_type (mime_part, type); g_free (type); } + g_object_unref (filter); + /* Otherwise, unless it's a message/rfc822, Base64 encode it. */ } else if (!CAMEL_IS_MIME_MESSAGE (wrapper)) camel_mime_part_set_encoding ( diff --git a/src/em-format/e-mail-formatter.c b/src/em-format/e-mail-formatter.c index ed6ae2dd32..89349f2a01 100644 --- a/src/em-format/e-mail-formatter.c +++ b/src/em-format/e-mail-formatter.c @@ -1061,6 +1061,35 @@ e_mail_formatter_format_as (EMailFormatter *formatter, return ok; } +static gboolean +emf_data_is_utf16 (CamelMimePart *part, + gboolean *out_be_variant) +{ + CamelStream *filtered_stream; + CamelMimeFilter *filter; + CamelStream *stream; + const gchar *charset; + gboolean is_utf16; + + g_return_val_if_fail (CAMEL_IS_MIME_PART (part), FALSE); + + stream = camel_stream_null_new (); + filtered_stream = camel_stream_filter_new (stream); + filter = camel_mime_filter_bestenc_new (CAMEL_BESTENC_GET_CHARSET); + camel_stream_filter_add (CAMEL_STREAM_FILTER (filtered_stream), CAMEL_MIME_FILTER (filter)); + camel_data_wrapper_decode_to_stream_sync (camel_medium_get_content (CAMEL_MEDIUM (part)), filtered_stream, NULL, NULL); + g_object_unref (filtered_stream); + g_object_unref (stream); + + charset = camel_mime_filter_bestenc_get_best_charset (CAMEL_MIME_FILTER_BESTENC (filter)); + *out_be_variant = g_strcmp0 (charset, "UTF-16BE") == 0; + is_utf16 = *out_be_variant || g_strcmp0 (charset, "UTF-16LE") == 0; + + g_object_unref (filter); + + return is_utf16; +} + /** * em_format_format_text: * @part: an #EMailPart to decode @@ -1081,6 +1110,7 @@ e_mail_formatter_format_text (EMailFormatter *formatter, CamelMimeFilter *windows = NULL; CamelMimePart *mime_part; CamelContentType *mime_type; + gboolean utf16_be_variant = FALSE; if (g_cancellable_is_cancelled (cancellable)) return; @@ -1088,7 +1118,12 @@ e_mail_formatter_format_text (EMailFormatter *formatter, mime_part = e_mail_part_ref_mime_part (part); mime_type = camel_data_wrapper_get_mime_type_field (CAMEL_DATA_WRAPPER (mime_part)); - if (formatter->priv->charset != NULL) { + if (emf_data_is_utf16 (mime_part, &utf16_be_variant)) { + if (utf16_be_variant) + charset = "UTF-16BE"; + else + charset = "UTF-16LE"; + } else if (formatter->priv->charset != NULL) { charset = formatter->priv->charset; } else if (mime_type != NULL && (charset = camel_content_type_param (mime_type, "charset"))