When doing a contains match, split the words and perform an and on it.

2002-04-18 Not Zed <NotZed@Ximian.com> * camel-folder-search.c (check_header): When doing a contains match, split the words and perform an and on it. (match_words_messages): If we have an index, but were forced to do a full search, first lookup a subset of messages using the index and a simplified word set. Only do a manual search of this subset. 2002-04-17 Not Zed <NotZed@Ximian.com> * camel-folder-search.c (match_message_index): Changed to take a utf8 string not a regex pattern. (match_words_index): Matches against a camel_search_words list. (match_words_1message): Matches a single message against a camel_search_words list. (match_words_message): Same, but gets the message from the folder for you. (match_words_messages): Matches a list of messages against a words list. (search_body_contains): Rewritten to handle multiple word searches. For #23371. * providers/imap/camel-imap-search.c (sync_match): Split words when searching, to support multiple search words. Also, try searching specifying charset of utf8 if we can, if that fails, fall back to not specifying charset. TODO: It should translate the strings into the locale default charset? * providers/imap/camel-imap-store.c (connect_to_server): Added new cap - utf8_search, if set, we tell the server we're searching using utf8, otherwise we dont (incorrectly, since we always use utf8 to search). * camel-search-private.c (camel_ustrstrcase): Make this class public. (camel_search_words_split): Split a word into multiple words based on whitespace, and keep track of whether the word is simple (indexable directly), or not. (camel_search_words_free): Free 'em. svn path=/trunk/; revision=16501
2002-04-18 02:18:55 +00:00
parent e5e67a6644
commit 6ccd0e6f59
7 changed files with 484 additions and 110 deletions
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@ -1,3 +1,43 @@
+2002-04-18  Not Zed  <NotZed@Ximian.com>
+
+	* camel-folder-search.c (check_header): When doing a contains
+	match, split the words and perform an and on it.
+	(match_words_messages): If we have an index, but were forced to do
+	a full search, first lookup a subset of messages using
+	the index and a simplified word set.  Only do a manual search of
+	this subset.
+
+2002-04-17  Not Zed  <NotZed@Ximian.com>
+
+	* camel-folder-search.c (match_message_index): Changed to take a
+	utf8 string not a regex pattern.
+	(match_words_index): Matches against a camel_search_words list.
+	(match_words_1message): Matches a single message against a
+	camel_search_words list.
+	(match_words_message): Same, but gets the message from the folder
+	for you.
+	(match_words_messages): Matches a list of messages against a words
+	list.
+	(search_body_contains): Rewritten to handle multiple word
+	searches.  For #23371.
+
+	* providers/imap/camel-imap-search.c (sync_match): Split words
+	when searching, to support multiple search words.  Also, try
+	searching specifying charset of utf8 if we can, if that fails,
+	fall back to not specifying charset.  TODO: It should translate
+	the strings into the locale default charset?
+
+	* providers/imap/camel-imap-store.c (connect_to_server): Added new
+	cap - utf8_search, if set, we tell the server we're searching
+	using utf8, otherwise we don't (incorrectly, since we always use
+	utf8 to search).
+
+	* camel-search-private.c (camel_ustrstrcase): Make this function public.
+	(camel_search_words_split): Split a word into multiple words based
+	on whitespace, and keep track of whether the word is simple
+	(indexable directly), or not.
+	(camel_search_words_free): Free 'em.
+
 2002-04-17  Jeffrey Stedfast  <fejj@ximian.com>

 	* camel-vee-folder.c (vee_search_by_expression): If the vee-folder
--- a/camel/camel-folder-search.c
+++ b/camel/camel-folder-search.c
@ -621,8 +621,9 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder
 		char *headername;
 		const char *header = NULL;
 		char strbuf[32];
-		int i;
+		int i, j;
 		camel_search_t type = CAMEL_SEARCH_TYPE_ASIS;
+		struct _camel_search_words *words;

 		/* only a subset of headers are supported .. */
 		headername = argv[0]->value.string;
@ -652,9 +653,21 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder
 		if (header) {
 			/* performs an OR of all words */
 			for (i=1;i<argc && !truth;i++) {
-				if (argv[i]->type == ESEXP_RES_STRING)
-					truth = camel_search_header_match(header, argv[i]->value.string,
-									  how, type, NULL);
+				if (argv[i]->type == ESEXP_RES_STRING) {
+					if (argv[i]->value.string[0] == 0) {
+						truth = TRUE;
+					} else if (how == CAMEL_SEARCH_MATCH_CONTAINS) {
+						/* doesn't make sense to split words on anything but contains i.e. we can't have an ending match different words */
+						words = camel_search_words_split(argv[i]->value.string);
+						truth = TRUE;
+						for (j=0;j<words->len && truth;j++) {
+							truth = camel_search_header_match(header, words->words[j]->word, how, type, NULL);
+						}
+						camel_search_words_free(words);
+					} else {
+						truth = camel_search_header_match(header, argv[i]->value.string, how, type, NULL);
+					}
+				}
 			}
 		}
 	}
@ -723,64 +736,16 @@ g_lib_sux_htor(char *key, int value, struct _glib_sux_donkeys *fuckup)
 	g_ptr_array_add(fuckup->uids, key);
 }

-static int
-match_message(CamelFolder *folder, const char *uid, regex_t *pattern, CamelException *ex)
+/* and, only store duplicates */
+static void
+g_lib_sux_htand(char *key, int value, struct _glib_sux_donkeys *fuckup)
 {
-	CamelMimeMessage *msg;
-	int truth = FALSE;
-
-	msg = camel_folder_get_message(folder, uid, ex);
-	if (!camel_exception_is_set(ex) && msg!=NULL) {
-		truth = camel_search_message_body_contains((CamelDataWrapper *)msg, pattern);
-		camel_object_unref((CamelObject *)msg);
-	} else {
-		camel_exception_clear(ex);
-	}
-	return truth;
+	if (value == fuckup->count)
+		g_ptr_array_add(fuckup->uids, key);
 }

-/* perform a regex match against words in an index */
-/* uids = hash table of messageinfo's by uid's */
-static GPtrArray *
-match_messages_index(CamelIndex *idx, regex_t *pattern, GHashTable *uids, CamelException *ex)
-{
-	GPtrArray *result = g_ptr_array_new();
-	GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal);
-	struct _glib_sux_donkeys lambdafoo;
-	CamelIndexCursor *wc, *nc;
-	const char *word, *name;
-	CamelMessageInfo *mi;
-			
-	wc = camel_index_words(idx);
-	if (wc) {
-		while ((word = camel_index_cursor_next(wc))) {
-			if (regexec(pattern, word, 0, NULL, 0) == 0) {
-				/* perf: could have the wc cursor return the name cursor */
-				nc = camel_index_find(idx, word);
-				if (nc) {
-					while ((name = camel_index_cursor_next(nc))) {
-						mi = g_hash_table_lookup(uids, name);
-						if (mi)
-							g_hash_table_insert(ht, (char *)camel_message_info_uid(mi), (void *)1);
-					}
-					camel_object_unref((CamelObject *)nc);
-				}
-			}
-		}
-		camel_object_unref((CamelObject *)wc);
-
-		lambdafoo.uids = result;
-		g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo);
-		g_hash_table_destroy(ht);
-	}
-
-	return result;
-}
-
-/* perform a regex match against an individual uid in an index */
-/* this would benefit greatly in practice if there was a hashtalbe of uid's to amtch against */
 static int
-match_message_index(CamelIndex *idx, const char *uid, regex_t *pattern, CamelException *ex)
+match_message_index(CamelIndex *idx, const char *uid, const char *match, CamelException *ex)
 {
 	CamelIndexCursor *wc, *nc;
 	const char *word, *name;
@ -789,7 +754,7 @@ match_message_index(CamelIndex *idx, const char *uid, regex_t *pattern, CamelExc
 	wc = camel_index_words(idx);
 	if (wc) {
 		while (!truth && (word = camel_index_cursor_next(wc))) {
-			if (regexec(pattern, word, 0, NULL, 0) == 0) {
+			if (camel_ustrstrcase(word,match) != NULL) {
 				/* perf: could have the wc cursor return the name cursor */
 				nc = camel_index_find(idx, word);
 				if (nc) {
@ -805,68 +770,227 @@ match_message_index(CamelIndex *idx, const char *uid, regex_t *pattern, CamelExc
 	return truth;
 }

+/*
+ "one two" "three" "four five"
+
+  one and two
+or
+  three
+or
+  four and five
+*/
+
+/* returns messages which contain all words listed in words */
+/* Walks every word stored in search->body_index.  An index word counts as a
+   hit for search word i when camel_ustrstrcase() finds the search word inside
+   it (a substring test, not a whole-word test).  For each message name listed
+   under a hit that is also present in search->summary_hash, bit i is set in a
+   per-uid mask kept in a temporary hash table.  Only uids whose mask equals
+   the all-words mask are added to the result.
+   The result array holds the uid pointers returned by
+   camel_message_info_uid(); the strings are not copied here.
+   If the index provides no word cursor, or words->len is 0, the result is an
+   empty array.
+   NOTE(review): ex is accepted but never used in this function. */
+static GPtrArray *
+match_words_index(CamelFolderSearch *search, struct _camel_search_words *words, CamelException *ex)
+{
+	GPtrArray *result = g_ptr_array_new();
+	GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal);
+	struct _glib_sux_donkeys lambdafoo;
+	CamelIndexCursor *wc, *nc;
+	const char *word, *name;
+	CamelMessageInfo *mi;
+	int i;
+
+	/* we can have a maximum of 32 words, as we use it as the AND mask */
+	/* NOTE(review): nothing in this function enforces that limit, and the
+	   shifts below are done on int, so they are only well defined while
+	   words->len is below 31 -- confirm that callers bound the word count */
+			
+	wc = camel_index_words(search->body_index);
+	if (wc) {
+		while ((word = camel_index_cursor_next(wc))) {
+			for (i=0;i<words->len;i++) {
+				if (camel_ustrstrcase(word, words->words[i]->word) != NULL) {
+					/* perf: could have the wc cursor return the name cursor */
+					nc = camel_index_find(search->body_index, word);
+					if (nc) {
+						while ((name = camel_index_cursor_next(nc))) {
+							mi = g_hash_table_lookup(search->summary_hash, name);
+							if (mi) {
+								int mask;
+								const char *uid = camel_message_info_uid(mi);
+
+								/* the mask is stored directly in the hash value pointer;
+								   a uid not seen yet looks up as NULL, i.e. mask 0 */
+								mask = ((int)g_hash_table_lookup(ht, uid)) | (1<<i);
+								g_hash_table_insert(ht, (char *)uid, (void *)mask);
+							}
+						}
+						camel_object_unref((CamelObject *)nc);
+					}
+				}
+			}
+		}
+		camel_object_unref((CamelObject *)wc);
+
+		/* count is the mask with one bit set per search word; g_lib_sux_htand
+		   only keeps uids whose accumulated mask equals it */
+		lambdafoo.uids = result;
+		lambdafoo.count = (1<<words->len) - 1;
+		g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htand, &lambdafoo);
+		g_hash_table_destroy(ht);
+	}
+
+	return result;
+}
+
+/* Searches the body content reachable from object for the words in words.
+   Multiparts are descended part by part, an embedded message is searched
+   through its own content, and text parts are written out to a memory stream
+   and scanned with camel_ustrstrcase(); any other content is ignored.
+   Bit i of *mask is set once word i has been seen.  The same mask is handed
+   down the recursion, so words found in different text parts accumulate.
+   Returns TRUE as soon as the mask has a bit set for every one of the
+   words->len words, FALSE otherwise (including when object has no content).
+   The caller must initialise *mask before the first call. */
+static gboolean
+match_words_1message (CamelDataWrapper *object, struct _camel_search_words *words, guint32 *mask)
+{
+	CamelDataWrapper *containee;
+	int truth = FALSE;
+	int parts, i;
+
+	containee = camel_medium_get_content_object (CAMEL_MEDIUM (object));
+
+	if (containee == NULL)
+		return FALSE;
+
+	/* using the object types is more accurate than using the mime/types */
+	if (CAMEL_IS_MULTIPART (containee)) {
+		parts = camel_multipart_get_number (CAMEL_MULTIPART (containee));
+		for (i = 0; i < parts && truth == FALSE; i++) {
+			CamelDataWrapper *part = (CamelDataWrapper *)camel_multipart_get_part (CAMEL_MULTIPART (containee), i);
+			if (part)
+				truth = match_words_1message(part, words, mask);
+		}
+	} else if (CAMEL_IS_MIME_MESSAGE (containee)) {
+		/* for messages we only look at its contents */
+		truth = match_words_1message((CamelDataWrapper *)containee, words, mask);
+	} else if (header_content_type_is(CAMEL_DATA_WRAPPER (containee)->mime_type, "text", "*")) {
+		/* for all other text parts, we look inside, otherwise we dont care */
+		CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new ();
+
+		/* FIXME: The match should be part of a stream op */
+		camel_data_wrapper_write_to_stream (containee, CAMEL_STREAM (mem));
+		/* append a NUL so the buffer can be scanned as a C string */
+		camel_stream_write (CAMEL_STREAM (mem), "", 1);
+		for (i=0;i<words->len && !truth;i++) {
+			/* FIXME: This is horridly slow, and should use a real search algorithm */
+			if (camel_ustrstrcase(mem->buffer->data, words->words[i]->word) != NULL) {
+				*mask |= (1<<i);
+				/* shortcut a match: stop the loop through its condition rather
+				   than returning here, so the stream is always released below */
+				if (*mask == (1<<(words->len))-1)
+					truth = TRUE;
+			}
+		}
+		camel_object_unref (CAMEL_OBJECT (mem));
+	}
+
+	return truth;
+}
+
+/* Fetches message uid from folder and tests its body against words.
+   Returns what match_words_1message() reports for the fetched message.  When
+   the message cannot be fetched, the exception is cleared and FALSE is
+   returned. */
+static gboolean
+match_words_message(CamelFolder *folder, const char *uid, struct _camel_search_words *words, CamelException *ex)
+{
+	CamelMimeMessage *message;
+	guint32 seen = 0;
+	int matched;
+
+	message = camel_folder_get_message(folder, uid, ex);
+	if (message == NULL) {
+		/* a message we cannot load simply does not match */
+		camel_exception_clear(ex);
+		return FALSE;
+	}
+
+	matched = match_words_1message((CamelDataWrapper *)message, words, &seen);
+	camel_object_unref((CamelObject *)message);
+
+	return matched;
+}
+
+/* Returns the uids of all messages whose body contains every word in words,
+   checking each candidate by loading and scanning the message itself.
+   When a body index is available, the candidate set is first narrowed with an
+   index lookup on a simplified (punctuation-stripped) copy of the words, and
+   only those candidates are scanned.  If simplification leaves no words at
+   all (e.g. a punctuation-only query), the index cannot narrow anything -- an
+   empty word list makes the index lookup return no candidates -- so every
+   message in search->summary is scanned instead.
+   The returned array holds uid pointers that are not copied here. */
+static GPtrArray *
+match_words_messages(CamelFolderSearch *search, struct _camel_search_words *words, CamelException *ex)
+{
+	int i;
+	GPtrArray *matches = g_ptr_array_new();
+	GPtrArray *indexed = NULL;
+
+	if (search->body_index) {
+		struct _camel_search_words *simple;
+
+		simple = camel_search_words_simple(words);
+		/* only trust the index when there is something to look up */
+		if (simple->len > 0)
+			indexed = match_words_index(search, simple, ex);
+		camel_search_words_free(simple);
+	}
+
+	if (indexed) {
+		for (i=0;i<indexed->len;i++) {
+			const char *uid = g_ptr_array_index(indexed, i);
+
+			if (match_words_message(search->folder, uid, words, ex))
+				g_ptr_array_add(matches, (char *)uid);
+		}
+
+		g_ptr_array_free(indexed, TRUE);
+	} else {
+		for (i=0;i<search->summary->len;i++) {
+			CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
+			const char *uid = camel_message_info_uid(info);
+
+			if (match_words_message(search->folder, uid, words, ex))
+				g_ptr_array_add(matches, (char *)uid);
+		}
+	}
+
+	return matches;
+}
+
 static ESExpResult *
 search_body_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search)
 {
-	ESExpResult *r;
-	int i;
-	regex_t pattern;
+	int i, j;
 	CamelException *ex = search->priv->ex;
+	struct _camel_search_words *words;
+	ESExpResult *r;
+	struct _glib_sux_donkeys lambdafoo;

-	if (search->current) {
+	if (search->current) {	
 		int truth = FALSE;

-		if (argc == 1 && argv[0]->value.string[0] == 0 && search->folder) {
+		if (argc == 1 && argv[0]->value.string[0] == 0) {
 			truth = TRUE;
-		} else if (search->body_index) {
-			if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
-				truth = match_message_index(search->body_index, camel_message_info_uid(search->current), &pattern, ex);
-				regfree(&pattern);
-			}
-		} else if (search->folder) {
-			/* we do a 'slow' direct search */
-			if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
-				truth = match_message(search->folder, camel_message_info_uid(search->current), &pattern, ex);
-				regfree(&pattern);
-			}
 		} else {
-			g_warning("Cannot perform indexed body query with no index or folder set");
+			for (i=0;i<argc && !truth;i++) {
+				if (argv[i]->type == ESEXP_RES_STRING) {
+					words = camel_search_words_split(argv[i]->value.string);
+					truth = TRUE;
+					if ((words->type & CAMEL_SEARCH_WORD_COMPLEX) == 0 && search->body_index) {
+						for (j=0;j<words->len && truth;j++)
+							truth = match_message_index(search->body_index, camel_message_info_uid(search->current), words->words[j]->word, ex);
+					} else {
+						/* TODO: cache current message incase of multiple body search terms */
+						truth = match_words_message(search->folder, camel_message_info_uid(search->current), words, ex);
+					}
+					camel_search_words_free(words);
+				}
+			}
 		}
 		r = e_sexp_result_new(f, ESEXP_RES_BOOL);
 		r->value.bool = truth;
 	} else {
 		r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR);
+		r->value.ptrarray = g_ptr_array_new();

-		if (argc == 1 && argv[0]->value.string[0] == 0 && search->folder) {
-			/* optimise the match "" case - match everything */
-			r->value.ptrarray = g_ptr_array_new();
+		if (argc == 1 && argv[0]->value.string[0] == 0) {
 			for (i=0;i<search->summary->len;i++) {
 				CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
+
 				g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info));
 			}
-		} else if (search->body_index) {
-			if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
-				r->value.ptrarray = match_messages_index(search->body_index, &pattern, search->summary_hash, ex);
-				regfree(&pattern);
-			}
-		} else if (search->folder) {
-			/* do a slow search */
-			r->value.ptrarray = g_ptr_array_new();
-			if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
-				if (search->summary) {
-					for (i=0;i<search->summary->len;i++) {
-						CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
-
-						if (match_message(search->folder, camel_message_info_uid(info), &pattern, ex))
-							g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info));
-					}
-				} /* else?  we could always get the summary from the folder, but then
-				     we need to free it later somehow */
-				regfree(&pattern);
-			}
 		} else {
-			g_warning("Cannot perform indexed body query with no index or folder set");
-			r->value.ptrarray = g_ptr_array_new();
+			GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal);
+			GPtrArray *matches;
+
+			for (i=0;i<argc;i++) {
+				if (argv[i]->type == ESEXP_RES_STRING) {
+					words = camel_search_words_split(argv[i]->value.string);
+					if ((words->type & CAMEL_SEARCH_WORD_COMPLEX) == 0 && search->body_index) {
+						matches = match_words_index(search, words, ex);
+					} else {
+						matches = match_words_messages(search, words, ex);
+					}
+					for (j=0;j<matches->len;j++)
+						g_hash_table_insert(ht, matches->pdata[j], matches->pdata[j]);
+					g_ptr_array_free(matches, TRUE);
+					camel_search_words_free(words);
+				}
+			}
+			lambdafoo.uids = r->value.ptrarray;
+			g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo);
+			g_hash_table_destroy(ht);
 		}
 	}

--- a/camel/camel-search-private.c
+++ b/camel/camel-search-private.c
@ -194,6 +194,7 @@ header_soundex (const char *header, const char *match)
 	return truth;
 }

+/* FIXME: This is stupidly slow and needs to be removed */
 static gunichar
 utf8_get (const char **inp)
 {
@ -209,7 +210,7 @@ utf8_get (const char **inp)
 	return c;
 }

-static const char *
+const char *
 camel_ustrstrcase (const char *haystack, const char *needle)
 {
 	gunichar *nuni, *puni;
@ -469,9 +470,6 @@ camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern)
 	if (containee == NULL)
 		return FALSE;
 	
-	/* TODO: I find it odd that get_part and get_content_object do not
-	   add a reference, probably need fixing for multithreading */
-	
 	/* using the object types is more accurate than using the mime/types */
 	if (CAMEL_IS_MULTIPART (containee)) {
 		parts = camel_multipart_get_number (CAMEL_MULTIPART (containee));
@ -496,3 +494,155 @@ camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern)
 	return truth;
 }

+/* Decodes one UTF-8 sequence starting at *ptr and returns its value.
+   A first byte below 0x80 is returned as-is.  A first byte in 0x80..0xfd is
+   combined with the continuation bytes (10xxxxxx) that follow it.  If a byte
+   that is not a continuation byte turns up in the middle of a sequence, the
+   partial value is dropped and decoding restarts with that byte as the new
+   first byte.
+   On those paths *ptr is advanced past the bytes consumed.  For a first byte
+   of 0xfe or 0xff the function returns 0 and leaves *ptr unchanged.
+   A NUL byte yields 0 (with *ptr advanced past it); the callers in this file
+   treat a 0 return as end of string. */
+static __inline__ guint32
+camel_utf8_getc(const unsigned char **ptr)
+{
+	register unsigned char *p = (unsigned char *)*ptr;
+	register unsigned char c, r;
+	register guint32 v=0, /* this is only required because the stupid @@@%#%# compiler thinks it can be used uninitialised */
+		m;
+
+	r = *p++;
+loop:
+	if (r < 0x80) {
+		*ptr = p;
+		v = r;
+	} else if (r < 0xfe) { /* valid start char? */
+		v = r;
+		m = 0x7f80;	/* used to mask out the length bits */
+		do {
+			c = *p++;
+			if ((c & 0xc0) != 0x80) {
+				/* not a continuation byte: start over, treating it as a first byte */
+				r = c;
+				goto loop;
+			}
+			v = (v<<6) | (c & 0x3f);
+			/* r is shifted so that bit 0x40 tells whether another continuation byte is expected */
+			r<<=1;
+			m<<=5;
+		} while (r & 0x40);
+		
+		*ptr = p;
+
+		v &= ~m;
+	}
+
+	return v;
+}
+
+/* Splits the UTF-8 string in into words separated by Unicode whitespace.
+   Alphanumeric characters are lower-cased as they are copied.  Each word
+   starts out as CAMEL_SEARCH_WORD_SIMPLE; it becomes CAMEL_SEARCH_WORD_COMPLEX
+   once it contains a non-alphanumeric character, and additionally carries
+   CAMEL_SEARCH_WORD_8BIT when it contains any non-ASCII character.  The
+   list's type field is the OR of all word types.
+   Returns a newly allocated list (possibly with len 0); release it with
+   camel_search_words_free(). */
+struct _camel_search_words *
+camel_search_words_split(const unsigned char *in)
+{
+	int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0;
+	GString *w;
+	struct _camel_search_word *word;
+	struct _camel_search_words *words;
+	GPtrArray *list = g_ptr_array_new();
+	guint32 c;
+	int utf8len;
+	char utf8[8];
+
+	words = g_malloc0(sizeof(*words));
+	w = g_string_new("");
+
+	do {
+		c = camel_utf8_getc(&in);
+		if (c == 0 || g_unichar_isspace(c)) {
+			/* end of a word: emit it unless it is empty (runs of whitespace) */
+			if (w->len) {
+				word = g_malloc0(sizeof(*word));
+				word->word = g_strdup(w->str);
+				word->type = type;
+				g_ptr_array_add(list, word);
+				all |= type;
+				type = CAMEL_SEARCH_WORD_SIMPLE;
+				g_string_truncate(w, 0);
+			}
+		} else {
+			if (!g_unichar_isalnum(c)) {
+				/* switch from simple to complex, but keep an 8BIT flag that
+				   an earlier character of this word may already have set */
+				type = (type & CAMEL_SEARCH_WORD_8BIT) | CAMEL_SEARCH_WORD_COMPLEX;
+			} else {
+				c = g_unichar_tolower(c);
+			}
+			/* everything from U+0080 upwards needs more than 7 bits */
+			if (c >= 0x80)
+				type |= CAMEL_SEARCH_WORD_8BIT;
+
+			utf8len = g_unichar_to_utf8(c, utf8);
+			utf8[utf8len] = 0;
+			g_string_append(w, utf8);
+		}
+	} while (c);
+
+	g_string_free(w, TRUE);
+	/* hand the pointer array's storage over to the list; only the GPtrArray wrapper is freed */
+	words->len = list->len;
+	words->words = (struct _camel_search_word **)list->pdata;
+	words->type = all;
+	g_ptr_array_free(list, FALSE);
+
+	return words;
+}
+
+/* takes an existing 'words' list, and converts it to another consisting of
+   only simple words, with any punctuation etc stripped */
+/* Words of wordin that are not CAMEL_SEARCH_WORD_COMPLEX are copied as they
+   are.  A complex word is cut at every non-alphanumeric character and each
+   non-empty alphanumeric run becomes a word of its own, flagged
+   CAMEL_SEARCH_WORD_8BIT as well when the run contains a non-ASCII character.
+   The list's type field is the OR of the types of all words produced.
+   wordin is left untouched.  Returns a newly allocated list (possibly with
+   len 0); release it with camel_search_words_free(). */
+struct _camel_search_words *
+camel_search_words_simple(struct _camel_search_words *wordin)
+{
+	int i;
+	const unsigned char *ptr, *start, *last;
+	int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0;
+	GPtrArray *list = g_ptr_array_new();
+	struct _camel_search_word *word;
+	struct _camel_search_words *words;
+	guint32 c;
+
+	words = g_malloc0(sizeof(*words));
+
+	for (i=0;i<wordin->len;i++) {
+		if ((wordin->words[i]->type & CAMEL_SEARCH_WORD_COMPLEX) == 0) {
+			word = g_malloc0(sizeof(*word));
+			word->type = wordin->words[i]->type;
+			word->word = g_strdup(wordin->words[i]->word);
+			g_ptr_array_add(list, word);
+			/* copied words count towards the summary type too */
+			all |= word->type;
+		} else {
+			ptr = wordin->words[i]->word;
+			start = last = ptr;
+			do {
+				c = camel_utf8_getc(&ptr);
+				if (c == 0 || !g_unichar_isalnum(c)) {
+					/* [start,last) is the alphanumeric run that just ended */
+					if (last > start) {
+						word = g_malloc0(sizeof(*word));
+						word->word = g_strndup(start, last-start);
+						word->type = type;
+						g_ptr_array_add(list, word);
+						all |= type;
+						type = CAMEL_SEARCH_WORD_SIMPLE;
+					}
+					start = ptr;
+				} else if (c >= 0x80) {
+					/* only characters that end up in the word may flag it,
+					   and the word stays simple while being 8 bit */
+					type |= CAMEL_SEARCH_WORD_8BIT;
+				}
+				last = ptr;
+			} while (c);
+		}
+	}
+
+	/* hand the pointer array's storage over to the list; only the GPtrArray wrapper is freed */
+	words->len = list->len;
+	words->words = (struct _camel_search_word **)list->pdata;
+	words->type = all;
+	g_ptr_array_free(list, FALSE);
+
+	return words;
+}
+
+/* Releases a word list: every word string, every word record, the pointer
+   table and the list itself.  Passing NULL is a no-op, in line with g_free(). */
+void
+camel_search_words_free(struct _camel_search_words *words)
+{
+	int i;
+
+	if (words == NULL)
+		return;
+
+	for (i=0;i<words->len;i++) {
+		struct _camel_search_word *word = words->words[i];
+
+		g_free(word->word);
+		g_free(word);
+	}
+	g_free(words->words);
+	g_free(words);
+}
+
--- a/camel/camel-search-private.h
+++ b/camel/camel-search-private.h
@ -21,6 +21,8 @@
 #ifndef _CAMEL_SEARCH_PRIVATE_H
 #define _CAMEL_SEARCH_PRIVATE_H

+#include <regex.h>
+
 typedef enum {
 	CAMEL_SEARCH_MATCH_START = 1<<0,
 	CAMEL_SEARCH_MATCH_END = 1<<1,
@ -52,4 +54,29 @@ gboolean camel_search_message_body_contains(CamelDataWrapper *object, regex_t *p
 gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how, camel_search_t type, const char *default_charset);
 gboolean camel_search_header_soundex(const char *header, const char *match);

+/* TODO: replace with a real search function */
+const char *camel_ustrstrcase(const char *haystack, const char *needle);
+
+/* Some crappy utility functions for handling multiple search words */
+/* Classification flags for a search word; values are bits and may be ORed */
+enum _camel_search_word_t {
+	CAMEL_SEARCH_WORD_SIMPLE = 1,	/* initial type of every word produced by the splitters */
+	CAMEL_SEARCH_WORD_COMPLEX = 2,	/* word contains a non-alphanumeric character */
+	CAMEL_SEARCH_WORD_8BIT = 4,	/* word contains characters beyond the 7-bit range */
+};
+/* A single search word */
+struct _camel_search_word {
+	enum _camel_search_word_t type;	/* flags describing this word */
+	char *word;			/* the word text; released by camel_search_words_free() */
+};
+
+/* A list of search words */
+struct _camel_search_words {
+	int len;			/* number of entries in words */
+	enum _camel_search_word_t type;	/* OR of all word types in list */
+	struct _camel_search_word **words;	/* table of len word records */
+};
+
+struct _camel_search_words *camel_search_words_split(const unsigned char *in);
+struct _camel_search_words *camel_search_words_simple(struct _camel_search_words *wordin);
+void camel_search_words_free(struct _camel_search_words *);
+
 #endif /* ! _CAMEL_SEARCH_PRIVATE_H */
+
--- a/camel/providers/imap/camel-imap-search.c
+++ b/camel/providers/imap/camel-imap-search.c
@ -42,6 +42,7 @@
 #include "camel-mime-utils.h"	/* base64 encoding */

 #include "camel-seekable-stream.h"
+#include "camel-search-private.h"

 #define d(x) x

@ -304,10 +305,13 @@ static int
 sync_match(CamelImapSearch *is, struct _match_record *mr)
 {
 	char *p, *result, *lasts = NULL;
-	CamelImapResponse *response;
+	CamelImapResponse *response = NULL;
 	guint32 uid;
 	CamelFolder *folder = ((CamelFolderSearch *)is)->folder;
 	CamelImapStore *store = (CamelImapStore *)folder->parent_store;
+	struct _camel_search_words *words;
+	GString *search;
+	int i;

 	if (mr->lastuid >= is->lastuid && mr->validity == is->validity)
 		return 0;
@ -316,9 +320,36 @@ sync_match(CamelImapSearch *is, struct _match_record *mr)

 	/* TODO: Handle multiple search terms */

-	response = camel_imap_command (store, folder, NULL,
-				       "UID SEARCH UID %d:%d BODY \"%s\"",
-				       mr->lastuid+1, is->lastuid, mr->terms[0]);
+	/* This handles multiple search words within a single term */
+	/* Build one "BODY "word"" criterion per word; IMAP ANDs the criteria */
+	words = camel_search_words_split(mr->terms[0]);
+	search = g_string_new("");
+	g_string_sprintfa(search, "UID %d:%d", mr->lastuid+1, is->lastuid);
+	for (i=0;i<words->len;i++) {
+		char *w = words->words[i]->word, c;
+
+		g_string_sprintfa(search, " BODY \"");
+		/* backslash-escape the characters that are special inside a quoted string */
+		while ((c = *w++)) {
+			if (c == '\\' || c == '"')
+				g_string_append_c(search, '\\');
+			g_string_append_c(search, c);
+		}
+		g_string_append_c(search, '"');
+	}
+
+	/* We only try search using utf8 if its non us-ascii text? */
+	if ((words->type & CAMEL_SEARCH_WORD_8BIT) &&  (store->capabilities & IMAP_CAPABILITY_utf8_search)) {
+		response = camel_imap_command(store, folder, NULL,
+					      "UID SEARCH CHARSET UTF-8 %s", search->str);
+		/* We can't actually tell if we got a NO response, so assume always */
+		if (response == NULL)
+			store->capabilities &= ~IMAP_CAPABILITY_utf8_search;
+	}
+	/* words->type is read by the check above, so the list may only be
+	   released once that check is done */
+	camel_search_words_free(words);
+
+	/* no utf8 attempt was made, or it failed: search without a charset */
+	if (response == NULL)
+		response = camel_imap_command (store, folder, NULL,
+					       "UID SEARCH %s", search->str);
+	g_string_free(search, TRUE);
+
 	if (!response)
 		return -1;
 	result = camel_imap_response_extract (store, response, "SEARCH", NULL);
--- a/camel/providers/imap/camel-imap-store.c
+++ b/camel/providers/imap/camel-imap-store.c
@ -342,7 +342,8 @@ connect_to_server (CamelService *service, CamelException *ex)
 	store->connected = TRUE;
 	
 	/* Find out the IMAP capabilities */
-	store->capabilities = 0;
+	/* We assume we have utf8 capable search until a failed search tells us otherwise */
+	store->capabilities = IMAP_CAPABILITY_utf8_search;
 	store->authtypes = g_hash_table_new (g_str_hash, g_str_equal);
 	response = camel_imap_command (store, NULL, ex, "CAPABILITY");
 	if (!response)
--- a/camel/providers/imap/camel-imap-store.h
+++ b/camel/providers/imap/camel-imap-store.h
@ -71,6 +71,7 @@ typedef enum {
 #define IMAP_CAPABILITY_UIDPLUS			(1 << 4)
 #define IMAP_CAPABILITY_LITERALPLUS		(1 << 5)
 #define IMAP_CAPABILITY_useful_lsub		(1 << 6)
+#define IMAP_CAPABILITY_utf8_search		(1 << 7)

 #define IMAP_PARAM_OVERRIDE_NAMESPACE		(1 << 0)
 #define IMAP_PARAM_CHECK_ALL			(1 << 1)