When doing a contains match, split the words and perform an and on it.
2002-04-18 Not Zed <NotZed@Ximian.com> * camel-folder-search.c (check_header): When doing a contains match, split the words and perform an and on it. (match_words_messages): If we have an index, but were forced to do a full search, first lookup a subset of messages using the index and a simplified word set. Only do a manual search of this subset. 2002-04-17 Not Zed <NotZed@Ximian.com> * camel-folder-search.c (match_message_index): Changed to take a utf8 string not a regex pattern. (match_words_index): Matches against a camel_search_words list. (match_words_1message): Matches a single message against a camel_search_words list. (match_words_message): Same, but gets the message from the folder for you. (match_words_messages): Matches a list of messages against a words list. (search_body_contains): Rewritten to handle multiple word searches. For #23371. * providers/imap/camel-imap-search.c (sync_match): Split words when searching, to support multiple search words. Also, try searching specifying charset of utf8 if we can, if that fails, fall back to not specifying charset. TODO: It should translate the strings into the locale default charset? * providers/imap/camel-imap-store.c (connect_to_server): Added new cap - utf8_search, if set, we tell the server we're searching using utf8, otherwise we dont (incorrectly, since we always use utf8 to search). * camel-search-private.c (camel_ustrstrcase): Make this class public. (camel_search_words_split): Split a word into multiple words based on whitespace, and keep track of whether the word is simple (indexable directly), or not. (camel_search_words_free): Free 'em. svn path=/trunk/; revision=16501
This commit is contained in:
@ -1,3 +1,43 @@
|
||||
2002-04-18 Not Zed <NotZed@Ximian.com>
|
||||
|
||||
* camel-folder-search.c (check_header): When doing a contains
|
||||
match, split the words and perform an and on it.
|
||||
(match_words_messages): If we have an index, but were forced to do
|
||||
a full search, first lookup a subset of messages using
|
||||
the index and a simplified word set. Only do a manual search of
|
||||
this subset.
|
||||
|
||||
2002-04-17 Not Zed <NotZed@Ximian.com>
|
||||
|
||||
* camel-folder-search.c (match_message_index): Changed to take a
|
||||
utf8 string not a regex pattern.
|
||||
(match_words_index): Matches against a camel_search_words list.
|
||||
(match_words_1message): Matches a single message against a
|
||||
camel_search_words list.
|
||||
(match_words_message): Same, but gets the message from the folder
|
||||
for you.
|
||||
(match_words_messages): Matches a list of messages against a words
|
||||
list.
|
||||
(search_body_contains): Rewritten to handle multiple word
|
||||
searches. For #23371.
|
||||
|
||||
* providers/imap/camel-imap-search.c (sync_match): Split words
|
||||
when searching, to support multiple search words. Also, try
|
||||
searching specifying charset of utf8 if we can, if that fails,
|
||||
fall back to not specifying charset. TODO: It should translate
|
||||
the strings into the locale default charset?
|
||||
|
||||
* providers/imap/camel-imap-store.c (connect_to_server): Added new
|
||||
cap - utf8_search, if set, we tell the server we're searching
|
||||
using utf8, otherwise we don't (incorrectly, since we always use
|
||||
utf8 to search).
|
||||
|
||||
* camel-search-private.c (camel_ustrstrcase): Make this function public.
|
||||
(camel_search_words_split): Split a word into multiple words based
|
||||
on whitespace, and keep track of whether the word is simple
|
||||
(indexable directly), or not.
|
||||
(camel_search_words_free): Free 'em.
|
||||
|
||||
2002-04-17 Jeffrey Stedfast <fejj@ximian.com>
|
||||
|
||||
* camel-vee-folder.c (vee_search_by_expression): If the vee-folder
|
||||
|
||||
@ -621,8 +621,9 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder
|
||||
char *headername;
|
||||
const char *header = NULL;
|
||||
char strbuf[32];
|
||||
int i;
|
||||
int i, j;
|
||||
camel_search_t type = CAMEL_SEARCH_TYPE_ASIS;
|
||||
struct _camel_search_words *words;
|
||||
|
||||
/* only a subset of headers are supported .. */
|
||||
headername = argv[0]->value.string;
|
||||
@ -652,9 +653,21 @@ check_header(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolder
|
||||
if (header) {
|
||||
/* performs an OR of all words */
|
||||
for (i=1;i<argc && !truth;i++) {
|
||||
if (argv[i]->type == ESEXP_RES_STRING)
|
||||
truth = camel_search_header_match(header, argv[i]->value.string,
|
||||
how, type, NULL);
|
||||
if (argv[i]->type == ESEXP_RES_STRING) {
|
||||
if (argv[i]->value.string[0] == 0) {
|
||||
truth = TRUE;
|
||||
} else if (how == CAMEL_SEARCH_MATCH_CONTAINS) {
|
||||
/* doesn't make sense to split words on anything but contains i.e. we can't have an ending match different words */
|
||||
words = camel_search_words_split(argv[i]->value.string);
|
||||
truth = TRUE;
|
||||
for (j=0;j<words->len && truth;j++) {
|
||||
truth = camel_search_header_match(header, words->words[j]->word, how, type, NULL);
|
||||
}
|
||||
camel_search_words_free(words);
|
||||
} else {
|
||||
truth = camel_search_header_match(header, argv[i]->value.string, how, type, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -723,64 +736,16 @@ g_lib_sux_htor(char *key, int value, struct _glib_sux_donkeys *fuckup)
|
||||
g_ptr_array_add(fuckup->uids, key);
|
||||
}
|
||||
|
||||
static int
|
||||
match_message(CamelFolder *folder, const char *uid, regex_t *pattern, CamelException *ex)
|
||||
/* and, only store duplicates */
|
||||
static void
|
||||
g_lib_sux_htand(char *key, int value, struct _glib_sux_donkeys *fuckup)
|
||||
{
|
||||
CamelMimeMessage *msg;
|
||||
int truth = FALSE;
|
||||
|
||||
msg = camel_folder_get_message(folder, uid, ex);
|
||||
if (!camel_exception_is_set(ex) && msg!=NULL) {
|
||||
truth = camel_search_message_body_contains((CamelDataWrapper *)msg, pattern);
|
||||
camel_object_unref((CamelObject *)msg);
|
||||
} else {
|
||||
camel_exception_clear(ex);
|
||||
}
|
||||
return truth;
|
||||
if (value == fuckup->count)
|
||||
g_ptr_array_add(fuckup->uids, key);
|
||||
}
|
||||
|
||||
/* perform a regex match against words in an index */
|
||||
/* uids = hash table of messageinfo's by uid's */
|
||||
static GPtrArray *
|
||||
match_messages_index(CamelIndex *idx, regex_t *pattern, GHashTable *uids, CamelException *ex)
|
||||
{
|
||||
GPtrArray *result = g_ptr_array_new();
|
||||
GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
struct _glib_sux_donkeys lambdafoo;
|
||||
CamelIndexCursor *wc, *nc;
|
||||
const char *word, *name;
|
||||
CamelMessageInfo *mi;
|
||||
|
||||
wc = camel_index_words(idx);
|
||||
if (wc) {
|
||||
while ((word = camel_index_cursor_next(wc))) {
|
||||
if (regexec(pattern, word, 0, NULL, 0) == 0) {
|
||||
/* perf: could have the wc cursor return the name cursor */
|
||||
nc = camel_index_find(idx, word);
|
||||
if (nc) {
|
||||
while ((name = camel_index_cursor_next(nc))) {
|
||||
mi = g_hash_table_lookup(uids, name);
|
||||
if (mi)
|
||||
g_hash_table_insert(ht, (char *)camel_message_info_uid(mi), (void *)1);
|
||||
}
|
||||
camel_object_unref((CamelObject *)nc);
|
||||
}
|
||||
}
|
||||
}
|
||||
camel_object_unref((CamelObject *)wc);
|
||||
|
||||
lambdafoo.uids = result;
|
||||
g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo);
|
||||
g_hash_table_destroy(ht);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* perform a regex match against an individual uid in an index */
|
||||
/* this would benefit greatly in practice if there was a hashtable of uids to match against */
|
||||
static int
|
||||
match_message_index(CamelIndex *idx, const char *uid, regex_t *pattern, CamelException *ex)
|
||||
match_message_index(CamelIndex *idx, const char *uid, const char *match, CamelException *ex)
|
||||
{
|
||||
CamelIndexCursor *wc, *nc;
|
||||
const char *word, *name;
|
||||
@ -789,7 +754,7 @@ match_message_index(CamelIndex *idx, const char *uid, regex_t *pattern, CamelExc
|
||||
wc = camel_index_words(idx);
|
||||
if (wc) {
|
||||
while (!truth && (word = camel_index_cursor_next(wc))) {
|
||||
if (regexec(pattern, word, 0, NULL, 0) == 0) {
|
||||
if (camel_ustrstrcase(word,match) != NULL) {
|
||||
/* perf: could have the wc cursor return the name cursor */
|
||||
nc = camel_index_find(idx, word);
|
||||
if (nc) {
|
||||
@ -805,68 +770,227 @@ match_message_index(CamelIndex *idx, const char *uid, regex_t *pattern, CamelExc
|
||||
return truth;
|
||||
}
|
||||
|
||||
/*
|
||||
"one two" "three" "four five"
|
||||
|
||||
one and two
|
||||
or
|
||||
three
|
||||
or
|
||||
four and five
|
||||
*/
|
||||
|
||||
/* returns messages which contain all words listed in words */
|
||||
static GPtrArray *
|
||||
match_words_index(CamelFolderSearch *search, struct _camel_search_words *words, CamelException *ex)
|
||||
{
|
||||
GPtrArray *result = g_ptr_array_new();
|
||||
GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
struct _glib_sux_donkeys lambdafoo;
|
||||
CamelIndexCursor *wc, *nc;
|
||||
const char *word, *name;
|
||||
CamelMessageInfo *mi;
|
||||
int i;
|
||||
|
||||
/* we can have a maximum of 32 words, as we use it as the AND mask */
|
||||
|
||||
wc = camel_index_words(search->body_index);
|
||||
if (wc) {
|
||||
while ((word = camel_index_cursor_next(wc))) {
|
||||
for (i=0;i<words->len;i++) {
|
||||
if (camel_ustrstrcase(word, words->words[i]->word) != NULL) {
|
||||
/* perf: could have the wc cursor return the name cursor */
|
||||
nc = camel_index_find(search->body_index, word);
|
||||
if (nc) {
|
||||
while ((name = camel_index_cursor_next(nc))) {
|
||||
mi = g_hash_table_lookup(search->summary_hash, name);
|
||||
if (mi) {
|
||||
int mask;
|
||||
const char *uid = camel_message_info_uid(mi);
|
||||
|
||||
mask = ((int)g_hash_table_lookup(ht, uid)) | (1<<i);
|
||||
g_hash_table_insert(ht, (char *)uid, (void *)mask);
|
||||
}
|
||||
}
|
||||
camel_object_unref((CamelObject *)nc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
camel_object_unref((CamelObject *)wc);
|
||||
|
||||
lambdafoo.uids = result;
|
||||
lambdafoo.count = (1<<words->len) - 1;
|
||||
g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htand, &lambdafoo);
|
||||
g_hash_table_destroy(ht);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
match_words_1message (CamelDataWrapper *object, struct _camel_search_words *words, guint32 *mask)
|
||||
{
|
||||
CamelDataWrapper *containee;
|
||||
int truth = FALSE;
|
||||
int parts, i;
|
||||
|
||||
containee = camel_medium_get_content_object (CAMEL_MEDIUM (object));
|
||||
|
||||
if (containee == NULL)
|
||||
return FALSE;
|
||||
|
||||
/* using the object types is more accurate than using the mime/types */
|
||||
if (CAMEL_IS_MULTIPART (containee)) {
|
||||
parts = camel_multipart_get_number (CAMEL_MULTIPART (containee));
|
||||
for (i = 0; i < parts && truth == FALSE; i++) {
|
||||
CamelDataWrapper *part = (CamelDataWrapper *)camel_multipart_get_part (CAMEL_MULTIPART (containee), i);
|
||||
if (part)
|
||||
truth = match_words_1message(part, words, mask);
|
||||
}
|
||||
} else if (CAMEL_IS_MIME_MESSAGE (containee)) {
|
||||
/* for messages we only look at its contents */
|
||||
truth = match_words_1message((CamelDataWrapper *)containee, words, mask);
|
||||
} else if (header_content_type_is(CAMEL_DATA_WRAPPER (containee)->mime_type, "text", "*")) {
|
||||
/* for all other text parts, we look inside, otherwise we dont care */
|
||||
CamelStreamMem *mem = (CamelStreamMem *)camel_stream_mem_new ();
|
||||
|
||||
/* FIXME: The match should be part of a stream op */
|
||||
camel_data_wrapper_write_to_stream (containee, CAMEL_STREAM (mem));
|
||||
camel_stream_write (CAMEL_STREAM (mem), "", 1);
|
||||
for (i=0;i<words->len;i++) {
|
||||
/* FIXME: This is horridly slow, and should use a real search algorithm */
|
||||
if (camel_ustrstrcase(mem->buffer->data, words->words[i]->word) != NULL) {
|
||||
*mask |= (1<<i);
|
||||
/* shortcut a match */
|
||||
if (*mask == (1<<(words->len))-1)
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
camel_object_unref (CAMEL_OBJECT (mem));
|
||||
}
|
||||
|
||||
return truth;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
match_words_message(CamelFolder *folder, const char *uid, struct _camel_search_words *words, CamelException *ex)
|
||||
{
|
||||
guint32 mask;
|
||||
CamelMimeMessage *msg;
|
||||
int truth;
|
||||
|
||||
msg = camel_folder_get_message(folder, uid, ex);
|
||||
if (msg) {
|
||||
mask = 0;
|
||||
truth = match_words_1message((CamelDataWrapper *)msg, words, &mask);
|
||||
camel_object_unref((CamelObject *)msg);
|
||||
} else {
|
||||
camel_exception_clear(ex);
|
||||
truth = FALSE;
|
||||
}
|
||||
|
||||
return truth;
|
||||
}
|
||||
|
||||
static GPtrArray *
|
||||
match_words_messages(CamelFolderSearch *search, struct _camel_search_words *words, CamelException *ex)
|
||||
{
|
||||
int i;
|
||||
GPtrArray *matches = g_ptr_array_new();
|
||||
|
||||
if (search->body_index) {
|
||||
GPtrArray *indexed;
|
||||
struct _camel_search_words *simple;
|
||||
|
||||
simple = camel_search_words_simple(words);
|
||||
indexed = match_words_index(search, simple, ex);
|
||||
camel_search_words_free(simple);
|
||||
|
||||
for (i=0;i<indexed->len;i++) {
|
||||
const char *uid = g_ptr_array_index(indexed, i);
|
||||
|
||||
if (match_words_message(search->folder, uid, words, ex))
|
||||
g_ptr_array_add(matches, (char *)uid);
|
||||
}
|
||||
|
||||
g_ptr_array_free(indexed, TRUE);
|
||||
} else {
|
||||
for (i=0;i<search->summary->len;i++) {
|
||||
CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
|
||||
const char *uid = camel_message_info_uid(info);
|
||||
|
||||
if (match_words_message(search->folder, uid, words, ex))
|
||||
g_ptr_array_add(matches, (char *)uid);
|
||||
}
|
||||
}
|
||||
|
||||
return matches;
|
||||
}
|
||||
|
||||
static ESExpResult *
|
||||
search_body_contains(struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *search)
|
||||
{
|
||||
ESExpResult *r;
|
||||
int i;
|
||||
regex_t pattern;
|
||||
int i, j;
|
||||
CamelException *ex = search->priv->ex;
|
||||
struct _camel_search_words *words;
|
||||
ESExpResult *r;
|
||||
struct _glib_sux_donkeys lambdafoo;
|
||||
|
||||
if (search->current) {
|
||||
if (search->current) {
|
||||
int truth = FALSE;
|
||||
|
||||
if (argc == 1 && argv[0]->value.string[0] == 0 && search->folder) {
|
||||
if (argc == 1 && argv[0]->value.string[0] == 0) {
|
||||
truth = TRUE;
|
||||
} else if (search->body_index) {
|
||||
if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
|
||||
truth = match_message_index(search->body_index, camel_message_info_uid(search->current), &pattern, ex);
|
||||
regfree(&pattern);
|
||||
}
|
||||
} else if (search->folder) {
|
||||
/* we do a 'slow' direct search */
|
||||
if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
|
||||
truth = match_message(search->folder, camel_message_info_uid(search->current), &pattern, ex);
|
||||
regfree(&pattern);
|
||||
}
|
||||
} else {
|
||||
g_warning("Cannot perform indexed body query with no index or folder set");
|
||||
for (i=0;i<argc && !truth;i++) {
|
||||
if (argv[i]->type == ESEXP_RES_STRING) {
|
||||
words = camel_search_words_split(argv[i]->value.string);
|
||||
truth = TRUE;
|
||||
if ((words->type & CAMEL_SEARCH_WORD_COMPLEX) == 0 && search->body_index) {
|
||||
for (j=0;j<words->len && truth;j++)
|
||||
truth = match_message_index(search->body_index, camel_message_info_uid(search->current), words->words[j]->word, ex);
|
||||
} else {
|
||||
/* TODO: cache current message incase of multiple body search terms */
|
||||
truth = match_words_message(search->folder, camel_message_info_uid(search->current), words, ex);
|
||||
}
|
||||
camel_search_words_free(words);
|
||||
}
|
||||
}
|
||||
}
|
||||
r = e_sexp_result_new(f, ESEXP_RES_BOOL);
|
||||
r->value.bool = truth;
|
||||
} else {
|
||||
r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR);
|
||||
r->value.ptrarray = g_ptr_array_new();
|
||||
|
||||
if (argc == 1 && argv[0]->value.string[0] == 0 && search->folder) {
|
||||
/* optimise the match "" case - match everything */
|
||||
r->value.ptrarray = g_ptr_array_new();
|
||||
if (argc == 1 && argv[0]->value.string[0] == 0) {
|
||||
for (i=0;i<search->summary->len;i++) {
|
||||
CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
|
||||
|
||||
g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info));
|
||||
}
|
||||
} else if (search->body_index) {
|
||||
if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
|
||||
r->value.ptrarray = match_messages_index(search->body_index, &pattern, search->summary_hash, ex);
|
||||
regfree(&pattern);
|
||||
}
|
||||
} else if (search->folder) {
|
||||
/* do a slow search */
|
||||
r->value.ptrarray = g_ptr_array_new();
|
||||
if (camel_search_build_match_regex(&pattern, CAMEL_SEARCH_MATCH_ICASE, argc, argv, ex) == 0) {
|
||||
if (search->summary) {
|
||||
for (i=0;i<search->summary->len;i++) {
|
||||
CamelMessageInfo *info = g_ptr_array_index(search->summary, i);
|
||||
|
||||
if (match_message(search->folder, camel_message_info_uid(info), &pattern, ex))
|
||||
g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info));
|
||||
}
|
||||
} /* else? we could always get the summary from the folder, but then
|
||||
we need to free it later somehow */
|
||||
regfree(&pattern);
|
||||
}
|
||||
} else {
|
||||
g_warning("Cannot perform indexed body query with no index or folder set");
|
||||
r->value.ptrarray = g_ptr_array_new();
|
||||
GHashTable *ht = g_hash_table_new(g_str_hash, g_str_equal);
|
||||
GPtrArray *matches;
|
||||
|
||||
for (i=0;i<argc;i++) {
|
||||
if (argv[i]->type == ESEXP_RES_STRING) {
|
||||
words = camel_search_words_split(argv[i]->value.string);
|
||||
if ((words->type & CAMEL_SEARCH_WORD_COMPLEX) == 0 && search->body_index) {
|
||||
matches = match_words_index(search, words, ex);
|
||||
} else {
|
||||
matches = match_words_messages(search, words, ex);
|
||||
}
|
||||
for (j=0;j<matches->len;j++)
|
||||
g_hash_table_insert(ht, matches->pdata[j], matches->pdata[j]);
|
||||
g_ptr_array_free(matches, TRUE);
|
||||
camel_search_words_free(words);
|
||||
}
|
||||
}
|
||||
lambdafoo.uids = r->value.ptrarray;
|
||||
g_hash_table_foreach(ht, (GHFunc)g_lib_sux_htor, &lambdafoo);
|
||||
g_hash_table_destroy(ht);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -194,6 +194,7 @@ header_soundex (const char *header, const char *match)
|
||||
return truth;
|
||||
}
|
||||
|
||||
/* FIXME: This is stupidly slow and needs to be removed */
|
||||
static gunichar
|
||||
utf8_get (const char **inp)
|
||||
{
|
||||
@ -209,7 +210,7 @@ utf8_get (const char **inp)
|
||||
return c;
|
||||
}
|
||||
|
||||
static const char *
|
||||
const char *
|
||||
camel_ustrstrcase (const char *haystack, const char *needle)
|
||||
{
|
||||
gunichar *nuni, *puni;
|
||||
@ -469,9 +470,6 @@ camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern)
|
||||
if (containee == NULL)
|
||||
return FALSE;
|
||||
|
||||
/* TODO: I find it odd that get_part and get_content_object do not
|
||||
add a reference, probably need fixing for multithreading */
|
||||
|
||||
/* using the object types is more accurate than using the mime/types */
|
||||
if (CAMEL_IS_MULTIPART (containee)) {
|
||||
parts = camel_multipart_get_number (CAMEL_MULTIPART (containee));
|
||||
@ -496,3 +494,155 @@ camel_search_message_body_contains (CamelDataWrapper *object, regex_t *pattern)
|
||||
return truth;
|
||||
}
|
||||
|
||||
static __inline__ guint32
|
||||
camel_utf8_getc(const unsigned char **ptr)
|
||||
{
|
||||
register unsigned char *p = (unsigned char *)*ptr;
|
||||
register unsigned char c, r;
|
||||
register guint32 v=0, /* this is only required because the stupid @@@%#%# compiler thinks it can be used uninitialised */
|
||||
m;
|
||||
|
||||
r = *p++;
|
||||
loop:
|
||||
if (r < 0x80) {
|
||||
*ptr = p;
|
||||
v = r;
|
||||
} else if (r < 0xfe) { /* valid start char? */
|
||||
v = r;
|
||||
m = 0x7f80; /* used to mask out the length bits */
|
||||
do {
|
||||
c = *p++;
|
||||
if ((c & 0xc0) != 0x80) {
|
||||
r = c;
|
||||
goto loop;
|
||||
}
|
||||
v = (v<<6) | (c & 0x3f);
|
||||
r<<=1;
|
||||
m<<=5;
|
||||
} while (r & 0x40);
|
||||
|
||||
*ptr = p;
|
||||
|
||||
v &= ~m;
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
struct _camel_search_words *
|
||||
camel_search_words_split(const unsigned char *in)
|
||||
{
|
||||
int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0;
|
||||
GString *w;
|
||||
struct _camel_search_word *word;
|
||||
struct _camel_search_words *words;
|
||||
GPtrArray *list = g_ptr_array_new();
|
||||
guint32 c;
|
||||
int utf8len;
|
||||
char utf8[8];
|
||||
|
||||
words = g_malloc0(sizeof(*words));
|
||||
w = g_string_new("");
|
||||
|
||||
do {
|
||||
c = camel_utf8_getc(&in);
|
||||
if (c == 0 || g_unichar_isspace(c)) {
|
||||
if (w->len) {
|
||||
word = g_malloc0(sizeof(*word));
|
||||
word->word = g_strdup(w->str);
|
||||
word->type = type;
|
||||
g_ptr_array_add(list, word);
|
||||
all |= type;
|
||||
type = CAMEL_SEARCH_WORD_SIMPLE;
|
||||
g_string_truncate(w, 0);
|
||||
}
|
||||
} else {
|
||||
if (!g_unichar_isalnum(c))
|
||||
type = CAMEL_SEARCH_WORD_COMPLEX;
|
||||
else
|
||||
c = g_unichar_tolower(c);
|
||||
if (c > 0x80)
|
||||
type |= CAMEL_SEARCH_WORD_8BIT;
|
||||
|
||||
utf8len = g_unichar_to_utf8(c, utf8);
|
||||
utf8[utf8len] = 0;
|
||||
g_string_append(w, utf8);
|
||||
}
|
||||
} while (c);
|
||||
|
||||
g_string_free(w, TRUE);
|
||||
words->len = list->len;
|
||||
words->words = (struct _camel_search_word **)list->pdata;
|
||||
words->type = all;
|
||||
g_ptr_array_free(list, FALSE);
|
||||
|
||||
return words;
|
||||
}
|
||||
|
||||
/* takes an existing 'words' list, and converts it to another consisting of
|
||||
only simple words, with any punctuation etc stripped */
|
||||
struct _camel_search_words *
|
||||
camel_search_words_simple(struct _camel_search_words *wordin)
|
||||
{
|
||||
int i;
|
||||
const unsigned char *ptr, *start, *last;
|
||||
int type = CAMEL_SEARCH_WORD_SIMPLE, all = 0;
|
||||
GPtrArray *list = g_ptr_array_new();
|
||||
struct _camel_search_word *word;
|
||||
struct _camel_search_words *words;
|
||||
guint32 c;
|
||||
|
||||
words = g_malloc0(sizeof(*words));
|
||||
|
||||
for (i=0;i<wordin->len;i++) {
|
||||
if ((wordin->words[i]->type & CAMEL_SEARCH_WORD_COMPLEX) == 0) {
|
||||
word = g_malloc0(sizeof(*word));
|
||||
word->type = wordin->words[i]->type;
|
||||
word->word = g_strdup(wordin->words[i]->word);
|
||||
g_ptr_array_add(list, word);
|
||||
} else {
|
||||
ptr = wordin->words[i]->word;
|
||||
start = last = ptr;
|
||||
do {
|
||||
c = camel_utf8_getc(&ptr);
|
||||
if (c == 0 || !g_unichar_isalnum(c)) {
|
||||
if (last > start) {
|
||||
word = g_malloc0(sizeof(*word));
|
||||
word->word = g_strndup(start, last-start);
|
||||
word->type = type;
|
||||
g_ptr_array_add(list, word);
|
||||
all |= type;
|
||||
type = CAMEL_SEARCH_WORD_SIMPLE;
|
||||
}
|
||||
start = ptr;
|
||||
}
|
||||
if (c > 0x80)
|
||||
type = CAMEL_SEARCH_WORD_8BIT;
|
||||
last = ptr;
|
||||
} while (c);
|
||||
}
|
||||
}
|
||||
|
||||
words->len = list->len;
|
||||
words->words = (struct _camel_search_word **)list->pdata;
|
||||
words->type = all;
|
||||
g_ptr_array_free(list, FALSE);
|
||||
|
||||
return words;
|
||||
}
|
||||
|
||||
void
|
||||
camel_search_words_free(struct _camel_search_words *words)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0;i<words->len;i++) {
|
||||
struct _camel_search_word *word = words->words[i];
|
||||
|
||||
g_free(word->word);
|
||||
g_free(word);
|
||||
}
|
||||
g_free(words->words);
|
||||
g_free(words);
|
||||
}
|
||||
|
||||
|
||||
@ -21,6 +21,8 @@
|
||||
#ifndef _CAMEL_SEARCH_PRIVATE_H
|
||||
#define _CAMEL_SEARCH_PRIVATE_H
|
||||
|
||||
#include <regex.h>
|
||||
|
||||
typedef enum {
|
||||
CAMEL_SEARCH_MATCH_START = 1<<0,
|
||||
CAMEL_SEARCH_MATCH_END = 1<<1,
|
||||
@ -52,4 +54,29 @@ gboolean camel_search_message_body_contains(CamelDataWrapper *object, regex_t *p
|
||||
gboolean camel_search_header_match(const char *value, const char *match, camel_search_match_t how, camel_search_t type, const char *default_charset);
|
||||
gboolean camel_search_header_soundex(const char *header, const char *match);
|
||||
|
||||
/* TODO: replace with a real search function */
|
||||
const char *camel_ustrstrcase(const char *haystack, const char *needle);
|
||||
|
||||
/* Some crappy utility functions for handling multiple search words */
|
||||
/* Bit flags describing a search word; they may be OR'd together. */
enum _camel_search_word_t {
	CAMEL_SEARCH_WORD_SIMPLE  = 1 << 0,
	CAMEL_SEARCH_WORD_COMPLEX = 1 << 1,
	CAMEL_SEARCH_WORD_8BIT    = 1 << 2,
};

/* A single search word together with its type flags. */
struct _camel_search_word {
	enum _camel_search_word_t type;
	char *word;
};

/* A list of search words. */
struct _camel_search_words {
	int len;				/* number of entries in words */
	enum _camel_search_word_t type;		/* OR of all word types in list */
	struct _camel_search_word **words;
};
|
||||
|
||||
struct _camel_search_words *camel_search_words_split(const unsigned char *in);
|
||||
struct _camel_search_words *camel_search_words_simple(struct _camel_search_words *wordin);
|
||||
void camel_search_words_free(struct _camel_search_words *);
|
||||
|
||||
#endif /* ! _CAMEL_SEARCH_PRIVATE_H */
|
||||
|
||||
|
||||
@ -42,6 +42,7 @@
|
||||
#include "camel-mime-utils.h" /* base64 encoding */
|
||||
|
||||
#include "camel-seekable-stream.h"
|
||||
#include "camel-search-private.h"
|
||||
|
||||
#define d(x) x
|
||||
|
||||
@ -304,10 +305,13 @@ static int
|
||||
sync_match(CamelImapSearch *is, struct _match_record *mr)
|
||||
{
|
||||
char *p, *result, *lasts = NULL;
|
||||
CamelImapResponse *response;
|
||||
CamelImapResponse *response = NULL;
|
||||
guint32 uid;
|
||||
CamelFolder *folder = ((CamelFolderSearch *)is)->folder;
|
||||
CamelImapStore *store = (CamelImapStore *)folder->parent_store;
|
||||
struct _camel_search_words *words;
|
||||
GString *search;
|
||||
int i;
|
||||
|
||||
if (mr->lastuid >= is->lastuid && mr->validity == is->validity)
|
||||
return 0;
|
||||
@ -316,9 +320,36 @@ sync_match(CamelImapSearch *is, struct _match_record *mr)
|
||||
|
||||
/* TODO: Handle multiple search terms */
|
||||
|
||||
response = camel_imap_command (store, folder, NULL,
|
||||
"UID SEARCH UID %d:%d BODY \"%s\"",
|
||||
mr->lastuid+1, is->lastuid, mr->terms[0]);
|
||||
/* This handles multiple search words within a single term */
|
||||
words = camel_search_words_split(mr->terms[0]);
|
||||
search = g_string_new("");
|
||||
g_string_sprintfa(search, "UID %d:%d", mr->lastuid+1, is->lastuid);
|
||||
for (i=0;i<words->len;i++) {
|
||||
char *w = words->words[i]->word, c;
|
||||
|
||||
g_string_sprintfa(search, " BODY \"");
|
||||
while ((c = *w++)) {
|
||||
if (c == '\\' || c == '"')
|
||||
g_string_append_c(search, '\\');
|
||||
g_string_append_c(search, c);
|
||||
}
|
||||
g_string_append_c(search, '"');
|
||||
}
|
||||
camel_search_words_free(words);
|
||||
|
||||
/* We only try search using utf8 if its non us-ascii text? */
|
||||
if ((words->type & CAMEL_SEARCH_WORD_8BIT) && (store->capabilities & IMAP_CAPABILITY_utf8_search)) {
|
||||
response = camel_imap_command(store, folder, NULL,
|
||||
"UID SEARCH CHARSET UTF-8 %s", search->str);
|
||||
/* We can't actually tell if we got a NO response, so assume always */
|
||||
if (response == NULL)
|
||||
store->capabilities &= ~IMAP_CAPABILITY_utf8_search;
|
||||
}
|
||||
if (response == NULL)
|
||||
response = camel_imap_command (store, folder, NULL,
|
||||
"UID SEARCH %s", search->str);
|
||||
g_string_free(search, TRUE);
|
||||
|
||||
if (!response)
|
||||
return -1;
|
||||
result = camel_imap_response_extract (store, response, "SEARCH", NULL);
|
||||
|
||||
@ -342,7 +342,8 @@ connect_to_server (CamelService *service, CamelException *ex)
|
||||
store->connected = TRUE;
|
||||
|
||||
/* Find out the IMAP capabilities */
|
||||
store->capabilities = 0;
|
||||
/* We assume we have utf8 capable search until a failed search tells us otherwise */
|
||||
store->capabilities = IMAP_CAPABILITY_utf8_search;
|
||||
store->authtypes = g_hash_table_new (g_str_hash, g_str_equal);
|
||||
response = camel_imap_command (store, NULL, ex, "CAPABILITY");
|
||||
if (!response)
|
||||
|
||||
@ -71,6 +71,7 @@ typedef enum {
|
||||
#define IMAP_CAPABILITY_UIDPLUS (1 << 4)
|
||||
#define IMAP_CAPABILITY_LITERALPLUS (1 << 5)
|
||||
#define IMAP_CAPABILITY_useful_lsub (1 << 6)
|
||||
#define IMAP_CAPABILITY_utf8_search (1 << 7)
|
||||
|
||||
#define IMAP_PARAM_OVERRIDE_NAMESPACE (1 << 0)
|
||||
#define IMAP_PARAM_CHECK_ALL (1 << 1)
|
||||
|
||||
Reference in New Issue
Block a user