Use camel-url-scanner instead of regex.

2002-12-10  Jeffrey Stedfast  <fejj@ximian.com>

	* camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner
	instead of regex.

svn path=/trunk/; revision=19087
This commit is contained in:
Jeffrey Stedfast
2002-12-10 17:40:34 +00:00
committed by Jeffrey Stedfast
parent 2dd7fb3a14
commit 1de04ce7bd
3 changed files with 43 additions and 90 deletions

View File

@ -1,3 +1,8 @@
2002-12-10  Jeffrey Stedfast  <fejj@ximian.com>

	* camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner
	instead of regex.

2002-12-09 Jeffrey Stedfast <fejj@ximian.com> 2002-12-09 Jeffrey Stedfast <fejj@ximian.com>
* camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be * camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be

View File

@ -27,31 +27,32 @@
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <ctype.h>
#include <regex.h>
#include "camel-url-scanner.h"
#include "camel-mime-filter-tohtml.h" #include "camel-mime-filter-tohtml.h"
#define d(x) #define d(x)
#define CONVERT_WEB_URLS CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS
#define CONVERT_ADDRSPEC CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES
struct _UrlRegexPattern { static struct {
unsigned int mask; unsigned int mask;
char *pattern; urlpattern_t pattern;
char *prefix; } patterns[] = {
regex_t *preg; { CONVERT_WEB_URLS, { "file://", "", camel_url_file_start, camel_url_file_end } },
regmatch_t matches; { CONVERT_WEB_URLS, { "ftp://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "http://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "https://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "news://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "nntp://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "telnet://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "www.", "http://", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "ftp.", "ftp://", camel_url_web_start, camel_url_web_end } },
{ CONVERT_ADDRSPEC, { "@", "mailto:", camel_url_addrspec_start, camel_url_addrspec_end } },
}; };
static struct _UrlRegexPattern patterns[] = { #define NUM_URL_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "(news|nntp|telnet|file|ftp|http|https)://([-a-z0-9]+(:[-a-z0-9]+)?@)?[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-a-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "", NULL, { 0, 0 } },
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "http://", NULL, { 0, 0 } },
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "ftp://", NULL, { 0, 0 } },
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES, "([-_a-z0-9.\\+]+@[-_a-z0-9.]+)", "mailto:", NULL, { 0, 0 } }
};
#define NUM_URL_REGEX_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass); static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass);
static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter); static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter);
@ -83,35 +84,14 @@ static void
camel_mime_filter_tohtml_finalize (CamelObject *obj) camel_mime_filter_tohtml_finalize (CamelObject *obj)
{ {
CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj; CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj;
int i;
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) { camel_url_scanner_free (filter->scanner);
if (filter->patterns[i].preg) {
regfree (filter->patterns[i].preg);
g_free (filter->patterns[i].preg);
}
}
g_free (filter->patterns);
} }
static void static void
camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter) camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter)
{ {
int i; filter->scanner = camel_url_scanner_new ();
/* FIXME: use a global set of patterns instead? */
filter->patterns = g_malloc (sizeof (patterns));
memcpy (filter->patterns, patterns, sizeof (patterns));
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
filter->patterns[i].preg = g_malloc (sizeof (regex_t));
if (regcomp (filter->patterns[i].preg, patterns[i].pattern, REG_EXTENDED) == -1) {
/* error building the regex_t so we can't use this pattern */
filter->patterns[i].preg = NULL;
filter->patterns[i].mask = 0;
}
}
filter->flags = 0; filter->flags = 0;
filter->colour = 0; filter->colour = 0;
@ -279,64 +259,28 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
#define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES) #define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
if (html->flags & CONVERT_URLS) { if (html->flags & CONVERT_URLS) {
struct _UrlRegexPattern *fmatch, *pat; size_t matchlen, buflen, len;
size_t matchlen, len; urlmatch_t match;
regoff_t offset;
char *linebuf;
char save;
int i;
len = inptr - start; len = inptr - start;
linebuf = g_malloc (len + 1);
memcpy (linebuf, start, len);
linebuf[len] = '\0';
start = linebuf;
save = '\0';
do { do {
/* search for all of our patterns */ if (camel_url_scanner_scan (html->scanner, start, len, &match)) {
offset = 0;
fmatch = NULL;
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
pat = html->patterns + i;
if ((html->flags & pat->mask) &&
!regexec (pat->preg, start, 1, &pat->matches, 0)) {
if (pat->matches.rm_so < offset) {
*(start + offset) = save;
fmatch = NULL;
}
if (!fmatch) {
fmatch = pat;
offset = pat->matches.rm_so;
/* optimisation so we don't have to search the
entire line buffer for the next pattern */
save = *(start + offset);
*(start + offset) = '\0';
}
}
}
if (fmatch) {
/* restore our char */
*(start + offset) = save;
/* write out anything before the first regex match */ /* write out anything before the first regex match */
outptr = writeln (filter, start, start + offset, outptr, &outend); outptr = writeln (filter, start, start + match.um_so,
start += offset; outptr, &outend);
len -= offset;
#define MATCHLEN(matches) (matches.rm_eo - matches.rm_so) start += match.um_so;
matchlen = MATCHLEN (fmatch->matches); len -= match.um_so;
i = 20 + strlen (fmatch->prefix) + matchlen + matchlen; matchlen = match.um_eo - match.um_so;
outptr = check_size (filter, outptr, &outend, i);
buflen = 20 + strlen (match.prefix) + matchlen + matchlen;
outptr = check_size (filter, outptr, &outend, buflen);
/* write out the href tag */ /* write out the href tag */
outptr = g_stpcpy (outptr, "<a href=\""); outptr = g_stpcpy (outptr, "<a href=\"");
outptr = g_stpcpy (outptr, fmatch->prefix); outptr = g_stpcpy (outptr, match.prefix);
memcpy (outptr, start, matchlen); memcpy (outptr, start, matchlen);
outptr += matchlen; outptr += matchlen;
outptr = g_stpcpy (outptr, "\">"); outptr = g_stpcpy (outptr, "\">");
@ -356,8 +300,6 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
break; break;
} }
} while (len > 0); } while (len > 0);
g_free (linebuf);
} else { } else {
outptr = writeln (filter, start, inptr, outptr, &outend); outptr = writeln (filter, start, inptr, outptr, &outend);
} }
@ -448,11 +390,17 @@ CamelMimeFilter *
camel_mime_filter_tohtml_new (guint32 flags, guint32 colour) camel_mime_filter_tohtml_new (guint32 flags, guint32 colour)
{ {
CamelMimeFilterToHTML *new; CamelMimeFilterToHTML *new;
int i;
new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ())); new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ()));
new->flags = flags; new->flags = flags;
new->colour = colour; new->colour = colour;
for (i = 0; i < NUM_URL_PATTERNS; i++) {
if (patterns[i].mask & flags)
camel_url_scanner_add (new->scanner, &patterns[i].pattern);
}
return CAMEL_MIME_FILTER (new); return CAMEL_MIME_FILTER (new);
} }

View File

@ -51,7 +51,7 @@ typedef struct _CamelMimeFilterToHTML CamelMimeFilterToHTML;
struct _CamelMimeFilterToHTML { struct _CamelMimeFilterToHTML {
CamelMimeFilter parent; CamelMimeFilter parent;
struct _UrlRegexPattern *patterns; struct _CamelUrlScanner *scanner;
guint32 flags; guint32 flags;
guint32 colour; guint32 colour;