Use camel-url-scanner instead of regex.

2002-12-10  Jeffrey Stedfast  <fejj@ximian.com>

	* camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner
	instead of regex.

svn path=/trunk/; revision=19087
This commit is contained in:
Jeffrey Stedfast
2002-12-10 17:40:34 +00:00
committed by Jeffrey Stedfast
parent 2dd7fb3a14
commit 1de04ce7bd
3 changed files with 43 additions and 90 deletions

View File

@ -1,3 +1,8 @@
2002-12-10 Jeffrey Stedfast <fejj@ximian.com>
* camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner
instead of regex.
2002-12-09 Jeffrey Stedfast <fejj@ximian.com>
* camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be

View File

@ -27,31 +27,32 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <regex.h>
#include "camel-url-scanner.h"
#include "camel-mime-filter-tohtml.h"
#define d(x)
#define CONVERT_WEB_URLS CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS
#define CONVERT_ADDRSPEC CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES
struct _UrlRegexPattern {
static struct {
unsigned int mask;
char *pattern;
char *prefix;
regex_t *preg;
regmatch_t matches;
urlpattern_t pattern;
} patterns[] = {
{ CONVERT_WEB_URLS, { "file://", "", camel_url_file_start, camel_url_file_end } },
{ CONVERT_WEB_URLS, { "ftp://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "http://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "https://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "news://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "nntp://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "telnet://", "", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "www.", "http://", camel_url_web_start, camel_url_web_end } },
{ CONVERT_WEB_URLS, { "ftp.", "ftp://", camel_url_web_start, camel_url_web_end } },
{ CONVERT_ADDRSPEC, { "@", "mailto:", camel_url_addrspec_start, camel_url_addrspec_end } },
};
static struct _UrlRegexPattern patterns[] = {
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "(news|nntp|telnet|file|ftp|http|https)://([-a-z0-9]+(:[-a-z0-9]+)?@)?[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-a-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "", NULL, { 0, 0 } },
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "http://", NULL, { 0, 0 } },
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "ftp://", NULL, { 0, 0 } },
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES, "([-_a-z0-9.\\+]+@[-_a-z0-9.]+)", "mailto:", NULL, { 0, 0 } }
};
#define NUM_URL_REGEX_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
#define NUM_URL_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass);
static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter);
@ -83,35 +84,14 @@ static void
camel_mime_filter_tohtml_finalize (CamelObject *obj)
{
CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj;
int i;
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
if (filter->patterns[i].preg) {
regfree (filter->patterns[i].preg);
g_free (filter->patterns[i].preg);
}
}
g_free (filter->patterns);
camel_url_scanner_free (filter->scanner);
}
static void
camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter)
{
int i;
/* FIXME: use a global set of patterns instead? */
filter->patterns = g_malloc (sizeof (patterns));
memcpy (filter->patterns, patterns, sizeof (patterns));
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
filter->patterns[i].preg = g_malloc (sizeof (regex_t));
if (regcomp (filter->patterns[i].preg, patterns[i].pattern, REG_EXTENDED) == -1) {
/* error building the regex_t so we can't use this pattern */
filter->patterns[i].preg = NULL;
filter->patterns[i].mask = 0;
}
}
filter->scanner = camel_url_scanner_new ();
filter->flags = 0;
filter->colour = 0;
@ -279,64 +259,28 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
#define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
if (html->flags & CONVERT_URLS) {
struct _UrlRegexPattern *fmatch, *pat;
size_t matchlen, len;
regoff_t offset;
char *linebuf;
char save;
int i;
size_t matchlen, buflen, len;
urlmatch_t match;
len = inptr - start;
linebuf = g_malloc (len + 1);
memcpy (linebuf, start, len);
linebuf[len] = '\0';
start = linebuf;
save = '\0';
do {
/* search for all of our patterns */
offset = 0;
fmatch = NULL;
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
pat = html->patterns + i;
if ((html->flags & pat->mask) &&
!regexec (pat->preg, start, 1, &pat->matches, 0)) {
if (pat->matches.rm_so < offset) {
*(start + offset) = save;
fmatch = NULL;
}
if (!fmatch) {
fmatch = pat;
offset = pat->matches.rm_so;
/* optimisation so we don't have to search the
entire line buffer for the next pattern */
save = *(start + offset);
*(start + offset) = '\0';
}
}
}
if (fmatch) {
/* restore our char */
*(start + offset) = save;
if (camel_url_scanner_scan (html->scanner, start, len, &match)) {
/* write out anything before the first regex match */
outptr = writeln (filter, start, start + offset, outptr, &outend);
start += offset;
len -= offset;
outptr = writeln (filter, start, start + match.um_so,
outptr, &outend);
#define MATCHLEN(matches) (matches.rm_eo - matches.rm_so)
matchlen = MATCHLEN (fmatch->matches);
start += match.um_so;
len -= match.um_so;
i = 20 + strlen (fmatch->prefix) + matchlen + matchlen;
outptr = check_size (filter, outptr, &outend, i);
matchlen = match.um_eo - match.um_so;
buflen = 20 + strlen (match.prefix) + matchlen + matchlen;
outptr = check_size (filter, outptr, &outend, buflen);
/* write out the href tag */
outptr = g_stpcpy (outptr, "<a href=\"");
outptr = g_stpcpy (outptr, fmatch->prefix);
outptr = g_stpcpy (outptr, match.prefix);
memcpy (outptr, start, matchlen);
outptr += matchlen;
outptr = g_stpcpy (outptr, "\">");
@ -356,8 +300,6 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
break;
}
} while (len > 0);
g_free (linebuf);
} else {
outptr = writeln (filter, start, inptr, outptr, &outend);
}
@ -448,11 +390,17 @@ CamelMimeFilter *
camel_mime_filter_tohtml_new (guint32 flags, guint32 colour)
{
CamelMimeFilterToHTML *new;
int i;
new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ()));
new->flags = flags;
new->colour = colour;
for (i = 0; i < NUM_URL_PATTERNS; i++) {
if (patterns[i].mask & flags)
camel_url_scanner_add (new->scanner, &patterns[i].pattern);
}
return CAMEL_MIME_FILTER (new);
}

View File

@ -51,7 +51,7 @@ typedef struct _CamelMimeFilterToHTML CamelMimeFilterToHTML;
struct _CamelMimeFilterToHTML {
CamelMimeFilter parent;
struct _UrlRegexPattern *patterns;
struct _CamelUrlScanner *scanner;
guint32 flags;
guint32 colour;