Use camel-url-scanner instead of regex.
2002-12-10  Jeffrey Stedfast  <fejj@ximian.com>

* camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner instead of regex.

svn path=/trunk/; revision=19087
This commit is contained in:

committed by
Jeffrey Stedfast

parent
2dd7fb3a14
commit
1de04ce7bd
@ -1,3 +1,8 @@
|
||||
2002-12-10 Jeffrey Stedfast <fejj@ximian.com>
|
||||
|
||||
* camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner
|
||||
instead of regex.
|
||||
|
||||
2002-12-09 Jeffrey Stedfast <fejj@ximian.com>
|
||||
|
||||
* camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be
|
||||
|
@ -27,31 +27,32 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "camel-url-scanner.h"
|
||||
#include "camel-mime-filter-tohtml.h"
|
||||
|
||||
#define d(x)
|
||||
|
||||
#define CONVERT_WEB_URLS CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS
|
||||
#define CONVERT_ADDRSPEC CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES
|
||||
|
||||
struct _UrlRegexPattern {
|
||||
static struct {
|
||||
unsigned int mask;
|
||||
char *pattern;
|
||||
char *prefix;
|
||||
regex_t *preg;
|
||||
regmatch_t matches;
|
||||
urlpattern_t pattern;
|
||||
} patterns[] = {
|
||||
{ CONVERT_WEB_URLS, { "file://", "", camel_url_file_start, camel_url_file_end } },
|
||||
{ CONVERT_WEB_URLS, { "ftp://", "", camel_url_web_start, camel_url_web_end } },
|
||||
{ CONVERT_WEB_URLS, { "http://", "", camel_url_web_start, camel_url_web_end } },
|
||||
{ CONVERT_WEB_URLS, { "https://", "", camel_url_web_start, camel_url_web_end } },
|
||||
{ CONVERT_WEB_URLS, { "news://", "", camel_url_web_start, camel_url_web_end } },
|
||||
{ CONVERT_WEB_URLS, { "nntp://", "", camel_url_web_start, camel_url_web_end } },
|
||||
{ CONVERT_WEB_URLS, { "telnet://", "", camel_url_web_start, camel_url_web_end } },
|
||||
{ CONVERT_WEB_URLS, { "www.", "http://", camel_url_web_start, camel_url_web_end } },
|
||||
{ CONVERT_WEB_URLS, { "ftp.", "ftp://", camel_url_web_start, camel_url_web_end } },
|
||||
{ CONVERT_ADDRSPEC, { "@", "mailto:", camel_url_addrspec_start, camel_url_addrspec_end } },
|
||||
};
|
||||
|
||||
static struct _UrlRegexPattern patterns[] = {
|
||||
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "(news|nntp|telnet|file|ftp|http|https)://([-a-z0-9]+(:[-a-z0-9]+)?@)?[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-a-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "", NULL, { 0, 0 } },
|
||||
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "http://", NULL, { 0, 0 } },
|
||||
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "ftp://", NULL, { 0, 0 } },
|
||||
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES, "([-_a-z0-9.\\+]+@[-_a-z0-9.]+)", "mailto:", NULL, { 0, 0 } }
|
||||
};
|
||||
|
||||
#define NUM_URL_REGEX_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
|
||||
|
||||
#define NUM_URL_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
|
||||
|
||||
static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass);
|
||||
static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter);
|
||||
@ -83,35 +84,14 @@ static void
|
||||
camel_mime_filter_tohtml_finalize (CamelObject *obj)
|
||||
{
|
||||
CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
|
||||
if (filter->patterns[i].preg) {
|
||||
regfree (filter->patterns[i].preg);
|
||||
g_free (filter->patterns[i].preg);
|
||||
}
|
||||
}
|
||||
|
||||
g_free (filter->patterns);
|
||||
camel_url_scanner_free (filter->scanner);
|
||||
}
|
||||
|
||||
static void
|
||||
camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* FIXME: use a global set of patterns instead? */
|
||||
filter->patterns = g_malloc (sizeof (patterns));
|
||||
memcpy (filter->patterns, patterns, sizeof (patterns));
|
||||
|
||||
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
|
||||
filter->patterns[i].preg = g_malloc (sizeof (regex_t));
|
||||
if (regcomp (filter->patterns[i].preg, patterns[i].pattern, REG_EXTENDED) == -1) {
|
||||
/* error building the regex_t so we can't use this pattern */
|
||||
filter->patterns[i].preg = NULL;
|
||||
filter->patterns[i].mask = 0;
|
||||
}
|
||||
}
|
||||
filter->scanner = camel_url_scanner_new ();
|
||||
|
||||
filter->flags = 0;
|
||||
filter->colour = 0;
|
||||
@ -279,64 +259,28 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
|
||||
|
||||
#define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
|
||||
if (html->flags & CONVERT_URLS) {
|
||||
struct _UrlRegexPattern *fmatch, *pat;
|
||||
size_t matchlen, len;
|
||||
regoff_t offset;
|
||||
char *linebuf;
|
||||
char save;
|
||||
int i;
|
||||
size_t matchlen, buflen, len;
|
||||
urlmatch_t match;
|
||||
|
||||
len = inptr - start;
|
||||
linebuf = g_malloc (len + 1);
|
||||
memcpy (linebuf, start, len);
|
||||
linebuf[len] = '\0';
|
||||
|
||||
start = linebuf;
|
||||
save = '\0';
|
||||
|
||||
do {
|
||||
/* search for all of our patterns */
|
||||
offset = 0;
|
||||
fmatch = NULL;
|
||||
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
|
||||
pat = html->patterns + i;
|
||||
if ((html->flags & pat->mask) &&
|
||||
!regexec (pat->preg, start, 1, &pat->matches, 0)) {
|
||||
if (pat->matches.rm_so < offset) {
|
||||
*(start + offset) = save;
|
||||
fmatch = NULL;
|
||||
}
|
||||
|
||||
if (!fmatch) {
|
||||
fmatch = pat;
|
||||
offset = pat->matches.rm_so;
|
||||
|
||||
/* optimisation so we don't have to search the
|
||||
entire line buffer for the next pattern */
|
||||
save = *(start + offset);
|
||||
*(start + offset) = '\0';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (fmatch) {
|
||||
/* restore our char */
|
||||
*(start + offset) = save;
|
||||
|
||||
if (camel_url_scanner_scan (html->scanner, start, len, &match)) {
|
||||
/* write out anything before the first regex match */
|
||||
outptr = writeln (filter, start, start + offset, outptr, &outend);
|
||||
start += offset;
|
||||
len -= offset;
|
||||
outptr = writeln (filter, start, start + match.um_so,
|
||||
outptr, &outend);
|
||||
|
||||
#define MATCHLEN(matches) (matches.rm_eo - matches.rm_so)
|
||||
matchlen = MATCHLEN (fmatch->matches);
|
||||
start += match.um_so;
|
||||
len -= match.um_so;
|
||||
|
||||
i = 20 + strlen (fmatch->prefix) + matchlen + matchlen;
|
||||
outptr = check_size (filter, outptr, &outend, i);
|
||||
matchlen = match.um_eo - match.um_so;
|
||||
|
||||
buflen = 20 + strlen (match.prefix) + matchlen + matchlen;
|
||||
outptr = check_size (filter, outptr, &outend, buflen);
|
||||
|
||||
/* write out the href tag */
|
||||
outptr = g_stpcpy (outptr, "<a href=\"");
|
||||
outptr = g_stpcpy (outptr, fmatch->prefix);
|
||||
outptr = g_stpcpy (outptr, match.prefix);
|
||||
memcpy (outptr, start, matchlen);
|
||||
outptr += matchlen;
|
||||
outptr = g_stpcpy (outptr, "\">");
|
||||
@ -356,8 +300,6 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
|
||||
break;
|
||||
}
|
||||
} while (len > 0);
|
||||
|
||||
g_free (linebuf);
|
||||
} else {
|
||||
outptr = writeln (filter, start, inptr, outptr, &outend);
|
||||
}
|
||||
@ -448,11 +390,17 @@ CamelMimeFilter *
|
||||
camel_mime_filter_tohtml_new (guint32 flags, guint32 colour)
|
||||
{
|
||||
CamelMimeFilterToHTML *new;
|
||||
int i;
|
||||
|
||||
new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ()));
|
||||
|
||||
new->flags = flags;
|
||||
new->colour = colour;
|
||||
|
||||
for (i = 0; i < NUM_URL_PATTERNS; i++) {
|
||||
if (patterns[i].mask & flags)
|
||||
camel_url_scanner_add (new->scanner, &patterns[i].pattern);
|
||||
}
|
||||
|
||||
return CAMEL_MIME_FILTER (new);
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ typedef struct _CamelMimeFilterToHTML CamelMimeFilterToHTML;
|
||||
struct _CamelMimeFilterToHTML {
|
||||
CamelMimeFilter parent;
|
||||
|
||||
struct _UrlRegexPattern *patterns;
|
||||
struct _CamelUrlScanner *scanner;
|
||||
|
||||
guint32 flags;
|
||||
guint32 colour;
|
||||
|
Reference in New Issue
Block a user