Use camel-url-scanner instead of regex.
2002-12-10 Jeffrey Stedfast <fejj@ximian.com> * camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner instead of regex. svn path=/trunk/; revision=19087
This commit is contained in:

committed by
Jeffrey Stedfast

parent
2dd7fb3a14
commit
1de04ce7bd
@ -1,3 +1,8 @@
|
|||||||
|
2002-12-10 Jeffrey Stedfast <fejj@ximian.com>
|
||||||
|
|
||||||
|
* camel-mime-filter-tohtml.c (html_convert): Use camel-url-scanner
|
||||||
|
instead of regex.
|
||||||
|
|
||||||
2002-12-09 Jeffrey Stedfast <fejj@ximian.com>
|
2002-12-09 Jeffrey Stedfast <fejj@ximian.com>
|
||||||
|
|
||||||
* camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be
|
* camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be
|
||||||
|
@ -27,31 +27,32 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
|
||||||
#include <regex.h>
|
|
||||||
|
|
||||||
|
#include "camel-url-scanner.h"
|
||||||
#include "camel-mime-filter-tohtml.h"
|
#include "camel-mime-filter-tohtml.h"
|
||||||
|
|
||||||
#define d(x)
|
#define d(x)
|
||||||
|
|
||||||
|
#define CONVERT_WEB_URLS CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS
|
||||||
|
#define CONVERT_ADDRSPEC CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES
|
||||||
|
|
||||||
struct _UrlRegexPattern {
|
static struct {
|
||||||
unsigned int mask;
|
unsigned int mask;
|
||||||
char *pattern;
|
urlpattern_t pattern;
|
||||||
char *prefix;
|
} patterns[] = {
|
||||||
regex_t *preg;
|
{ CONVERT_WEB_URLS, { "file://", "", camel_url_file_start, camel_url_file_end } },
|
||||||
regmatch_t matches;
|
{ CONVERT_WEB_URLS, { "ftp://", "", camel_url_web_start, camel_url_web_end } },
|
||||||
|
{ CONVERT_WEB_URLS, { "http://", "", camel_url_web_start, camel_url_web_end } },
|
||||||
|
{ CONVERT_WEB_URLS, { "https://", "", camel_url_web_start, camel_url_web_end } },
|
||||||
|
{ CONVERT_WEB_URLS, { "news://", "", camel_url_web_start, camel_url_web_end } },
|
||||||
|
{ CONVERT_WEB_URLS, { "nntp://", "", camel_url_web_start, camel_url_web_end } },
|
||||||
|
{ CONVERT_WEB_URLS, { "telnet://", "", camel_url_web_start, camel_url_web_end } },
|
||||||
|
{ CONVERT_WEB_URLS, { "www.", "http://", camel_url_web_start, camel_url_web_end } },
|
||||||
|
{ CONVERT_WEB_URLS, { "ftp.", "ftp://", camel_url_web_start, camel_url_web_end } },
|
||||||
|
{ CONVERT_ADDRSPEC, { "@", "mailto:", camel_url_addrspec_start, camel_url_addrspec_end } },
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct _UrlRegexPattern patterns[] = {
|
#define NUM_URL_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
|
||||||
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "(news|nntp|telnet|file|ftp|http|https)://([-a-z0-9]+(:[-a-z0-9]+)?@)?[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-a-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "", NULL, { 0, 0 } },
|
|
||||||
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "www\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "http://", NULL, { 0, 0 } },
|
|
||||||
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS, "ftp\\.[-a-z0-9.]+[-a-z0-9](:[0-9]*)?(/[-A-Za-z0-9_$.+!*(),;:@%&=?/~#]*[^]'.}>\\) ,?!;:\"]?)?", "ftp://", NULL, { 0, 0 } },
|
|
||||||
{ CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES, "([-_a-z0-9.\\+]+@[-_a-z0-9.]+)", "mailto:", NULL, { 0, 0 } }
|
|
||||||
};
|
|
||||||
|
|
||||||
#define NUM_URL_REGEX_PATTERNS (sizeof (patterns) / sizeof (patterns[0]))
|
|
||||||
|
|
||||||
|
|
||||||
static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass);
|
static void camel_mime_filter_tohtml_class_init (CamelMimeFilterToHTMLClass *klass);
|
||||||
static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter);
|
static void camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter);
|
||||||
@ -83,35 +84,14 @@ static void
|
|||||||
camel_mime_filter_tohtml_finalize (CamelObject *obj)
|
camel_mime_filter_tohtml_finalize (CamelObject *obj)
|
||||||
{
|
{
|
||||||
CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj;
|
CamelMimeFilterToHTML *filter = (CamelMimeFilterToHTML *) obj;
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
|
camel_url_scanner_free (filter->scanner);
|
||||||
if (filter->patterns[i].preg) {
|
|
||||||
regfree (filter->patterns[i].preg);
|
|
||||||
g_free (filter->patterns[i].preg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
g_free (filter->patterns);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter)
|
camel_mime_filter_tohtml_init (CamelMimeFilterToHTML *filter)
|
||||||
{
|
{
|
||||||
int i;
|
filter->scanner = camel_url_scanner_new ();
|
||||||
|
|
||||||
/* FIXME: use a global set of patterns instead? */
|
|
||||||
filter->patterns = g_malloc (sizeof (patterns));
|
|
||||||
memcpy (filter->patterns, patterns, sizeof (patterns));
|
|
||||||
|
|
||||||
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
|
|
||||||
filter->patterns[i].preg = g_malloc (sizeof (regex_t));
|
|
||||||
if (regcomp (filter->patterns[i].preg, patterns[i].pattern, REG_EXTENDED) == -1) {
|
|
||||||
/* error building the regex_t so we can't use this pattern */
|
|
||||||
filter->patterns[i].preg = NULL;
|
|
||||||
filter->patterns[i].mask = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
filter->flags = 0;
|
filter->flags = 0;
|
||||||
filter->colour = 0;
|
filter->colour = 0;
|
||||||
@ -279,64 +259,28 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
|
|||||||
|
|
||||||
#define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
|
#define CONVERT_URLS (CAMEL_MIME_FILTER_TOHTML_CONVERT_URLS | CAMEL_MIME_FILTER_TOHTML_CONVERT_ADDRESSES)
|
||||||
if (html->flags & CONVERT_URLS) {
|
if (html->flags & CONVERT_URLS) {
|
||||||
struct _UrlRegexPattern *fmatch, *pat;
|
size_t matchlen, buflen, len;
|
||||||
size_t matchlen, len;
|
urlmatch_t match;
|
||||||
regoff_t offset;
|
|
||||||
char *linebuf;
|
|
||||||
char save;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
len = inptr - start;
|
len = inptr - start;
|
||||||
linebuf = g_malloc (len + 1);
|
|
||||||
memcpy (linebuf, start, len);
|
|
||||||
linebuf[len] = '\0';
|
|
||||||
|
|
||||||
start = linebuf;
|
|
||||||
save = '\0';
|
|
||||||
|
|
||||||
do {
|
do {
|
||||||
/* search for all of our patterns */
|
if (camel_url_scanner_scan (html->scanner, start, len, &match)) {
|
||||||
offset = 0;
|
|
||||||
fmatch = NULL;
|
|
||||||
for (i = 0; i < NUM_URL_REGEX_PATTERNS; i++) {
|
|
||||||
pat = html->patterns + i;
|
|
||||||
if ((html->flags & pat->mask) &&
|
|
||||||
!regexec (pat->preg, start, 1, &pat->matches, 0)) {
|
|
||||||
if (pat->matches.rm_so < offset) {
|
|
||||||
*(start + offset) = save;
|
|
||||||
fmatch = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!fmatch) {
|
|
||||||
fmatch = pat;
|
|
||||||
offset = pat->matches.rm_so;
|
|
||||||
|
|
||||||
/* optimisation so we don't have to search the
|
|
||||||
entire line buffer for the next pattern */
|
|
||||||
save = *(start + offset);
|
|
||||||
*(start + offset) = '\0';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fmatch) {
|
|
||||||
/* restore our char */
|
|
||||||
*(start + offset) = save;
|
|
||||||
|
|
||||||
/* write out anything before the first regex match */
|
/* write out anything before the first regex match */
|
||||||
outptr = writeln (filter, start, start + offset, outptr, &outend);
|
outptr = writeln (filter, start, start + match.um_so,
|
||||||
start += offset;
|
outptr, &outend);
|
||||||
len -= offset;
|
|
||||||
|
|
||||||
#define MATCHLEN(matches) (matches.rm_eo - matches.rm_so)
|
start += match.um_so;
|
||||||
matchlen = MATCHLEN (fmatch->matches);
|
len -= match.um_so;
|
||||||
|
|
||||||
i = 20 + strlen (fmatch->prefix) + matchlen + matchlen;
|
matchlen = match.um_eo - match.um_so;
|
||||||
outptr = check_size (filter, outptr, &outend, i);
|
|
||||||
|
buflen = 20 + strlen (match.prefix) + matchlen + matchlen;
|
||||||
|
outptr = check_size (filter, outptr, &outend, buflen);
|
||||||
|
|
||||||
/* write out the href tag */
|
/* write out the href tag */
|
||||||
outptr = g_stpcpy (outptr, "<a href=\"");
|
outptr = g_stpcpy (outptr, "<a href=\"");
|
||||||
outptr = g_stpcpy (outptr, fmatch->prefix);
|
outptr = g_stpcpy (outptr, match.prefix);
|
||||||
memcpy (outptr, start, matchlen);
|
memcpy (outptr, start, matchlen);
|
||||||
outptr += matchlen;
|
outptr += matchlen;
|
||||||
outptr = g_stpcpy (outptr, "\">");
|
outptr = g_stpcpy (outptr, "\">");
|
||||||
@ -356,8 +300,6 @@ html_convert (CamelMimeFilter *filter, char *in, size_t inlen, size_t prespace,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} while (len > 0);
|
} while (len > 0);
|
||||||
|
|
||||||
g_free (linebuf);
|
|
||||||
} else {
|
} else {
|
||||||
outptr = writeln (filter, start, inptr, outptr, &outend);
|
outptr = writeln (filter, start, inptr, outptr, &outend);
|
||||||
}
|
}
|
||||||
@ -448,11 +390,17 @@ CamelMimeFilter *
|
|||||||
camel_mime_filter_tohtml_new (guint32 flags, guint32 colour)
|
camel_mime_filter_tohtml_new (guint32 flags, guint32 colour)
|
||||||
{
|
{
|
||||||
CamelMimeFilterToHTML *new;
|
CamelMimeFilterToHTML *new;
|
||||||
|
int i;
|
||||||
|
|
||||||
new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ()));
|
new = CAMEL_MIME_FILTER_TOHTML (camel_object_new (camel_mime_filter_tohtml_get_type ()));
|
||||||
|
|
||||||
new->flags = flags;
|
new->flags = flags;
|
||||||
new->colour = colour;
|
new->colour = colour;
|
||||||
|
|
||||||
|
for (i = 0; i < NUM_URL_PATTERNS; i++) {
|
||||||
|
if (patterns[i].mask & flags)
|
||||||
|
camel_url_scanner_add (new->scanner, &patterns[i].pattern);
|
||||||
|
}
|
||||||
|
|
||||||
return CAMEL_MIME_FILTER (new);
|
return CAMEL_MIME_FILTER (new);
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,7 @@ typedef struct _CamelMimeFilterToHTML CamelMimeFilterToHTML;
|
|||||||
struct _CamelMimeFilterToHTML {
|
struct _CamelMimeFilterToHTML {
|
||||||
CamelMimeFilter parent;
|
CamelMimeFilter parent;
|
||||||
|
|
||||||
struct _UrlRegexPattern *patterns;
|
struct _CamelUrlScanner *scanner;
|
||||||
|
|
||||||
guint32 flags;
|
guint32 flags;
|
||||||
guint32 colour;
|
guint32 colour;
|
||||||
|
Reference in New Issue
Block a user