Fixed to not be fooled in the case where the address is followed immediately by a period.

2002-12-09  Jeffrey Stedfast  <fejj@ximian.com>

	* camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be
	fooled in the case where the address is followed immediately by a
	period.
	(camel_url_web_end): Made more robust.
	(camel_url_scanner_scan): Oops. We need to set the match->pattern
	string pointer to the correct pattern before executing the
	start/end methods (as some of them rely on this info).

svn path=/trunk/; revision=19077
This commit is contained in:
Jeffrey Stedfast
2002-12-10 03:44:17 +00:00
committed by Jeffrey Stedfast
parent bdd9cae5e9
commit ed25ea4d31
2 changed files with 60 additions and 35 deletions

View File

@ -1,3 +1,13 @@
2002-12-09 Jeffrey Stedfast <fejj@ximian.com>
* camel-url-scanner.c (camel_url_addrspec_end): Fixed to not be
fooled in the case where the address is followed immediately by a
period.
(camel_url_web_end): Made more robust.
(camel_url_scanner_scan): Oops. We need to set the match->pattern
string pointer to the correct pattern before executing the
start/end methods (as some of them rely on this info).
2002-12-09 Jeffrey Stedfast <fejj@ximian.com>
* camel-url-scanner.c: New code to scan for patterns (used only

View File

@ -86,6 +86,9 @@ camel_url_scanner_scan (CamelUrlScanner *scanner, const char *in, size_t inlen,
pat = g_ptr_array_index (scanner->patterns, pattern);
match->pattern = pat->pattern;
match->prefix = pat->prefix;
inend = in + inlen;
if (!pat->start (in, pos, inend, match))
return FALSE;
@ -93,9 +96,6 @@ camel_url_scanner_scan (CamelUrlScanner *scanner, const char *in, size_t inlen,
if (!pat->end (in, pos, inend, match))
return FALSE;
match->pattern = pat->pattern;
match->prefix = pat->prefix;
return TRUE;
}
@ -103,12 +103,12 @@ camel_url_scanner_scan (CamelUrlScanner *scanner, const char *in, size_t inlen,
static unsigned char url_scanner_table[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 1, 1, 9, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
24, 64, 96, 64, 64, 64, 64, 64, 96, 96, 64, 64, 96, 64, 96, 96,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 96, 96, 32, 64, 32, 64,
96, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 96, 96, 96, 64, 64,
64, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 64, 64, 64, 64, 1,
24,128,160,128,128,128,128,128,160,160,128,128,160,192,160,160,
68, 68, 68, 68, 68, 68, 68, 68, 68, 68,160,160, 32,128, 32,128,
160, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,160,160,160,128,128,
128, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,
66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66,128,128,128,128, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -126,7 +126,8 @@ enum {
IS_LWSP = (1 << 3),
IS_SPACE = (1 << 4),
IS_SPECIAL = (1 << 5),
IS_URLSAFE = (1 << 6),
IS_DOMAIN = (1 << 6),
IS_URLSAFE = (1 << 7),
};
#define is_ctrl(x) ((url_scanner_table[(unsigned char)(x)] & IS_CTRL) != 0)
@ -134,7 +135,7 @@ enum {
#define is_atom(x) ((url_scanner_table[(unsigned char)(x)] & (IS_SPECIAL|IS_SPACE|IS_CTRL)) == 0)
#define is_alpha(x) ((url_scanner_table[(unsigned char)(x)] & IS_ALPHA) != 0)
#define is_digit(x) ((url_scanner_table[(unsigned char)(x)] & IS_DIGIT) != 0)
#define is_domain(x) ((url_scanner_table[(unsigned char)(x)] & (IS_ALPHA|IS_DIGIT)) != 0 || (x) == '-')
#define is_domain(x) ((url_scanner_table[(unsigned char)(x)] & IS_DOMAIN) != 0)
#define is_urlsafe(x) ((url_scanner_table[(unsigned char)(x)] & (IS_ALPHA|IS_DIGIT|IS_URLSAFE)) != 0)
@ -212,7 +213,7 @@ camel_url_addrspec_end (const char *in, const char *pos, const char *inend, urlm
while (inptr < inend && is_domain (*inptr))
inptr++;
if (inptr < inend && *inptr == '.')
if (inptr < inend && *inptr == '.' && is_domain (inptr[1]))
inptr++;
}
}
@ -289,35 +290,48 @@ camel_url_web_end (const char *in, const char *pos, const char *inend, urlmatch_
} while (parts < 4);
} else if (is_domain (*inptr)) {
do {
while (inptr < inend && is_domain (*inptr))
inptr++;
if (inptr < inend && *inptr == '.')
while (inptr < inend) {
if (is_domain (*inptr))
inptr++;
else
break;
} while (inptr < inend);
while (inptr < inend && is_domain (*inptr))
inptr++;
if (inptr < inend && *inptr == '.' && is_domain (inptr[1]))
inptr++;
}
} else {
return FALSE;
}
if (inptr < inend && *inptr == ':') {
/* skip past the port */
inptr++;
port = 0;
while (inptr < inend && is_digit (*inptr) && port < 65536)
port = (port * 10) + (*inptr++ - '0');
}
if (inptr < inend && *inptr == '/') {
/* skip past our url path */
inptr++;
while (inptr < inend && is_urlsafe (*inptr))
if (inptr < inend) {
switch (*inptr) {
case ':': /* port notation */
inptr++;
port = 0;
while (inptr < inend && is_digit (*inptr) && port < 65536)
port = (port * 10) + (*inptr++ - '0');
if (port >= 65536)
inptr--;
if (inptr >= inend || *inptr != '/')
break;
/* we have a '/' so there could be a path - fall through */
case '/': /* we've detected a path component to our url */
inptr++;
while (inptr < inend && is_urlsafe (*inptr))
inptr++;
break;
default:
break;
}
}
match->um_eo = (inptr - in);
@ -358,17 +372,18 @@ url_scanner_table_init (void)
if (i < 32)
url_scanner_table[i] |= IS_CTRL;
if ((i >= '0' && i <= '9'))
url_scanner_table[i] |= IS_DIGIT;
url_scanner_table[i] |= IS_DIGIT | IS_DOMAIN;
if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'))
url_scanner_table[i] |= IS_ALPHA;
url_scanner_table[i] |= IS_ALPHA | IS_DOMAIN;
}
url_scanner_table[127] |= IS_CTRL;
url_scanner_table[' '] |= IS_SPACE;
url_scanner_table['-'] |= IS_DOMAIN;
/* not defined to be special in rfc0822, but when scanning
backwards to find the beginning of the email address we do
not want to incldue this char if we come accross it - so
not want to include this char if we come across it - so
this is kind of a hack */
url_scanner_table['/'] |= IS_SPECIAL;