554 lines
13 KiB
C
554 lines
13 KiB
C
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
|
|
/* camel-url.c : utility functions to parse URLs */
|
|
|
|
/*
|
|
* Authors:
|
|
* Dan Winship <danw@ximian.com>
|
|
* Tiago Antào <tiagoantao@bigfoot.com>
|
|
* Jeffrey Stedfast <fejj@ximian.com>
|
|
*
|
|
* Copyright 1999-2001 Ximian, Inc. (www.ximian.com)
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
* License as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
|
* USA
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include <config.h>
|
|
#endif
|
|
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "camel-url.h"
|
|
#include "camel-exception.h"
|
|
#include "camel-mime-utils.h"
|
|
#include "camel-object.h"
|
|
|
|
static void copy_param (GQuark key_id, gpointer data, gpointer user_data);
|
|
static void output_param (GQuark key_id, gpointer data, gpointer user_data);
|
|
|
|
/**
|
|
* camel_url_new_with_base:
|
|
* @base: a base URL
|
|
* @url_string: the URL
|
|
*
|
|
* Parses @url_string relative to @base.
|
|
*
|
|
* Return value: a parsed CamelURL.
|
|
**/
|
|
CamelURL *
|
|
camel_url_new_with_base (CamelURL *base, const char *url_string)
|
|
{
|
|
CamelURL *url;
|
|
const char *end, *hash, *colon, *semi, *at, *slash, *question;
|
|
const char *p;
|
|
|
|
url = g_new0 (CamelURL, 1);
|
|
|
|
/* See RFC1808 for details. IF YOU CHANGE ANYTHING IN THIS
|
|
* FUNCTION, RUN tests/misc/url AFTERWARDS.
|
|
*/
|
|
|
|
/* Find fragment. */
|
|
end = hash = strchr (url_string, '#');
|
|
if (hash && hash[1]) {
|
|
url->fragment = g_strdup (hash + 1);
|
|
camel_url_decode (url->fragment);
|
|
} else
|
|
end = url_string + strlen (url_string);
|
|
|
|
/* Find protocol: initial [a-z+.-]* substring until ":" */
|
|
p = url_string;
|
|
while (p < end && (isalnum ((unsigned char)*p) ||
|
|
*p == '.' || *p == '+' || *p == '-'))
|
|
p++;
|
|
|
|
if (p > url_string && *p == ':') {
|
|
url->protocol = g_strndup (url_string, p - url_string);
|
|
g_strdown (url->protocol);
|
|
url_string = p + 1;
|
|
}
|
|
|
|
if (!*url_string && !base)
|
|
return url;
|
|
|
|
/* Check for authority */
|
|
if (strncmp (url_string, "//", 2) == 0) {
|
|
url_string += 2;
|
|
|
|
slash = url_string + strcspn (url_string, "/#");
|
|
at = strchr (url_string, '@');
|
|
if (at && at < slash) {
|
|
colon = strchr (url_string, ':');
|
|
if (colon && colon < at) {
|
|
url->passwd = g_strndup (colon + 1,
|
|
at - colon - 1);
|
|
camel_url_decode (url->passwd);
|
|
} else {
|
|
url->passwd = NULL;
|
|
colon = at;
|
|
}
|
|
|
|
semi = strchr(url_string, ';');
|
|
if (semi && semi < colon &&
|
|
!strncasecmp (semi, ";auth=", 6)) {
|
|
url->authmech = g_strndup (semi + 6,
|
|
colon - semi - 6);
|
|
camel_url_decode (url->authmech);
|
|
} else {
|
|
url->authmech = NULL;
|
|
semi = colon;
|
|
}
|
|
|
|
url->user = g_strndup (url_string, semi - url_string);
|
|
camel_url_decode (url->user);
|
|
url_string = at + 1;
|
|
} else
|
|
url->user = url->passwd = url->authmech = NULL;
|
|
|
|
/* Find host and port. */
|
|
colon = strchr (url_string, ':');
|
|
if (colon && colon < slash) {
|
|
url->host = g_strndup (url_string, colon - url_string);
|
|
url->port = strtoul (colon + 1, NULL, 10);
|
|
} else {
|
|
url->host = g_strndup (url_string, slash - url_string);
|
|
camel_url_decode (url->host);
|
|
url->port = 0;
|
|
}
|
|
|
|
url_string = slash;
|
|
}
|
|
|
|
/* Find query */
|
|
question = memchr (url_string, '?', end - url_string);
|
|
if (question) {
|
|
if (question[1]) {
|
|
url->query = g_strndup (question + 1,
|
|
end - (question + 1));
|
|
camel_url_decode (url->query);
|
|
}
|
|
end = question;
|
|
}
|
|
|
|
/* Find parameters */
|
|
semi = memchr (url_string, ';', end - url_string);
|
|
if (semi) {
|
|
if (semi[1]) {
|
|
const char *cur, *p, *eq;
|
|
char *name, *value;
|
|
|
|
for (cur = semi + 1; cur < end; cur = p + 1) {
|
|
p = memchr (cur, ';', end - cur);
|
|
if (!p)
|
|
p = end;
|
|
eq = memchr (cur, '=', p - cur);
|
|
if (eq) {
|
|
name = g_strndup (cur, eq - cur);
|
|
value = g_strndup (eq + 1, p - (eq + 1));
|
|
camel_url_decode (value);
|
|
} else {
|
|
name = g_strndup (cur, p - cur);
|
|
value = g_strdup ("");
|
|
}
|
|
camel_url_decode (name);
|
|
g_datalist_set_data_full (&url->params, name,
|
|
value, g_free);
|
|
g_free (name);
|
|
}
|
|
}
|
|
end = semi;
|
|
}
|
|
|
|
if (end != url_string) {
|
|
url->path = g_strndup (url_string, end - url_string);
|
|
camel_url_decode (url->path);
|
|
}
|
|
|
|
/* Apply base URL. Again, this is spelled out in RFC 1808. */
|
|
if (base && !url->protocol && url->host)
|
|
url->protocol = g_strdup (base->protocol);
|
|
else if (base && !url->protocol) {
|
|
if (!url->user && !url->authmech && !url->passwd &&
|
|
!url->host && !url->port && !url->path &&
|
|
!url->params && !url->query && !url->fragment)
|
|
url->fragment = g_strdup (base->fragment);
|
|
|
|
url->protocol = g_strdup (base->protocol);
|
|
url->user = g_strdup (base->user);
|
|
url->authmech = g_strdup (base->authmech);
|
|
url->passwd = g_strdup (base->passwd);
|
|
url->host = g_strdup (base->host);
|
|
url->port = base->port;
|
|
|
|
if (!url->path) {
|
|
url->path = g_strdup (base->path);
|
|
if (!url->params) {
|
|
g_datalist_foreach (&base->params, copy_param,
|
|
&url->params);
|
|
if (!url->query)
|
|
url->query = g_strdup (base->query);
|
|
}
|
|
} else if (*url->path != '/') {
|
|
char *newpath, *last, *p, *q;
|
|
|
|
last = strrchr (base->path, '/');
|
|
if (last) {
|
|
newpath = g_strdup_printf ("%.*s/%s",
|
|
last - base->path,
|
|
base->path,
|
|
url->path);
|
|
} else
|
|
newpath = g_strdup_printf ("/%s", url->path);
|
|
|
|
/* Remove "./" where "." is a complete segment. */
|
|
for (p = newpath + 1; *p; ) {
|
|
if (*(p - 1) == '/' &&
|
|
*p == '.' && *(p + 1) == '/')
|
|
memmove (p, p + 2, strlen (p + 2) + 1);
|
|
else
|
|
p++;
|
|
}
|
|
/* Remove "." at end. */
|
|
if (p > newpath + 2 &&
|
|
*(p - 1) == '.' && *(p - 2) == '/')
|
|
*(p - 1) = '\0';
|
|
/* Remove "<segment>/../" where <segment> != ".." */
|
|
for (p = newpath + 1; *p; ) {
|
|
if (!strncmp (p, "../", 3)) {
|
|
p += 3;
|
|
continue;
|
|
}
|
|
q = strchr (p + 1, '/');
|
|
if (!q)
|
|
break;
|
|
if (strncmp (q, "/../", 4) != 0) {
|
|
p = q + 1;
|
|
continue;
|
|
}
|
|
memmove (p, q + 4, strlen (q + 4) + 1);
|
|
p = newpath + 1;
|
|
}
|
|
/* Remove "<segment>/.." at end */
|
|
q = strrchr (newpath, '/');
|
|
if (q && !strcmp (q, "/..")) {
|
|
p = q - 1;
|
|
while (p > newpath && *p != '/')
|
|
p--;
|
|
if (strncmp (p, "/../", 4) != 0)
|
|
*(p + 1) = 0;
|
|
}
|
|
g_free (url->path);
|
|
url->path = newpath;
|
|
}
|
|
}
|
|
|
|
return url;
|
|
}
|
|
|
|
static void
|
|
copy_param (GQuark key_id, gpointer data, gpointer user_data)
|
|
{
|
|
GData **copy = user_data;
|
|
|
|
g_datalist_id_set_data_full (copy, key_id, g_strdup (data), g_free);
|
|
}
|
|
|
|
/**
|
|
* camel_url_new:
|
|
* @url_string: a URL
|
|
* @ex: a CamelException
|
|
*
|
|
* Parses an absolute URL.
|
|
*
|
|
* Return value: a CamelURL, or %NULL.
|
|
**/
|
|
CamelURL *
|
|
camel_url_new (const char *url_string, CamelException *ex)
|
|
{
|
|
CamelURL *url = camel_url_new_with_base (NULL, url_string);
|
|
|
|
if (!url->protocol) {
|
|
camel_url_free (url);
|
|
camel_exception_setv (ex, CAMEL_EXCEPTION_SERVICE_URL_INVALID,
|
|
_("Could not parse URL `%s'"),
|
|
url_string);
|
|
return NULL;
|
|
}
|
|
return url;
|
|
}
|
|
|
|
/**
|
|
* camel_url_to_string:
|
|
* @url: a CamelURL
|
|
* @flags: additional translation options.
|
|
*
|
|
* Return value: a string representing @url, which the caller must free.
|
|
**/
|
|
char *
|
|
camel_url_to_string (CamelURL *url, guint32 flags)
|
|
{
|
|
GString *str;
|
|
char *enc, *return_result;
|
|
|
|
/* IF YOU CHANGE ANYTHING IN THIS FUNCTION, RUN
|
|
* tests/misc/url AFTERWARD.
|
|
*/
|
|
|
|
str = g_string_sized_new (20);
|
|
|
|
if (url->protocol)
|
|
g_string_sprintfa (str, "%s:", url->protocol);
|
|
if (url->host) {
|
|
g_string_append (str, "//");
|
|
if (url->user) {
|
|
enc = camel_url_encode (url->user, TRUE, ":;@/");
|
|
g_string_append (str, enc);
|
|
g_free (enc);
|
|
}
|
|
if (url->authmech && *url->authmech) {
|
|
enc = camel_url_encode (url->authmech, TRUE, ":@/");
|
|
g_string_sprintfa (str, ";auth=%s", enc);
|
|
g_free (enc);
|
|
}
|
|
if (url->passwd && !(flags & CAMEL_URL_HIDE_PASSWORD)) {
|
|
enc = camel_url_encode (url->passwd, TRUE, "@/");
|
|
g_string_sprintfa (str, ":%s", enc);
|
|
g_free (enc);
|
|
}
|
|
if (url->host) {
|
|
enc = camel_url_encode (url->host, TRUE, ":/");
|
|
g_string_sprintfa (str, "%s%s", url->user ? "@" : "", enc);
|
|
g_free (enc);
|
|
}
|
|
if (url->port)
|
|
g_string_sprintfa (str, ":%d", url->port);
|
|
if (!url->path && (url->params || url->query || url->fragment))
|
|
g_string_append_c (str, '/');
|
|
}
|
|
|
|
if (url->path) {
|
|
enc = camel_url_encode (url->path, FALSE, ";?#");
|
|
g_string_sprintfa (str, "%s", enc);
|
|
g_free (enc);
|
|
}
|
|
if (url->params && !(flags & CAMEL_URL_HIDE_PARAMS))
|
|
g_datalist_foreach (&url->params, output_param, str);
|
|
if (url->query) {
|
|
enc = camel_url_encode (url->query, FALSE, "#");
|
|
g_string_sprintfa (str, "?%s", enc);
|
|
g_free (enc);
|
|
}
|
|
if (url->fragment) {
|
|
enc = camel_url_encode (url->fragment, FALSE, NULL);
|
|
g_string_sprintfa (str, "#%s", enc);
|
|
g_free (enc);
|
|
}
|
|
|
|
return_result = str->str;
|
|
g_string_free (str, FALSE);
|
|
return return_result;
|
|
}
|
|
|
|
static void
|
|
output_param (GQuark key_id, gpointer data, gpointer user_data)
|
|
{
|
|
GString *str = user_data;
|
|
char *enc;
|
|
|
|
enc = camel_url_encode (g_quark_to_string (key_id), FALSE, "?#");
|
|
g_string_sprintfa (str, ";%s", enc);
|
|
g_free (enc);
|
|
if (*(char *)data) {
|
|
enc = camel_url_encode (data, FALSE, "?#");
|
|
g_string_sprintfa (str, "=%s", enc);
|
|
g_free (enc);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* camel_url_free:
|
|
* @url: a CamelURL
|
|
*
|
|
* Frees @url
|
|
**/
|
|
void
|
|
camel_url_free (CamelURL *url)
|
|
{
|
|
if (url) {
|
|
g_free (url->protocol);
|
|
g_free (url->user);
|
|
g_free (url->authmech);
|
|
g_free (url->passwd);
|
|
g_free (url->host);
|
|
g_free (url->path);
|
|
g_datalist_clear (&url->params);
|
|
g_free (url->query);
|
|
g_free (url->fragment);
|
|
|
|
g_free (url);
|
|
}
|
|
}
|
|
|
|
|
|
#define DEFINE_CAMEL_URL_SET(part) \
|
|
void \
|
|
camel_url_set_##part (CamelURL *url, const char *part) \
|
|
{ \
|
|
g_free (url->part); \
|
|
url->part = g_strdup (part); \
|
|
}
|
|
|
|
DEFINE_CAMEL_URL_SET (protocol)
|
|
DEFINE_CAMEL_URL_SET (user)
|
|
DEFINE_CAMEL_URL_SET (authmech)
|
|
DEFINE_CAMEL_URL_SET (passwd)
|
|
DEFINE_CAMEL_URL_SET (host)
|
|
DEFINE_CAMEL_URL_SET (path)
|
|
DEFINE_CAMEL_URL_SET (query)
|
|
DEFINE_CAMEL_URL_SET (fragment)
|
|
|
|
void
|
|
camel_url_set_port (CamelURL *url, int port)
|
|
{
|
|
url->port = port;
|
|
}
|
|
|
|
void
|
|
camel_url_set_param (CamelURL *url, const char *name, const char *value)
|
|
{
|
|
g_datalist_set_data_full (&url->params, name, value ? g_strdup (value) : NULL, g_free);
|
|
}
|
|
|
|
const char *
|
|
camel_url_get_param (CamelURL *url, const char *name)
|
|
{
|
|
return g_datalist_get_data (&url->params, name);
|
|
}
|
|
|
|
|
|
/**
|
|
* camel_url_encode:
|
|
* @part: a URL part
|
|
* @escape_unsafe: whether or not to %-escape "unsafe" characters.
|
|
* ("%#<>{}|\^~[]`)
|
|
* @escape_extra: additional characters to escape.
|
|
*
|
|
* This %-encodes the given URL part and returns the escaped version
|
|
* in allocated memory, which the caller must free when it is done.
|
|
**/
|
|
char *
|
|
camel_url_encode (const char *part, gboolean escape_unsafe,
|
|
const char *escape_extra)
|
|
{
|
|
char *work, *p;
|
|
|
|
/* worst case scenario = 3 times the initial */
|
|
p = work = g_malloc (3 * strlen (part) + 1);
|
|
|
|
while (*part) {
|
|
if (((guchar) *part >= 127) || ((guchar) *part <= ' ') ||
|
|
(escape_unsafe && strchr ("\"%#<>{}|\\^~[]`", *part)) ||
|
|
(escape_extra && strchr (escape_extra, *part))) {
|
|
sprintf (p, "%%%.02hX", (guchar) *part++);
|
|
p += 3;
|
|
} else
|
|
*p++ = *part++;
|
|
}
|
|
*p = '\0';
|
|
|
|
return work;
|
|
}
|
|
|
|
/* Numeric value of a single hex digit character. */
#define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)

/**
 * camel_url_decode:
 * @part: a URL part
 *
 * %-decodes the passed-in URL *in place*. The decoded version is
 * never longer than the encoded version, so there does not need to
 * be any additional space at the end of the string.  A "%" that is
 * not followed by two hex digits is copied through unchanged.
 */
void
camel_url_decode (char *part)
{
	unsigned char *in = (unsigned char *) part;
	unsigned char *out = in;

	while (*in != '\0') {
		if (in[0] == '%' && isxdigit (in[1]) && isxdigit (in[2])) {
			*out++ = HEXVAL (in[1]) * 16 + HEXVAL (in[2]);
			in += 3;
			continue;
		}
		*out++ = *in++;
	}
	*out = '\0';
}
|
|
|
|
guint
|
|
camel_url_hash (const void *v)
|
|
{
|
|
const CamelURL *u = v;
|
|
guint hash = 0;
|
|
|
|
#define ADD_HASH(s) if (s) hash ^= g_str_hash (s);
|
|
|
|
ADD_HASH (u->protocol);
|
|
ADD_HASH (u->user);
|
|
ADD_HASH (u->authmech);
|
|
ADD_HASH (u->host);
|
|
ADD_HASH (u->path);
|
|
ADD_HASH (u->query);
|
|
hash ^= u->port;
|
|
|
|
return hash;
|
|
}
|
|
|
|
/* Compares two possibly-NULL strings.  Returns non-zero when both are
 * NULL or both are non-NULL with identical contents, zero otherwise.
 */
static int
check_equal (char *s1, char *s2)
{
	/* If either side is NULL they match only when both are. */
	if (s1 == NULL || s2 == NULL)
		return s1 == s2;

	return strcmp (s1, s2) == 0;
}
|
|
|
|
int
|
|
camel_url_equal(const void *v, const void *v2)
|
|
{
|
|
const CamelURL *u1 = v, *u2 = v2;
|
|
|
|
return check_equal(u1->protocol, u2->protocol)
|
|
&& check_equal(u1->user, u2->user)
|
|
&& check_equal(u1->authmech, u2->authmech)
|
|
&& check_equal(u1->host, u2->host)
|
|
&& check_equal(u1->path, u2->path)
|
|
&& check_equal(u1->query, u2->query)
|
|
&& u1->port == u2->port;
|
|
}
|