/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
|
|
/* url-util.c : utility functions to parse URLs */
|
|
|
|
|
|
/*
|
|
* Copyright (C) 1999 Bertrand Guiheneuf <Bertrand.Guiheneuf@aful.org>
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License as
|
|
* published by the Free Software Foundation; either version 2 of the
|
|
* License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
|
|
* USA
|
|
*/
|
|
|
|
|
|
|
|
/*
   Here we deal with URLs following the general scheme:
     protocol://user:password@host:port/name
   where name is a path-like string (i.e. dir1/dir2/....)
   See RFC 1738 for the complete description of
   Uniform Resource Locators.

   Bertrand. */
|
|
/*
|
|
XXX TODO: recover the words between #'s or ?'s after the path */
|
|
|
|
#include <config.h>

#include <stdio.h>
#include <string.h>

#include "url-util.h"
|
|
|
|
/* General item finder. */
/* It begins the search at position @position in @url,
   returns TRUE when the item is found, and sets @position after the item. */
|
|
/* Signature shared by every item finder in this file.
   @url:      NUL-terminated URL string being scanned (never modified).
   @item:     out: set to a newly allocated copy of the item, or to NULL
              when the item is absent.
   @position: in: offset in @url where the scan starts;
              out: offset where the next finder should resume.
   @error:    out flag for parse errors; the finders in this file never
              set it to TRUE.
   Returns TRUE when the item was found. */
typedef gboolean find_item_func(const gchar *url, gchar **item, guint *position, gboolean *error);
|
|
|
|
/* used to find one item (protocol, then user .... */
|
|
typedef struct {
|
|
char *item_name; /* item name : for debug only */
|
|
gchar **item_value; /* where the item value will go */
|
|
find_item_func *find_func; /* item finder */
|
|
} FindStepStruct;
|
|
|
|
static gboolean _find_protocol (const gchar *url, gchar **item, guint *position, gboolean *error);
|
|
static gboolean _find_user (const gchar *url, gchar **item, guint *position, gboolean *error);
|
|
static gboolean _find_passwd (const gchar *url, gchar **item, guint *position, gboolean *error);
|
|
static gboolean _find_host (const gchar *url, gchar **item, guint *position, gboolean *error);
|
|
static gboolean _find_port (const gchar *url, gchar **item, guint *position, gboolean *error);
|
|
static gboolean _find_path (const gchar *url, gchar **item, guint *position, gboolean *error);
|
|
|
|
|
|
|
|
/**
|
|
* g_url_new: create an Gurl object from a string
|
|
*
|
|
* @url_string: The string containing the URL to scan
|
|
*
|
|
* This routine takes a gchar and parses it as an
|
|
* URL of the form:
|
|
* protocol://user:password@host:port/path
|
|
* there is no test on the values. For example,
|
|
* "port" can be a string, not only a number !
|
|
* The Gurl structure fields ar filled with
|
|
* the scan results. When a member of the
|
|
* general URL can not be found, the corresponding
|
|
* Gurl member is NULL
|
|
* Fields filled in the Gurl structure are allocated
|
|
* and url_string is not modified.
|
|
*
|
|
* Return value: a Gurl structure containing the URL items.
|
|
**/
|
|
Gurl *g_url_new (const gchar* url_string)
|
|
{
|
|
Gurl *g_url;
|
|
|
|
gchar *protocol;
|
|
gchar *user;
|
|
gchar *passwd;
|
|
gchar *host;
|
|
gchar *port;
|
|
gchar *path;
|
|
|
|
guint position = 0;
|
|
gboolean error;
|
|
gboolean found;
|
|
guint i;
|
|
|
|
g_url = g_new (Gurl,1);
|
|
|
|
#define NB_STEP_URL 6
|
|
{
|
|
FindStepStruct step[NB_STEP_URL] = {
|
|
{ "protocol", &(g_url->protocol), _find_protocol},
|
|
{ "user", &(g_url->user), _find_user},
|
|
{ "password", &(g_url->passwd), _find_passwd},
|
|
{ "host", &(g_url->host), _find_host},
|
|
{ "port", &(g_url->port), _find_port},
|
|
{ "path", &(g_url->path), _find_path}
|
|
};
|
|
|
|
for (i = 0; i < NB_STEP_URL; i++) {
|
|
found = step[i].find_func (url_string,
|
|
step[i].item_value,
|
|
&position,
|
|
&error);
|
|
}
|
|
}
|
|
|
|
return g_url;
|
|
}
|
|
|
|
|
|
|
|
void
|
|
g_url_free (Gurl *url)
|
|
{
|
|
g_assert (url);
|
|
|
|
if (url->protocol) g_free (url->protocol);
|
|
if (url->user) g_free (url->user);
|
|
if (url->passwd) g_free (url->passwd);
|
|
if (url->host) g_free (url->host);
|
|
if (url->port) g_free (url->port);
|
|
if (url->path) g_free (url->path);
|
|
|
|
g_free (url);
|
|
|
|
}
|
|
|
|
/**** PARSING FUNCTIONS ****/
|
|
|
|
/* So, yes, I must admit there would have been more elegant
|
|
ways to do this, but it works, and quite well :) */
|
|
|
|
|
|
static gboolean
|
|
_find_protocol (const gchar *url, gchar **item, guint *position, gboolean *error)
|
|
{
|
|
|
|
guint i;
|
|
gint len_url;
|
|
|
|
g_assert (url);
|
|
g_assert (item);
|
|
g_assert (position);
|
|
|
|
len_url = strlen (url);
|
|
|
|
*item = NULL;
|
|
*error = FALSE;
|
|
i = *position;
|
|
|
|
/* find a ':' */
|
|
while ((i < len_url) && (url[i] != ':')) i++;
|
|
|
|
if (i == len_url) return FALSE;
|
|
i++;
|
|
|
|
/* check if it is followed by a "//" */
|
|
if ((i < len_url) && (url[i++] == '/'))
|
|
if ((i < len_url) && (url[i++] == '/'))
|
|
{
|
|
*item = g_strndup (url, i-3);
|
|
*position = i;
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
|
|
|
|
static gboolean
|
|
_find_user (const gchar *url, gchar **item, guint *position, gboolean *error)
|
|
{
|
|
guint i;
|
|
guint at_pos;
|
|
gint len_url;
|
|
|
|
g_assert (url);
|
|
g_assert (item);
|
|
g_assert (position);
|
|
|
|
len_url = strlen (url);
|
|
*item = NULL;
|
|
i = *position;
|
|
|
|
/* find a '@' */
|
|
while ((i < len_url) && (url[i] != '@')) i++;
|
|
|
|
if (i == len_url) return FALSE;
|
|
at_pos = i;
|
|
i = *position;
|
|
|
|
/* find a ':' */
|
|
while ((i < at_pos) && (url[i] != ':')) i++;
|
|
|
|
/* now if i has not been incremented at all, there is no user */
|
|
if (i == *position) {
|
|
(*position)++;
|
|
return FALSE;
|
|
}
|
|
|
|
*item = g_strndup (url+ *position, i - *position);
|
|
if (i < at_pos) *position = i + 1; /* there was a ':', skip it */
|
|
else *position = i;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static gboolean
|
|
_find_passwd (const gchar *url, gchar **item, guint *position, gboolean *error)
|
|
{
|
|
guint i;
|
|
gint len_url;
|
|
gchar *str_passwd;
|
|
|
|
g_assert (url);
|
|
g_assert (item);
|
|
g_assert (position);
|
|
|
|
len_url = strlen (url);
|
|
*item = NULL;
|
|
i = *position;
|
|
|
|
/* find a '@' */
|
|
while ((i < len_url) && (url[i] != '@')) i++;
|
|
|
|
if (i == len_url) return FALSE;
|
|
/*i has not been incremented at all, there is no passwd */
|
|
if (i == *position) {
|
|
*position = i + 1;
|
|
return FALSE;
|
|
}
|
|
|
|
*item = g_strndup (url + *position, i - *position);
|
|
*position = i + 1; /* skip it the '@' */
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
|
|
static gboolean
|
|
_find_host (const gchar *url, gchar **item, guint *position, gboolean *error)
|
|
{
|
|
guint i;
|
|
guint slash_pos;
|
|
gint len_url;
|
|
|
|
g_assert (url);
|
|
g_assert (item);
|
|
g_assert (position);
|
|
|
|
len_url = strlen (url);
|
|
*item = NULL;
|
|
i = *position;
|
|
|
|
/* find a '/' */
|
|
while ((i < len_url) && (url[i] != '/')) i++;
|
|
|
|
slash_pos = i;
|
|
i = *position;
|
|
|
|
/* find a ':' */
|
|
while ( (i < slash_pos) && (url[i] != ':') ) i++;
|
|
|
|
/* at this point if i has not been incremented at all,
|
|
there is no host */
|
|
if (i == *position) {
|
|
/* if we have not met / or \0, we have : and must skip it */
|
|
if (i < slash_pos) (*position)++;
|
|
return FALSE;
|
|
}
|
|
|
|
*item = g_strndup (url + *position, i - *position);
|
|
if (i < slash_pos) *position = i + 1; /* there was a ':', skip it */
|
|
else *position=i;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static gboolean
|
|
_find_port (const gchar *url, gchar **item, guint *position, gboolean *error)
|
|
{
|
|
guint i;
|
|
guint slash_pos;
|
|
gint len_url;
|
|
|
|
g_assert (url);
|
|
g_assert (item);
|
|
g_assert (position);
|
|
|
|
len_url = strlen (url);
|
|
*item = NULL;
|
|
i=*position;
|
|
|
|
/* find a '/' */
|
|
while ((i < len_url) && (url[i] != '/')) i++;
|
|
|
|
slash_pos = i;
|
|
i = *position;
|
|
|
|
/* find a ':' */
|
|
while ((i < slash_pos) && (url[i] != ':')) i++;
|
|
|
|
/* at this point if i has not been incremented at all, */
|
|
/* there is no port */
|
|
if (i == *position) return FALSE;
|
|
|
|
*item = g_strndup (url+ *position, i - *position);
|
|
*position = i;
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
static gboolean
|
|
_find_path (const gchar *url, gchar **item, guint *position, gboolean *error)
|
|
{
|
|
guint i;
|
|
gint len_url;
|
|
|
|
g_assert (url);
|
|
g_assert (item);
|
|
g_assert (position);
|
|
|
|
len_url = strlen (url);
|
|
*item = NULL;
|
|
i = *position;
|
|
|
|
|
|
/* find a '#' */
|
|
while ((i < len_url) && (url[i] != '#') && (url[i] != '?')) i++;
|
|
|
|
/*i has not been incremented at all, there is no path */
|
|
if (i == *position) return FALSE;
|
|
|
|
*item = g_strndup (url + *position, i - *position);
|
|
*position=i;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**** TEST ROUTINE - NOT COMPILED BY DEFAULT ****/
|
|
|
|
/* To test this file:
   gcc -o test_url_util `glib-config --cflags` -I.. -DTEST_URL_UTIL url-util.c `glib-config --libs`
   ./test_url_util URL
*/
|
|
#ifdef TEST_URL_UTIL
|
|
|
|
|
|
|
|
int
|
|
main (int argc, char **argv)
|
|
{
|
|
|
|
gchar *url;
|
|
gchar *protocol;
|
|
gchar *user;
|
|
gchar *passwd;
|
|
gchar *host;
|
|
gchar *port;
|
|
gchar *path;
|
|
guint position=0;
|
|
gboolean error;
|
|
gboolean found;
|
|
guint i;
|
|
guint i_pos;
|
|
|
|
#define NB_STEP_TEST 6
|
|
FindStepStruct test_step[NB_STEP_TEST] = {
|
|
{ "protocol", &protocol, _find_protocol},
|
|
{ "user", &user, _find_user},
|
|
{ "password", &passwd, _find_passwd},
|
|
{ "host", &host, _find_host},
|
|
{ "port", &port, _find_port},
|
|
{ "path", &path, _find_path}
|
|
};
|
|
url = argv[1];
|
|
printf("URL to test : %s\n\n", url);
|
|
for (i=0; i<NB_STEP_TEST; i++) {
|
|
found = test_step[i].find_func (url,
|
|
test_step[i].item_value,
|
|
&position,
|
|
&error);
|
|
if (found) {
|
|
printf ("\t\t\t\t** %s found : %s\n",
|
|
test_step[i].item_name,
|
|
*(test_step[i].item_value));
|
|
} else printf ("** %s not found in URL\n", test_step[i].item_name);
|
|
printf ("next item position:\n");
|
|
printf ("%s\n", url);
|
|
for (i_pos = 0; i_pos < position; i_pos++) printf (" ");
|
|
printf ("^\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* TEST_URL_UTIL */
|