Removed. No longer used by evolution except via evolution-data-server.
* ename/*: Removed. No longer used by evolution except via evolution-data-server.
* Makefile.am (SUBDIRS): Remove ename.
(svn path=/trunk/; revision=23232)
This commit is contained in:
@ -1,3 +1,10 @@
|
||||
2003-11-07 Dan Winship <danw@ximian.com>
|
||||
|
||||
* ename/*: Removed. No longer used by evolution except via
|
||||
evolution-data-server.
|
||||
|
||||
* Makefile.am (SUBDIRS): Remove ename
|
||||
|
||||
2003-11-07 JP Rosevear <jpr@ximian.com>
|
||||
|
||||
* Makefile.am: remove build sources from dist
|
||||
|
||||
@ -1,5 +1,3 @@
|
||||
SUBDIRS = . ename
|
||||
|
||||
eutilincludedir = $(privincludedir)/e-util
|
||||
econdincludedir = $(privincludedir)/e-conduit
|
||||
edb3includedir = $(privincludedir)/e-db3util
|
||||
|
||||
@ -63,6 +63,9 @@ static int special_chars[] = {
|
||||
#define is_trailing_garbage(c) (c > 127 || (special_chars[c] & 2))
|
||||
#define is_domain_name_char(c) (c < 128 && (special_chars[c] & 4))
|
||||
|
||||
/* (http|https|ftp|nntp)://[^ "|/]+\.([^ "|]*[^ ,.!?;:>)\]}`'"|_-])+ */
|
||||
/* www\.[A-Za-z0-9.-]+(/([^ "|]*[^ ,.!?;:>)\]}`'"|_-])+) */
|
||||
|
||||
static char *
|
||||
url_extract (const unsigned char **text, gboolean full_url)
|
||||
{
|
||||
@ -107,6 +110,9 @@ email_address_extract (const unsigned char **cur, char **out, const unsigned cha
|
||||
;
|
||||
if (start == *cur)
|
||||
return NULL;
|
||||
if (start > linestart + 2 &&
|
||||
start[-1] == ':' && start[0] == '/' && start[1] == '/')
|
||||
return NULL;
|
||||
|
||||
/* Now look forward for a valid domain part */
|
||||
for (end = *cur + 1, dot = NULL; is_domain_name_char (*end); end++) {
|
||||
@ -448,6 +454,7 @@ struct {
|
||||
{ "Ends with http://www.foo.com", "http://www.foo.com" },
|
||||
{ "http://www.foo.com at start", "http://www.foo.com" },
|
||||
{ "http://www.foo.com.", "http://www.foo.com" },
|
||||
{ "http://www.foo.com/.", "http://www.foo.com/" },
|
||||
{ "<http://www.foo.com>", "http://www.foo.com" },
|
||||
{ "(http://www.foo.com)", "http://www.foo.com" },
|
||||
{ "http://www.foo.com, 555-9999", "http://www.foo.com" },
|
||||
@ -464,11 +471,14 @@ struct {
|
||||
{ "http://www.foo.com/index.html!", "http://www.foo.com/index.html" },
|
||||
{ "\"http://www.foo.com/index.html\"", "http://www.foo.com/index.html" },
|
||||
{ "'http://www.foo.com/index.html'", "http://www.foo.com/index.html" },
|
||||
{ "http://bob@www.foo.com/bar/baz/", "http://bob@www.foo.com/bar/baz/" },
|
||||
{ "http no match http", NULL },
|
||||
{ "http: no match http:", NULL },
|
||||
{ "http:// no match http://", NULL },
|
||||
{ "unrecognized://bob@foo.com/path", NULL },
|
||||
|
||||
{ "src/www.c", NULL },
|
||||
{ "Ewwwwww.Gross.", NULL },
|
||||
|
||||
};
|
||||
int num_url_tests = G_N_ELEMENTS (url_tests);
|
||||
|
||||
@ -1,8 +0,0 @@
|
||||
.deps
|
||||
.libs
|
||||
Makefile
|
||||
Makefile.in
|
||||
*.lo
|
||||
*.la
|
||||
test-ename-western
|
||||
test-ename-western-gtk
|
||||
@ -1,40 +0,0 @@
|
||||
INCLUDES = \
|
||||
-DG_LOG_DOMAIN=\"EName\" \
|
||||
-I$(srcdir) \
|
||||
-I$(srcdir)/.. \
|
||||
-I$(top_srcdir) \
|
||||
-I. \
|
||||
-I.. \
|
||||
-I$(top_builddir) \
|
||||
$(E_NAME_CFLAGS)
|
||||
|
||||
ename_libs = \
|
||||
libename.la \
|
||||
$(E_NAME_LIBS)
|
||||
|
||||
noinst_LTLIBRARIES = libename.la
|
||||
|
||||
libename_la_SOURCES = \
|
||||
e-address-western.h \
|
||||
e-name-western-tables.h \
|
||||
e-name-western.h \
|
||||
e-address-western.c \
|
||||
e-name-western.c
|
||||
|
||||
noinst_PROGRAMS = \
|
||||
test-ename-western \
|
||||
test-ename-western-gtk
|
||||
|
||||
test_ename_western_SOURCES = \
|
||||
test-ename-western.c
|
||||
|
||||
test_ename_western_LDADD = \
|
||||
$(ename_libs)
|
||||
|
||||
test_ename_western_gtk_SOURCES = \
|
||||
test-ename-western-gtk.c
|
||||
|
||||
test_ename_western_gtk_LDADD = \
|
||||
$(ename_libs) \
|
||||
$(E_UTIL_LIBS) \
|
||||
$(top_builddir)/e-util/libeutil.la
|
||||
@ -1,2 +0,0 @@
|
||||
* Support other naming systems.
|
||||
* Handle misspelled suffixes better.
|
||||
@ -1,444 +0,0 @@
|
||||
/* --------------------------------------------------
|
||||
|
||||
An address parser, yielding fields as per RFC 2426.
|
||||
|
||||
Author:
|
||||
Jesse Pavel (jpavel@ximian.com)
|
||||
|
||||
Copyright 2000, Ximian, Inc.
|
||||
--------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <glib.h>
|
||||
|
||||
#ifdef E_ADDRESS_WESTERN_TEST
|
||||
|
||||
#include "e-address-western.h"
|
||||
|
||||
#else
|
||||
|
||||
#include <ename/e-address-western.h>
|
||||
#include <gal/util/e-util.h>
|
||||
|
||||
#endif
|
||||
|
||||
/* Keywords whose first (case-insensitive) occurrence in a street line
   marks where the extended-address part begins.  The list is
   NULL-terminated and searched in order. */

static char *extended_keywords[] = {
	"apt",
	"apartment",
	"suite",
	NULL
};
|
||||
|
||||
|
||||
|
||||
static gboolean
|
||||
e_address_western_is_line_blank (gchar *line)
|
||||
{
|
||||
gboolean blank = TRUE;
|
||||
gint cntr;
|
||||
|
||||
/* A blank line consists of whitespace only, or a NULL line. */
|
||||
for (cntr = 0; line[cntr] != '\0'; cntr++ ) {
|
||||
if (!isspace(line[cntr])) {
|
||||
blank = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return blank;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* In the array of lines, `lines', we will erase the line at line_num, and
|
||||
shift the remaining lines, up to line number num_lines, up one position. */
|
||||
|
||||
static void
|
||||
e_address_western_shift_line (gchar *lines[], gint line_num, gint num_lines)
|
||||
{
|
||||
gint cntr;
|
||||
|
||||
if (line_num >= (num_lines - 1)) {
|
||||
/* It is the last line, so simply shift in a NULL. */
|
||||
lines[line_num] = NULL;
|
||||
}
|
||||
else {
|
||||
for (cntr = line_num; cntr < num_lines; cntr++)
|
||||
lines[cntr] = lines[cntr + 1];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
e_address_western_remove_blank_lines (gchar *lines[], gint *linecntr)
|
||||
{
|
||||
gint cntr;
|
||||
|
||||
for (cntr = 0; cntr < *linecntr; cntr++) {
|
||||
if (e_address_western_is_line_blank (lines[cntr])) {
|
||||
/* Delete the blank line, and shift all subsequent lines up
|
||||
one spot to fill its old spot. */
|
||||
e_address_western_shift_line (lines, cntr, *linecntr);
|
||||
|
||||
/* Since we must check the newly shifted line, let's
|
||||
not advance the counter on this next pass. */
|
||||
cntr--;
|
||||
|
||||
/* There is now one less line, total. */
|
||||
*linecntr -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static gboolean
|
||||
e_address_western_is_po_box (gchar *line)
|
||||
{
|
||||
gboolean retval = FALSE;
|
||||
|
||||
/* In which phase of processing are we? */
|
||||
enum State { FIRSTCHAR, SECONDCHAR, WHITESPACE } state;
|
||||
|
||||
|
||||
/* If the first two letters of the line are `p' and `o', and these
|
||||
are in turn followed by whitespace before another letter, then I
|
||||
will deem the line a representation of a PO Box address. */
|
||||
|
||||
gint cntr;
|
||||
|
||||
state = FIRSTCHAR;
|
||||
for (cntr = 0; line[cntr] != '\0'; cntr++) {
|
||||
if (state == FIRSTCHAR) {
|
||||
if (isalnum(line[cntr])) {
|
||||
if (tolower(line[cntr]) == 'p')
|
||||
state = SECONDCHAR;
|
||||
else {
|
||||
retval = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (state == SECONDCHAR) {
|
||||
if (isalnum (line[cntr])) {
|
||||
if (tolower(line[cntr]) == 'o')
|
||||
state = WHITESPACE;
|
||||
else {
|
||||
retval = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (state == WHITESPACE) {
|
||||
if (isspace (line[cntr])) {
|
||||
retval = TRUE;
|
||||
break;
|
||||
}
|
||||
else if (isalnum (line[cntr])) {
|
||||
retval = FALSE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* A line that contains a comma followed eventually by a number is
|
||||
deemed to be the line in the form of <town, region postal-code>. */
|
||||
|
||||
static gboolean
|
||||
e_address_western_is_postal (guchar *line)
|
||||
{
|
||||
gboolean retval;
|
||||
int cntr;
|
||||
|
||||
if (strchr (line, ',') == NULL)
|
||||
retval = FALSE; /* No comma. */
|
||||
else {
|
||||
int index;
|
||||
|
||||
/* Ensure that the first character after the comma is
|
||||
a letter. */
|
||||
index = strcspn (line, ",");
|
||||
index++;
|
||||
while (isspace(line[index]))
|
||||
index++;
|
||||
|
||||
if (!isalpha (line[index]))
|
||||
return FALSE; /* FIXME: ugly control flow. */
|
||||
|
||||
cntr = strlen(line) - 1;
|
||||
|
||||
/* Go to the character immediately following the last
|
||||
whitespace character. */
|
||||
while (cntr >= 0 && isspace(line[cntr]))
|
||||
cntr--;
|
||||
|
||||
while (cntr >= 0 && !isspace(line[cntr]))
|
||||
cntr--;
|
||||
|
||||
if (cntr == 0)
|
||||
retval = FALSE;
|
||||
else {
|
||||
if (isdigit (line[cntr+1]))
|
||||
retval = TRUE;
|
||||
else
|
||||
retval = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
static gchar *
|
||||
e_address_western_extract_po_box (gchar *line)
|
||||
{
|
||||
/* Return everything from the beginning of the line to
|
||||
the end of the first word that contains a number. */
|
||||
|
||||
int index;
|
||||
|
||||
index = 0;
|
||||
while (!isdigit(line[index]))
|
||||
index++;
|
||||
|
||||
while (isgraph(line[index]))
|
||||
index++;
|
||||
|
||||
return g_strndup (line, index);
|
||||
}
|
||||
|
||||
static gchar *
|
||||
e_address_western_extract_locality (gchar *line)
|
||||
{
|
||||
gint index;
|
||||
|
||||
/* Everything before the comma is the locality. */
|
||||
index = strcspn(line, ",");
|
||||
|
||||
if (index == 0)
|
||||
return NULL;
|
||||
else
|
||||
return g_strndup (line, index);
|
||||
}
|
||||
|
||||
|
||||
/* Whatever resides between the comma and the start of the
|
||||
postal code is deemed to be the region. */
|
||||
|
||||
static gchar *
|
||||
e_address_western_extract_region (gchar *line)
|
||||
{
|
||||
gint start, end;
|
||||
|
||||
start = strcspn (line, ",");
|
||||
start++;
|
||||
while (isspace(line[start]))
|
||||
start++;
|
||||
|
||||
end = strlen(line) - 1;
|
||||
while (isspace (line[end]))
|
||||
end--;
|
||||
|
||||
while (!isspace (line[end]))
|
||||
end--;
|
||||
|
||||
while (isspace (line[end]))
|
||||
end--;
|
||||
end++;
|
||||
|
||||
/* Between start and end lie the string. */
|
||||
return g_strndup ( (line+start), end-start);
|
||||
}
|
||||
|
||||
static gchar *
|
||||
e_address_western_extract_postal_code (gchar *line)
|
||||
{
|
||||
int start, end;
|
||||
|
||||
end = strlen (line) - 1;
|
||||
while (isspace(line[end]))
|
||||
end--;
|
||||
|
||||
start = end;
|
||||
end++;
|
||||
|
||||
while (!isspace(line[start]))
|
||||
start--;
|
||||
start++;
|
||||
|
||||
/* Between start and end lie the string. */
|
||||
return g_strndup ( (line+start), end-start);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void
|
||||
e_address_western_extract_street (gchar *line, gchar **street, gchar **extended)
|
||||
{
|
||||
const gchar *split = NULL;
|
||||
gint cntr;
|
||||
|
||||
for (cntr = 0; extended_keywords[cntr] != NULL; cntr++) {
|
||||
split = e_strstrcase (line, extended_keywords[cntr]);
|
||||
if (split != NULL)
|
||||
break;
|
||||
}
|
||||
|
||||
if (split != NULL) {
|
||||
*street = g_strndup (line, (split - line));
|
||||
*extended = g_strdup (split);
|
||||
}
|
||||
else {
|
||||
*street = g_strdup (line);
|
||||
*extended = NULL;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
EAddressWestern *
|
||||
e_address_western_parse (const gchar *in_address)
|
||||
{
|
||||
gchar **lines;
|
||||
gint linecntr, lineindex;
|
||||
gchar *address;
|
||||
gint cntr;
|
||||
gboolean found_po_box, found_postal;
|
||||
|
||||
EAddressWestern *eaw;
|
||||
#if 0
|
||||
gint start, end; /* To be used to classify address lines. */
|
||||
#endif
|
||||
|
||||
if (in_address == NULL)
|
||||
return NULL;
|
||||
|
||||
eaw = (EAddressWestern *)g_malloc (sizeof(EAddressWestern));
|
||||
eaw->po_box = NULL;
|
||||
eaw->extended = NULL;
|
||||
eaw->street = NULL;
|
||||
eaw->locality = NULL;
|
||||
eaw->region = NULL;
|
||||
eaw->postal_code = NULL;
|
||||
eaw->country = NULL;
|
||||
|
||||
address = g_strndup (in_address, 2047);
|
||||
|
||||
/* The first thing I'll do is divide the multiline input string
|
||||
into lines. */
|
||||
|
||||
/* ... count the lines. */
|
||||
linecntr = 1;
|
||||
lineindex = 0;
|
||||
while (address[lineindex] != '\0') {
|
||||
if (address[lineindex] == '\n')
|
||||
linecntr++;
|
||||
|
||||
lineindex++;
|
||||
}
|
||||
|
||||
/* ... tally them. */
|
||||
lines = (gchar **)g_malloc (sizeof(gchar *) * (linecntr+3));
|
||||
lineindex = 0;
|
||||
lines[0] = &address[0];
|
||||
linecntr = 1;
|
||||
while (address[lineindex] != '\0') {
|
||||
if (address[lineindex] == '\n') {
|
||||
lines[linecntr] = &address[lineindex + 1];
|
||||
linecntr++;
|
||||
}
|
||||
|
||||
lineindex++;
|
||||
}
|
||||
|
||||
/* Convert the newlines at the end of each line (except the last,
|
||||
because it is already NULL terminated) to NULLs. */
|
||||
for (cntr = 0; cntr < (linecntr - 1); cntr++) {
|
||||
*(strchr (lines[cntr], '\n')) = '\0';
|
||||
}
|
||||
|
||||
e_address_western_remove_blank_lines (lines, &linecntr);
|
||||
|
||||
/* Let's just test these functions. */
|
||||
found_po_box = FALSE;
|
||||
found_postal = FALSE;
|
||||
|
||||
for (cntr = 0; cntr < linecntr; cntr++) {
|
||||
if (e_address_western_is_po_box (lines[cntr])) {
|
||||
if (eaw->po_box == NULL)
|
||||
eaw->po_box = e_address_western_extract_po_box (lines[cntr]);
|
||||
found_po_box = TRUE;
|
||||
}
|
||||
else if (e_address_western_is_postal (lines[cntr])) {
|
||||
if (eaw->locality == NULL)
|
||||
eaw->locality = e_address_western_extract_locality (lines[cntr]);
|
||||
if (eaw->region == NULL)
|
||||
eaw->region = e_address_western_extract_region (lines[cntr]);
|
||||
if (eaw->postal_code == NULL)
|
||||
eaw->postal_code = e_address_western_extract_postal_code (lines[cntr]);
|
||||
found_postal = TRUE;
|
||||
}
|
||||
else {
|
||||
if (found_postal) {
|
||||
if (eaw->country == NULL)
|
||||
eaw->country = g_strdup (lines[cntr]);
|
||||
else {
|
||||
gchar *temp;
|
||||
temp = g_strconcat (eaw->country, "\n", lines[cntr], NULL);
|
||||
g_free (eaw->country);
|
||||
eaw->country = temp;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (eaw->street == NULL) {
|
||||
e_address_western_extract_street (lines[cntr], &eaw->street,
|
||||
&eaw->extended );
|
||||
}
|
||||
else {
|
||||
gchar *temp;
|
||||
temp = g_strdup_printf (
|
||||
"%s\n%s",
|
||||
eaw->extended ? eaw->extended: "",
|
||||
lines[cntr]);
|
||||
g_free (eaw->extended);
|
||||
eaw->extended = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
g_free (lines);
|
||||
g_free (address);
|
||||
|
||||
return eaw;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
e_address_western_free (EAddressWestern *eaw)
|
||||
{
|
||||
if (eaw == NULL)
|
||||
return;
|
||||
|
||||
if (eaw->po_box != NULL)
|
||||
g_free(eaw->po_box);
|
||||
if (eaw->extended != NULL)
|
||||
g_free(eaw->extended);
|
||||
if (eaw->street != NULL)
|
||||
g_free(eaw->street);
|
||||
if (eaw->locality != NULL)
|
||||
g_free(eaw->locality);
|
||||
if (eaw->region != NULL)
|
||||
g_free(eaw->region);
|
||||
if (eaw->postal_code != NULL)
|
||||
g_free(eaw->postal_code);
|
||||
if (eaw->country != NULL)
|
||||
g_free(eaw->country);
|
||||
|
||||
g_free (eaw);
|
||||
}
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
#ifndef __E_ADDRESS_WESTERN_H__
|
||||
#define __E_ADDRESS_WESTERN_H__
|
||||
|
||||
/* The components of a parsed Western postal address.  All members are
   public; a member is NULL when the corresponding part is absent. */
typedef struct {
	char *po_box;      /* Post-office box designation. */
	char *extended;    /* Extended address, e.g. an apartment or suite. */
	char *street;      /* Street address. */
	char *locality;    /* For example, the city or town. */
	char *region;      /* The state or province. */
	char *postal_code; /* Postal (ZIP) code. */
	char *country;     /* Country name. */
} EAddressWestern;
|
||||
|
||||
EAddressWestern *e_address_western_parse (const char *address);
|
||||
void e_address_western_free (EAddressWestern *eaw);
|
||||
|
||||
#endif /* ! __E_ADDRESS_WESTERN_H__ */
|
||||
|
||||
|
||||
@ -1,74 +0,0 @@
|
||||
#ifndef __E_NAME_WESTERN_TABLES_H__
|
||||
#define __E_NAME_WESTERN_TABLES_H__
|
||||
|
||||
/* Name prefixes (titles and honorifics), all lower case.  Entries may
   consist of several words.  The list is NULL-terminated and its order
   is significant to code that returns the first match. */
char *e_name_western_pfx_table[] = {

	/* English: common courtesy titles. */
	"mister", "miss.", "mr.", "mrs.", "ms.",
	"miss", "mr", "mrs", "ms", "sir",

	/* English: academic, judicial and political. */
	"professor", "prof.", "dr", "dr.", "doctor",
	"judge", "justice", "chief justice",
	"congressman", "congresswoman",

	/* English: military. */
	"commander",
	"lieutenant", "lt.", "colonel", "col.", "major", "maj.",
	"general", "gen.", "admiral", "admr.", "sergeant", "sgt.",

	/* English: nobility and royalty. */
	"lord", "lady", "baron", "baroness", "duke", "duchess",
	"king", "queen", "prince", "princess",

	/* English: styles of address. */
	"the most honorable", "the honorable",
	"the reverend", "his holiness",
	"his eminence", "his majesty", "her majesty",
	"his grace", "her grace",

	/* English: offices. */
	"president", "vice president", "secretary", "undersecretary",
	"consul", "ambassador",

	/* English: senator and religious titles. */
	"senator", "saint", "st.", "pastor", "deacon",
	"father", "bishop", "archbishop", "cardinal", "pope",
	"reverend", "rev.", "rabbi",

	/* French. */
	"monsieur", "m.", "mademoiselle", "melle",
	"madame", "mme", "professeur", "dauphin", "dauphine",

	/* German. */
	"herr", "frau", "fraulein", "herr doktor", "doktor frau",
	"doktor frau doktor", "frau doktor",

	/* Spanish. */
	"senor", "senora", "sra.", "senorita", "srita.",

	NULL
};
|
||||
|
||||
/* Single-word name suffixes.  NULL-terminated. */
char *e_name_western_sfx_table[] = {

	/* English: generational words. */
	"junior", "senior", "jr", "sr",

	/* English: Roman numerals I through XXII. */
	"I", "II", "III", "IV", "V",
	"VI", "VII", "VIII", "IX", "X",
	"XI", "XII", "XIII", "XIV", "XV",
	"XVI", "XVII", "XVIII", "XIX", "XX",
	"XXI", "XXII",

	/* English: degrees and esquire. */
	"phd", "ms", "md", "esq", "esq.", "esquire",

	NULL
};
|
||||
|
||||
/* Two-word name suffixes (English).  NULL-terminated. */
char *e_name_western_twopart_sfx_table[] = {
	"the first",
	"the second",
	"the third",
	NULL
};
|
||||
|
||||
/* Words that begin a multi-word ("complex") last name.  NULL-terminated. */
char *e_name_western_complex_last_table[] = {
	"van",
	"von",
	"de",
	"di",
	NULL
};
|
||||
|
||||
#endif /* ! __E_NAME_WESTERN_TABLES_H__ */
|
||||
@ -1,982 +0,0 @@
|
||||
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
|
||||
/*
|
||||
* A simple Western name parser.
|
||||
*
|
||||
* <Nat> Jamie, do you know anything about name parsing?
|
||||
* <jwz> Are you going down that rat hole? Bring a flashlight.
|
||||
*
|
||||
* Authors:
|
||||
* Nat Friedman <nat@ximian.com>
|
||||
*
|
||||
* Copyright 1999 - 2001, Ximian, Inc.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <glib.h>
|
||||
|
||||
#include <ename/e-name-western.h>
|
||||
#include <ename/e-name-western-tables.h>
|
||||
|
||||
/* Byte offsets into the full name string at which each recognised name
   component starts.  The extraction code in this file uses -1 to mean
   that a component was not found. */
typedef struct {
	int prefix_idx;  /* start of the prefix     */
	int first_idx;   /* start of the first name */
	int middle_idx;  /* start of the middle name */
	int nick_idx;    /* start of the nickname (its opening quote) */
	int last_idx;    /* start of the last name  */
	int suffix_idx;  /* start of the suffix     */
} ENameWesternIdxs;
|
||||
|
||||
/* Counts the space-separated words in `str': one for the start of the
   string plus one for every ' ' found after its first character.
   Returns 0 for a NULL or empty string. */
static int
e_name_western_str_count_words (char *str)
{
	int word_count;
	char *p;

	/* Advancing from the terminator of an empty string would step
	   outside the buffer, so handle that case up front. */
	if (str == NULL || *str == '\0')
		return 0;

	word_count = 0;

	for (p = str; p != NULL; p = g_utf8_strchr (p, -1, ' ')) {
		word_count ++;
		/* Step over the current character (the first character or a
		   space) so the next search finds the following space. */
		p = g_utf8_next_char (p);
	}

	return word_count;
}
|
||||
|
||||
/* Replaces *str with a newly allocated copy from which leading and
   trailing whitespace and commas have been removed, and frees the old
   string.  Does nothing when *str is NULL. */
static void
e_name_western_cleanup_string (char **str)
{
	char *trimmed;
	char *cursor;

	if (*str == NULL)
		return;

	/* Step over leading spaces and commas. */
	cursor = *str;
	while (g_unichar_isspace (g_utf8_get_char (cursor)) || *cursor == ',')
		cursor = g_utf8_next_char (cursor);

	/* This copy is what the caller ends up with. */
	trimmed = g_strdup (cursor);

	if (strlen (trimmed) > 0) {
		/* Back up from the end over trailing spaces and commas. */
		cursor = g_utf8_prev_char (trimmed + strlen (trimmed));
		while (g_unichar_isspace (g_utf8_get_char (cursor)) || *cursor == ',')
			cursor = g_utf8_prev_char (cursor);

		/* `cursor' rests on the last character to keep; cut the
		   string right after it. */
		if ((! g_unichar_isspace (g_utf8_get_char (cursor))) && *cursor != ',')
			cursor = g_utf8_next_char (cursor);

		*cursor = '\0';
	}

	g_free (*str);
	*str = trimmed;
}
|
||||
|
||||
static char *
|
||||
e_name_western_get_words_at_idx (char *str, int idx, int num_words)
|
||||
{
|
||||
GString *words;
|
||||
char *p;
|
||||
int word_count;
|
||||
|
||||
/*
|
||||
* Walk to the end of the words.
|
||||
*/
|
||||
words = g_string_new ("");
|
||||
word_count = 0;
|
||||
p = str + idx;
|
||||
while (word_count < num_words && *p != '\0') {
|
||||
while (! g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0') {
|
||||
words = g_string_append_unichar (words, g_utf8_get_char (p));
|
||||
p = g_utf8_next_char (p);
|
||||
}
|
||||
|
||||
while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
|
||||
p = g_utf8_next_char (p);
|
||||
|
||||
word_count ++;
|
||||
}
|
||||
|
||||
return g_string_free (words, FALSE);
|
||||
}
|
||||
|
||||
/*
 * Returns the larger of two ints.  A plain function is used here in
 * place of glib's MAX macro.
 */
static int
e_name_western_max (const int a, const int b)
{
	return (a > b) ? a : b;
}
|
||||
|
||||
static gboolean
|
||||
e_name_western_word_is_suffix (char *word)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; e_name_western_sfx_table [i] != NULL; i ++) {
|
||||
int length = strlen (e_name_western_sfx_table [i]);
|
||||
if (!g_strcasecmp (word, e_name_western_sfx_table [i]) ||
|
||||
( !g_strncasecmp (word, e_name_western_sfx_table [i], length) &&
|
||||
strlen(word) == length + 1 &&
|
||||
word[length] == '.' ))
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static char *
|
||||
e_name_western_get_one_prefix_at_str (char *str)
|
||||
{
|
||||
char *word;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Check for prefixes from our table.
|
||||
*/
|
||||
for (i = 0; e_name_western_pfx_table [i] != NULL; i ++) {
|
||||
int pfx_words;
|
||||
char *words;
|
||||
|
||||
pfx_words = e_name_western_str_count_words (e_name_western_pfx_table [i]);
|
||||
words = e_name_western_get_words_at_idx (str, 0, pfx_words);
|
||||
|
||||
if (! g_strcasecmp (words, e_name_western_pfx_table [i]))
|
||||
return words;
|
||||
|
||||
g_free (words);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for prefixes we don't know about. These are always a
|
||||
* sequence of more than one letters followed by a period.
|
||||
*/
|
||||
word = e_name_western_get_words_at_idx (str, 0, 1);
|
||||
|
||||
if (g_utf8_strlen (word, -1) > 2 &&
|
||||
g_unichar_isalpha (g_utf8_get_char (word)) &&
|
||||
g_unichar_isalpha (g_utf8_get_char (g_utf8_next_char (word))) &&
|
||||
word [strlen (word) - 1] == '.')
|
||||
return word;
|
||||
|
||||
g_free (word);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Returns a newly allocated copy of the prefix at the start of `str',
   covering up to two consecutive prefixes, or NULL when there is none. */
static char *
e_name_western_get_prefix_at_str (char *str)
{
	char *first;
	char *second;
	char *cursor;
	char *combined;

	first = e_name_western_get_one_prefix_at_str (str);
	if (first == NULL)
		return NULL;

	/* Look for a second prefix after the first and any whitespace. */
	cursor = str + strlen (first);
	while (g_unichar_isspace (g_utf8_get_char (cursor)) && *cursor != '\0')
		cursor = g_utf8_next_char (cursor);

	second = e_name_western_get_one_prefix_at_str (cursor);

	/* Only one prefix: hand the string we already own to the caller. */
	if (second == NULL)
		return first;

	/* Two prefixes: copy the original text from the start of `str'
	   through the end of the second prefix. */
	combined = g_strndup (str, (cursor + strlen (second)) - str);

	g_free (first);
	g_free (second);

	return combined;
}
|
||||
|
||||
static void
|
||||
e_name_western_extract_prefix (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
char *pfx;
|
||||
|
||||
pfx = e_name_western_get_prefix_at_str (name->full);
|
||||
|
||||
if (pfx == NULL)
|
||||
return;
|
||||
|
||||
idxs->prefix_idx = 0;
|
||||
name->prefix = pfx;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
e_name_western_is_complex_last_beginning (char *word)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; e_name_western_complex_last_table [i] != NULL; i ++) {
|
||||
|
||||
if (! g_strcasecmp (
|
||||
word, e_name_western_complex_last_table [i]))
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static void
|
||||
e_name_western_extract_first (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
/*
|
||||
* If there's a prefix, then the first name is right after it.
|
||||
*/
|
||||
if (idxs->prefix_idx != -1) {
|
||||
int first_idx;
|
||||
char *p;
|
||||
|
||||
first_idx = idxs->prefix_idx + strlen (name->prefix);
|
||||
|
||||
/* Skip past white space. */
|
||||
p = name->full + first_idx;
|
||||
while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
|
||||
p = g_utf8_next_char (p);
|
||||
|
||||
if (*p == '\0')
|
||||
return;
|
||||
|
||||
idxs->first_idx = p - name->full;
|
||||
name->first = e_name_western_get_words_at_idx (
|
||||
name->full, idxs->first_idx, 1);
|
||||
|
||||
} else {
|
||||
|
||||
/*
|
||||
* Otherwise, the first name is probably the first string.
|
||||
*/
|
||||
idxs->first_idx = 0;
|
||||
name->first = e_name_western_get_words_at_idx (
|
||||
name->full, idxs->first_idx, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that we didn't just assign the beginning of a
|
||||
* compound last name to the first name.
|
||||
*/
|
||||
if (name->first != NULL) {
|
||||
if (e_name_western_is_complex_last_beginning (name->first)) {
|
||||
g_free (name->first);
|
||||
name->first = NULL;
|
||||
idxs->first_idx = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
e_name_western_extract_middle (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
char *word;
|
||||
char *middle;
|
||||
|
||||
/*
|
||||
* Middle names can only exist if you have a first name.
|
||||
*/
|
||||
if (idxs->first_idx == -1)
|
||||
return;
|
||||
|
||||
middle = name->full + idxs->first_idx + strlen (name->first);
|
||||
if (*middle == '\0')
|
||||
return;
|
||||
|
||||
middle = g_utf8_next_char (middle);
|
||||
if (*middle == '\0')
|
||||
return;
|
||||
|
||||
/*
|
||||
* Search for the first space (or the terminating \0)
|
||||
*/
|
||||
while (g_unichar_isspace (g_utf8_get_char (middle)) &&
|
||||
*middle != '\0')
|
||||
middle = g_utf8_next_char (middle);
|
||||
|
||||
if (*middle == '\0')
|
||||
return;
|
||||
|
||||
/*
|
||||
* Skip past the nickname, if it's there.
|
||||
*/
|
||||
if (*middle == '\"') {
|
||||
if (idxs->nick_idx == -1)
|
||||
return;
|
||||
|
||||
middle = name->full + idxs->nick_idx + strlen (name->nick);
|
||||
middle = g_utf8_next_char (middle);
|
||||
|
||||
while (g_unichar_isspace (g_utf8_get_char (middle)) &&
|
||||
*middle != '\0')
|
||||
middle = g_utf8_next_char (middle);
|
||||
|
||||
if (*middle == '\0')
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure this isn't the beginning of a complex last name.
|
||||
*/
|
||||
word = e_name_western_get_words_at_idx (name->full, middle - name->full, 1);
|
||||
if (e_name_western_is_complex_last_beginning (word)) {
|
||||
g_free (word);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure this isn't a suffix.
|
||||
*/
|
||||
e_name_western_cleanup_string (& word);
|
||||
if (e_name_western_word_is_suffix (word)) {
|
||||
g_free (word);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure we didn't just grab a cute nickname.
|
||||
*/
|
||||
if (word [0] == '\"') {
|
||||
g_free (word);
|
||||
return;
|
||||
}
|
||||
|
||||
idxs->middle_idx = middle - name->full;
|
||||
name->middle = word;
|
||||
}
|
||||
|
||||
static void
|
||||
e_name_western_extract_nickname (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
char *nick;
|
||||
int start_idx;
|
||||
GString *str;
|
||||
|
||||
if (idxs->first_idx == -1)
|
||||
return;
|
||||
|
||||
if (idxs->middle_idx > idxs->first_idx)
|
||||
nick = name->full + idxs->middle_idx + strlen (name->middle);
|
||||
else
|
||||
nick = name->full + idxs->first_idx + strlen (name->first);
|
||||
|
||||
while (*nick != '\"' && *nick != '\0')
|
||||
nick = g_utf8_next_char (nick);
|
||||
|
||||
if (*nick != '\"')
|
||||
return;
|
||||
|
||||
start_idx = nick - name->full;
|
||||
|
||||
/*
|
||||
* Advance to the next double quote.
|
||||
*/
|
||||
str = g_string_new ("\"");
|
||||
nick = g_utf8_next_char (nick);
|
||||
|
||||
while (*nick != '\"' && *nick != '\0') {
|
||||
str = g_string_append_unichar (str, g_utf8_get_char (nick));
|
||||
nick = g_utf8_next_char (nick);
|
||||
}
|
||||
|
||||
if (*nick == '\0') {
|
||||
g_string_free (str, TRUE);
|
||||
return;
|
||||
}
|
||||
str = g_string_append (str, "\"");
|
||||
|
||||
name->nick = g_string_free (str, FALSE);
|
||||
|
||||
idxs->nick_idx = start_idx;
|
||||
}
|
||||
|
||||
static int
|
||||
e_name_western_last_get_max_idx (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
int max_idx = -1;
|
||||
|
||||
if (name->prefix != NULL)
|
||||
max_idx = e_name_western_max (
|
||||
max_idx, idxs->prefix_idx + strlen (name->prefix));
|
||||
|
||||
if (name->first != NULL)
|
||||
max_idx = e_name_western_max (
|
||||
max_idx, idxs->first_idx + strlen (name->first));
|
||||
|
||||
if (name->middle != NULL)
|
||||
max_idx = e_name_western_max (
|
||||
max_idx, idxs->middle_idx + strlen (name->middle));
|
||||
|
||||
if (name->nick != NULL)
|
||||
max_idx = e_name_western_max (
|
||||
max_idx, idxs->nick_idx + strlen (name->nick));
|
||||
|
||||
return max_idx;
|
||||
}
|
||||
|
||||
static void
|
||||
e_name_western_extract_last (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
char *word;
|
||||
int idx = -1;
|
||||
char *last;
|
||||
|
||||
idx = e_name_western_last_get_max_idx (name, idxs);
|
||||
|
||||
/*
|
||||
* In the case where there is no preceding name element, the
|
||||
* name is either just a first name ("Nat", "John"), is a
|
||||
* single-element name ("Cher", which we treat as a first
|
||||
* name), or is just a last name. The only time we can
|
||||
* differentiate a last name alone from a single-element name
|
||||
* or a first name alone is if it's a complex last name ("de
|
||||
* Icaza", "van Josephsen"). So if there is no preceding name
|
||||
* element, we check to see whether or not the first part of
|
||||
* the name is the beginning of a complex name. If it is,
|
||||
* we subsume the entire string. If we accidentally subsume
|
||||
* the suffix, this will get fixed in the fixup routine.
|
||||
*/
|
||||
if (idx == -1) {
|
||||
word = e_name_western_get_words_at_idx (name->full, 0, 1);
|
||||
if (! e_name_western_is_complex_last_beginning (word)) {
|
||||
g_free (word);
|
||||
return;
|
||||
}
|
||||
|
||||
name->last = g_strdup (name->full);
|
||||
idxs->last_idx = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
last = name->full + idx;
|
||||
|
||||
/* Skip past the white space. */
|
||||
while (g_unichar_isspace (g_utf8_get_char (last)) && *last != '\0')
|
||||
last = g_utf8_next_char (last);
|
||||
|
||||
if (*last == '\0')
|
||||
return;
|
||||
|
||||
word = e_name_western_get_words_at_idx (name->full, last - name->full, 1);
|
||||
e_name_western_cleanup_string (& word);
|
||||
if (e_name_western_word_is_suffix (word)) {
|
||||
g_free (word);
|
||||
return;
|
||||
}
|
||||
g_free (word);
|
||||
|
||||
/*
|
||||
* Subsume the rest of the string into the last name. If we
|
||||
* accidentally include the prefix, it will get fixed later.
|
||||
* This is the only way to handle things like "Miguel de Icaza
|
||||
* Amozorrutia" without dropping data and forcing the user
|
||||
* to retype it.
|
||||
*/
|
||||
name->last = g_strdup (last);
|
||||
idxs->last_idx = last - name->full;
|
||||
}
|
||||
|
||||
/*
 * Returns a newly allocated copy of the bytes of @str lying between the
 * start of the whitespace-delimited word found by scanning backwards
 * from byte offset @idx and @idx itself.  The caller owns the result.
 */
static char *
e_name_western_get_preceding_word (char *str, int idx)
{
	char *limit = str + idx;
	char *cursor = limit;
	char *result;
	int   length;

	/* Back up over whitespace found at the starting position. */
	for (; cursor > str && g_unichar_isspace (g_utf8_get_char (cursor));
	     cursor = g_utf8_prev_char (cursor))
		;

	/* Back up over the word itself. */
	for (; cursor > str && ! g_unichar_isspace (g_utf8_get_char (cursor));
	     cursor = g_utf8_prev_char (cursor))
		;

	/* If we stopped on the separating whitespace, step onto the word. */
	if (g_unichar_isspace (g_utf8_get_char (cursor)))
		cursor = g_utf8_next_char (cursor);

	length = limit - cursor;

	/* Zero-filled buffer keeps the copy NUL-terminated. */
	result = g_malloc0 (length + 1);
	if (length > 0)
		strncpy (result, cursor, length);

	return result;
}
|
||||
|
||||
static char *
|
||||
e_name_western_get_suffix_at_str_end (char *str)
|
||||
{
|
||||
char *suffix;
|
||||
char *p;
|
||||
|
||||
/*
|
||||
* Walk backwards till we reach the beginning of the
|
||||
* (potentially-comma-separated) list of suffixes.
|
||||
*/
|
||||
p = str + strlen (str);
|
||||
while (1) {
|
||||
char *nextp;
|
||||
char *word;
|
||||
|
||||
word = e_name_western_get_preceding_word (str, p - str);
|
||||
nextp = p - strlen (word);
|
||||
if (nextp == str) {
|
||||
g_free (word);
|
||||
break;
|
||||
}
|
||||
nextp = g_utf8_prev_char (nextp);
|
||||
|
||||
e_name_western_cleanup_string (& word);
|
||||
|
||||
if (e_name_western_word_is_suffix (word)) {
|
||||
p = nextp;
|
||||
g_free (word);
|
||||
} else {
|
||||
g_free (word);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (p == (str + strlen (str)))
|
||||
return NULL;
|
||||
|
||||
suffix = g_strdup (p);
|
||||
e_name_western_cleanup_string (& suffix);
|
||||
|
||||
if (strlen (suffix) == 0) {
|
||||
g_free (suffix);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return suffix;
|
||||
}
|
||||
|
||||
static void
|
||||
e_name_western_extract_suffix (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
name->suffix = e_name_western_get_suffix_at_str_end (name->full);
|
||||
|
||||
if (name->suffix == NULL)
|
||||
return;
|
||||
|
||||
idxs->suffix_idx = strlen (name->full) - strlen (name->suffix);
|
||||
}
|
||||
|
||||
static gboolean
|
||||
e_name_western_detect_backwards (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
char *comma;
|
||||
char *word;
|
||||
|
||||
comma = g_utf8_strchr (name->full, -1, ',');
|
||||
|
||||
if (comma == NULL)
|
||||
return FALSE;
|
||||
|
||||
/*
|
||||
* If there's a comma, we need to detect whether it's
|
||||
* separating the last name from the first or just separating
|
||||
* suffixes. So we grab the word which comes before the
|
||||
* comma and check if it's a suffix.
|
||||
*/
|
||||
word = e_name_western_get_preceding_word (name->full, comma - name->full);
|
||||
|
||||
if (e_name_western_word_is_suffix (word)) {
|
||||
g_free (word);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
g_free (word);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
e_name_western_reorder_asshole (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
char *prefix;
|
||||
char *last;
|
||||
char *suffix;
|
||||
char *firstmidnick;
|
||||
char *newfull;
|
||||
|
||||
char *comma;
|
||||
char *p;
|
||||
|
||||
if (! e_name_western_detect_backwards (name, idxs))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Convert
|
||||
* <Prefix> <Last name>, <First name> <Middle[+nick] name> <Suffix>
|
||||
* to
|
||||
* <Prefix> <First name> <Middle[+nick] name> <Last name> <Suffix>
|
||||
*/
|
||||
|
||||
/*
|
||||
* Grab the prefix from the beginning.
|
||||
*/
|
||||
prefix = e_name_western_get_prefix_at_str (name->full);
|
||||
|
||||
/*
|
||||
* Everything from the end of the prefix to the comma is the
|
||||
* last name.
|
||||
*/
|
||||
comma = g_utf8_strchr (name->full, -1, ',');
|
||||
if (comma == NULL)
|
||||
return;
|
||||
|
||||
p = name->full + (prefix == NULL ? 0 : strlen (prefix));
|
||||
|
||||
while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
|
||||
p = g_utf8_next_char (p);
|
||||
|
||||
last = g_malloc0 (comma - p + 1);
|
||||
strncpy (last, p, comma - p);
|
||||
|
||||
/*
|
||||
* Get the suffix off the end.
|
||||
*/
|
||||
suffix = e_name_western_get_suffix_at_str_end (name->full);
|
||||
|
||||
/*
|
||||
* Firstmidnick is everything from the comma to the beginning
|
||||
* of the suffix.
|
||||
*/
|
||||
p = g_utf8_next_char (comma);
|
||||
|
||||
while (g_unichar_isspace (g_utf8_get_char (p)) && *p != '\0')
|
||||
p = g_utf8_next_char (p);
|
||||
|
||||
if (suffix != NULL) {
|
||||
char *q;
|
||||
|
||||
/*
|
||||
* Point q at the beginning of the suffix.
|
||||
*/
|
||||
q = name->full + strlen (name->full) - strlen (suffix);
|
||||
q = g_utf8_prev_char (q);
|
||||
|
||||
/*
|
||||
* Walk backwards until we hit the space which
|
||||
* separates the suffix from firstmidnick.
|
||||
*/
|
||||
while (! g_unichar_isspace (g_utf8_get_char (q)) && q > comma)
|
||||
q = g_utf8_prev_char (q);
|
||||
|
||||
if ((q - p + 1) > 0) {
|
||||
firstmidnick = g_malloc0 (q - p + 1);
|
||||
strncpy (firstmidnick, p, q - p);
|
||||
} else
|
||||
firstmidnick = NULL;
|
||||
} else {
|
||||
firstmidnick = g_strdup (p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Create our new reordered version of the name.
|
||||
*/
|
||||
#define NULLSTR(a) ((a) == NULL ? "" : (a))
|
||||
newfull = g_strdup_printf ("%s %s %s %s", NULLSTR (prefix), NULLSTR (firstmidnick),
|
||||
NULLSTR (last), NULLSTR (suffix));
|
||||
g_strstrip (newfull);
|
||||
g_free (name->full);
|
||||
name->full = newfull;
|
||||
|
||||
|
||||
g_free (prefix);
|
||||
g_free (firstmidnick);
|
||||
g_free (last);
|
||||
g_free (suffix);
|
||||
}
|
||||
|
||||
/*
 * Turns an empty string into "no value": if *str is a zero-length
 * string it is freed, *str becomes NULL and *idx becomes -1.  A NULL or
 * non-empty *str is left alone.
 */
static void
e_name_western_zap_nil (char **str, int *idx)
{
	char *value = *str;

	if (value != NULL && *value == '\0') {
		*idx = -1;
		g_free (value);
		*str = NULL;
	}
}
|
||||
|
||||
/*
 * Body shared by the CHECK_MIDDLE_NAME_FOR_CONJUNCTION* macros; expects
 * `name' and `idxs' in scope and must be expanded at the start of a
 * block because it declares locals.
 *
 * The middle name turned out to be a conjunction, so first, middle and
 * the first word of last are merged into the first name.  Whatever
 * follows the first space in last becomes the new last name; if last
 * has no space there is no last name left.
 */
#define FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION				\
	char *joined_first;							\
	char *remaining_last = NULL;						\
	char *split_at = NULL;							\
										\
	if (name->last)								\
		split_at = g_utf8_strchr (name->last, -1, ' ');			\
										\
	if (split_at) {								\
		remaining_last = g_strdup (g_utf8_next_char (split_at));	\
		*split_at = '\0';						\
		idxs->last_idx += (split_at - name->last) + 1;			\
	}									\
										\
	joined_first = g_strdup_printf ("%s %s %s",				\
					name->first,				\
					name->middle,				\
					name->last);				\
										\
	g_free (name->first);							\
	g_free (name->middle);							\
	g_free (name->last);							\
										\
	name->first = joined_first;						\
	name->middle = NULL;							\
	name->last = remaining_last;						\
										\
	idxs->middle_idx = -1;							\
	if (! split_at)								\
		idxs->last_idx = -1;
|
||||
|
||||
/* Case-sensitive test: is the middle name exactly the conjunction `conj'? */
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION(conj)					\
	if (idxs->middle_idx != -1 && strcmp (name->middle, conj) == 0) {	\
		FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION			\
	}
|
||||
|
||||
/* Same test as above, but ignoring case (via strcasecmp). */
#define CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE(conj)				\
	if (idxs->middle_idx != -1 && strcasecmp (name->middle, conj) == 0) {	\
		FINISH_CHECK_MIDDLE_NAME_FOR_CONJUNCTION			\
	}
|
||||
|
||||
static void
|
||||
e_name_western_fixup (ENameWestern *name, ENameWesternIdxs *idxs)
|
||||
{
|
||||
/*
|
||||
* The middle and last names cannot be the same.
|
||||
*/
|
||||
if (idxs->middle_idx != -1 && idxs->middle_idx == idxs->last_idx) {
|
||||
idxs->middle_idx = -1;
|
||||
g_free (name->middle);
|
||||
name->middle = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have a middle name and no last name, then we mistook
|
||||
* the last name for the middle name.
|
||||
*/
|
||||
if (idxs->last_idx == -1 && idxs->middle_idx != -1) {
|
||||
idxs->last_idx = idxs->middle_idx;
|
||||
name->last = name->middle;
|
||||
name->middle = NULL;
|
||||
idxs->middle_idx = -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if we accidentally included the suffix in the
|
||||
* last name.
|
||||
*/
|
||||
if (idxs->suffix_idx != -1 && idxs->last_idx != -1 &&
|
||||
idxs->suffix_idx < (idxs->last_idx + strlen (name->last))) {
|
||||
char *sfx;
|
||||
|
||||
sfx = name->last + (idxs->suffix_idx - idxs->last_idx);
|
||||
if (sfx != NULL) {
|
||||
char *newlast;
|
||||
char *p;
|
||||
|
||||
p = sfx;
|
||||
p = g_utf8_prev_char (p);
|
||||
while (g_unichar_isspace (g_utf8_get_char (p)) && p > name->last)
|
||||
p = g_utf8_prev_char (p);
|
||||
p = g_utf8_next_char (p);
|
||||
|
||||
newlast = g_malloc0 (p - name->last + 1);
|
||||
strncpy (newlast, name->last, p - name->last);
|
||||
g_free (name->last);
|
||||
name->last = newlast;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have a prefix and a first name, but no last name,
|
||||
* then we need to assign the first name to the last name.
|
||||
* This way we get things like "Mr Friedman" correctly.
|
||||
*/
|
||||
if (idxs->first_idx != -1 && idxs->prefix_idx != -1 &&
|
||||
idxs->last_idx == -1) {
|
||||
name->last = name->first;
|
||||
idxs->last_idx = idxs->first_idx;
|
||||
idxs->first_idx = -1;
|
||||
name->first = NULL;
|
||||
}
|
||||
|
||||
if (idxs->middle_idx != -1) {
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("*");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("|");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("^");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("&&");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("||");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("+");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("-");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("and");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("or");
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("plus");
|
||||
|
||||
/* Spanish */
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("y");
|
||||
|
||||
/* German */
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("und");
|
||||
|
||||
/* Italian */
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("e");
|
||||
|
||||
/* Czech */
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("a");
|
||||
|
||||
/* Finnish */
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("ja");
|
||||
|
||||
/* French */
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION_CASE ("et");
|
||||
|
||||
/* Russian */
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\x98"); /* u+0418 */
|
||||
CHECK_MIDDLE_NAME_FOR_CONJUNCTION ("\xd0\xb8"); /* u+0438 */
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove stray spaces and commas (although there don't seem
|
||||
* to be any in the test cases, they might show up later).
|
||||
*/
|
||||
e_name_western_cleanup_string (& name->prefix);
|
||||
e_name_western_cleanup_string (& name->first);
|
||||
e_name_western_cleanup_string (& name->middle);
|
||||
e_name_western_cleanup_string (& name->nick);
|
||||
e_name_western_cleanup_string (& name->last);
|
||||
e_name_western_cleanup_string (& name->suffix);
|
||||
|
||||
/*
|
||||
* Make zero-length strings just NULL.
|
||||
*/
|
||||
e_name_western_zap_nil (& name->prefix, & idxs->prefix_idx);
|
||||
e_name_western_zap_nil (& name->first, & idxs->first_idx);
|
||||
e_name_western_zap_nil (& name->middle, & idxs->middle_idx);
|
||||
e_name_western_zap_nil (& name->nick, & idxs->nick_idx);
|
||||
e_name_western_zap_nil (& name->last, & idxs->last_idx);
|
||||
e_name_western_zap_nil (& name->suffix, & idxs->suffix_idx);
|
||||
}
|
||||
|
||||
/**
|
||||
* e_name_western_western_parse_fullname:
|
||||
* @full_name: A string containing a Western name.
|
||||
*
|
||||
* Parses @full_name and returns an #ENameWestern object filled with
|
||||
* the component parts of the name.
|
||||
*/
|
||||
ENameWestern *
|
||||
e_name_western_parse (const char *full_name)
|
||||
{
|
||||
ENameWesternIdxs *idxs;
|
||||
ENameWestern *wname;
|
||||
char *end;
|
||||
|
||||
if (!g_utf8_validate (full_name, -1, (const char **)&end)) {
|
||||
g_warning ("e_name_western_parse passed invalid UTF-8 sequence");
|
||||
*end = '\0';
|
||||
}
|
||||
|
||||
wname = g_new0 (ENameWestern, 1);
|
||||
|
||||
wname->full = g_strdup (full_name);
|
||||
|
||||
idxs = g_new0 (ENameWesternIdxs, 1);
|
||||
|
||||
idxs->prefix_idx = -1;
|
||||
idxs->first_idx = -1;
|
||||
idxs->middle_idx = -1;
|
||||
idxs->nick_idx = -1;
|
||||
idxs->last_idx = -1;
|
||||
idxs->suffix_idx = -1;
|
||||
|
||||
/*
|
||||
* An extremely simple algorithm.
|
||||
*
|
||||
* The goal here is to get it right 95% of the time for
|
||||
* Western names.
|
||||
*
|
||||
* First we check to see if this is an ass-backwards name
|
||||
* ("Prefix Last, First Middle Suffix"). These names really
|
||||
* suck (imagine "Dr von Johnson, Albert Roderick Jr"), so
|
||||
* we reorder them first and then parse them.
|
||||
*
|
||||
* Next, we grab the most obvious assignments for the various
|
||||
* parts of the name. Once this is done, we check for stupid
|
||||
* errors and fix them up.
|
||||
*/
|
||||
e_name_western_reorder_asshole (wname, idxs);
|
||||
|
||||
e_name_western_extract_prefix (wname, idxs);
|
||||
e_name_western_extract_first (wname, idxs);
|
||||
e_name_western_extract_nickname (wname, idxs);
|
||||
e_name_western_extract_middle (wname, idxs);
|
||||
e_name_western_extract_last (wname, idxs);
|
||||
e_name_western_extract_suffix (wname, idxs);
|
||||
|
||||
e_name_western_fixup (wname, idxs);
|
||||
|
||||
g_free (idxs);
|
||||
|
||||
return wname;
|
||||
}
|
||||
|
||||
/**
|
||||
* e_name_western_free:
|
||||
* @name: An ENameWestern object which needs to be freed.
|
||||
*
|
||||
* Deep-frees @name
|
||||
*/
|
||||
void
|
||||
e_name_western_free (ENameWestern *w)
|
||||
{
|
||||
|
||||
g_free (w->prefix);
|
||||
g_free (w->first);
|
||||
g_free (w->middle);
|
||||
g_free (w->nick);
|
||||
g_free (w->last);
|
||||
g_free (w->suffix);
|
||||
|
||||
g_free (w->full);
|
||||
|
||||
g_free (w);
|
||||
}
|
||||
@ -1,21 +0,0 @@
|
||||
#ifndef __E_NAME_WESTERN_H__
#define __E_NAME_WESTERN_H__

/*
 * The parts of a Western-style personal name as produced by
 * e_name_western_parse().  A part that is absent is NULL.
 */
typedef struct {

	/* Public */
	char *prefix;	/* leading title(s) */
	char *first;	/* first name */
	char *middle;	/* middle name */
	char *nick;	/* nickname */
	char *last;	/* last name */
	char *suffix;	/* trailing suffix(es) */

	/* Private */
	char *full;	/* the parser's working copy of the whole name */
} ENameWestern;

/* Parse full_name into its parts; release the result with e_name_western_free(). */
ENameWestern *e_name_western_parse (const char   *full_name);

/* Free w together with every string it holds. */
void          e_name_western_free  (ENameWestern *w);

#endif /* ! __E_NAME_WESTERN_H__ */
|
||||
@ -1,157 +0,0 @@
|
||||
#include <gtk/gtkmain.h>
|
||||
#include <gtk/gtktable.h>
|
||||
#include <libgnomeui/gnome-app.h>
|
||||
#include <libgnomeui/gnome-ui-init.h>
|
||||
#include <gal/widgets/e-unicode.h>
|
||||
#include "e-name-western.h"
|
||||
|
||||
ENameWestern *name;
|
||||
GtkWidget *full;
|
||||
GtkWidget *prefix;
|
||||
GtkWidget *first;
|
||||
GtkWidget *middle;
|
||||
GtkWidget *nick;
|
||||
GtkWidget *last;
|
||||
GtkWidget *suffix;
|
||||
|
||||
static void
|
||||
fill_entries (void)
|
||||
{
|
||||
|
||||
#define SET(a,b) (e_utf8_gtk_entry_set_text (GTK_ENTRY (a), (b) == NULL ? "" : (b)))
|
||||
SET(prefix, name->prefix);
|
||||
SET(first, name->first);
|
||||
SET(middle, name->middle);
|
||||
SET(nick, name->nick);
|
||||
SET(last, name->last);
|
||||
SET(suffix, name->suffix);
|
||||
}
|
||||
|
||||
static void
|
||||
full_changed_cb (GtkEntry *fulle)
|
||||
{
|
||||
gchar *str;
|
||||
|
||||
e_name_western_free (name);
|
||||
str = e_utf8_gtk_entry_get_text (fulle);
|
||||
name = e_name_western_parse (str);
|
||||
fill_entries ();
|
||||
|
||||
g_free (str);
|
||||
}
|
||||
|
||||
static void
|
||||
create_window (void)
|
||||
{
|
||||
GtkWidget *app;
|
||||
GtkTable *table;
|
||||
|
||||
GtkWidget *prefix_label;
|
||||
GtkWidget *first_label;
|
||||
GtkWidget *middle_label;
|
||||
GtkWidget *nick_label;
|
||||
GtkWidget *last_label;
|
||||
GtkWidget *suffix_label;
|
||||
|
||||
app = gnome_app_new ("test", "Evolution Western Name Parser");
|
||||
|
||||
table = GTK_TABLE (gtk_table_new (3, 6, FALSE));
|
||||
|
||||
full = gtk_entry_new ();
|
||||
prefix = gtk_entry_new ();
|
||||
first = gtk_entry_new ();
|
||||
middle = gtk_entry_new ();
|
||||
nick = gtk_entry_new ();
|
||||
last = gtk_entry_new ();
|
||||
suffix = gtk_entry_new ();
|
||||
|
||||
gtk_widget_set_usize (prefix, 100, 0);
|
||||
gtk_widget_set_usize (first, 100, 0);
|
||||
gtk_widget_set_usize (middle, 100, 0);
|
||||
gtk_widget_set_usize (nick, 100, 0);
|
||||
gtk_widget_set_usize (last, 100, 0);
|
||||
gtk_widget_set_usize (suffix, 100, 0);
|
||||
|
||||
gtk_table_attach (table, full, 0, 6, 0, 1,
|
||||
GTK_EXPAND | GTK_FILL, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, prefix, 0, 1, 1, 2,
|
||||
GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, first, 1, 2, 1, 2,
|
||||
GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, middle, 2, 3, 1, 2,
|
||||
GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, nick, 3, 4, 1, 2,
|
||||
GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, last, 4, 5, 1, 2,
|
||||
GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, suffix, 5, 6, 1, 2,
|
||||
GTK_EXPAND | GTK_FILL | GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
prefix_label = gtk_label_new ("Prefix");
|
||||
first_label = gtk_label_new ("First");
|
||||
middle_label = gtk_label_new ("Middle");
|
||||
nick_label = gtk_label_new ("Nick");
|
||||
last_label = gtk_label_new ("Last");
|
||||
suffix_label = gtk_label_new ("Suffix");
|
||||
|
||||
gtk_table_attach (table, prefix_label, 0, 1, 2, 3,
|
||||
GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, first_label, 1, 2, 2, 3,
|
||||
GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, middle_label, 2, 3, 2, 3,
|
||||
GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, nick_label, 3, 4, 2, 3,
|
||||
GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, last_label, 4, 5, 2, 3,
|
||||
GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gtk_table_attach (table, suffix_label, 5, 6, 2, 3,
|
||||
GTK_SHRINK, 0,
|
||||
0, 0);
|
||||
|
||||
gnome_app_set_contents (GNOME_APP (app), GTK_WIDGET (table));
|
||||
|
||||
gtk_widget_show_all (app);
|
||||
|
||||
gtk_entry_set_text (GTK_ENTRY (full),
|
||||
"The Honorable Doctor van Jacobsen, Albert Roderick \"The Clenched Fist\" Jr, MD, PhD, Esquire");
|
||||
|
||||
name = e_name_western_parse ("The Honorable Doctor van Jacobsen, Albert Roderick \"The Clenched Fist\" Jr, MD, PhD, Esquire");
|
||||
fill_entries ();
|
||||
|
||||
gtk_signal_connect (GTK_OBJECT (full), "changed", GTK_SIGNAL_FUNC (full_changed_cb), NULL);
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
gnome_program_init ("Test EName", "Test EName", LIBGNOMEUI_MODULE, argc, argv, NULL);
|
||||
|
||||
create_window ();
|
||||
|
||||
gtk_main ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1,134 +0,0 @@
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <glib.h>
|
||||
#include <gtk/gtkmain.h>
|
||||
#include <ename/e-name-western.h>
|
||||
|
||||
|
||||
static void
|
||||
do_name (char *n)
|
||||
{
|
||||
ENameWestern *wname;
|
||||
|
||||
wname = e_name_western_parse (n);
|
||||
|
||||
printf ("Full Name: [%s]\n", n);
|
||||
|
||||
printf ("Prefix: [%s]\n", wname->prefix ? wname->prefix : "");
|
||||
printf ("First: [%s]\n", wname->first ? wname->first : "");
|
||||
printf ("Middle: [%s]\n", wname->middle ? wname->middle : "");
|
||||
printf ("Nick: [%s]\n", wname->nick ? wname->nick : "");
|
||||
printf ("Last: [%s]\n", wname->last ? wname->last : "");
|
||||
printf ("Suffix: [%s]\n", wname->suffix ? wname->suffix : "");
|
||||
|
||||
printf ("\n");
|
||||
|
||||
e_name_western_free (wname);
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char **argv)
|
||||
{
|
||||
GString *str;
|
||||
if (argc == 2) {
|
||||
while (! feof (stdin)) {
|
||||
char s[256];
|
||||
|
||||
if (fgets (s, sizeof (s), stdin) == NULL)
|
||||
return 0;
|
||||
|
||||
g_strstrip (s);
|
||||
|
||||
do_name (s);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
do_name ("Nat");
|
||||
do_name ("Karl Anders Carlsson");
|
||||
do_name ("Miguel de Icaza Amozorrutia");
|
||||
do_name ("The Honorable Doctor de Icaza, Miguel \"Sparky\" Junior, PhD, MD");
|
||||
do_name ("Nat Friedman MD, Phd");
|
||||
do_name ("Nat Friedman PhD");
|
||||
do_name ("Friedman, Nat");
|
||||
do_name ("Miguel de Icaza Esquire");
|
||||
do_name ("Dr Miguel \"Sparky\" de Icaza");
|
||||
do_name ("Robert H.B. Netzer");
|
||||
do_name ("W. Richard Stevens");
|
||||
do_name ("Nat Friedman");
|
||||
do_name ("N. Friedman");
|
||||
do_name ("Miguel de Icaza");
|
||||
do_name ("Drew Johnson");
|
||||
do_name ("President Bill \"Slick Willy\" Clinton");
|
||||
do_name ("The Honorable Mark J. Einstein Jr");
|
||||
do_name ("Friedman, Nat");
|
||||
do_name ("de Icaza, Miguel");
|
||||
do_name ("Mr de Icaza, Miguel");
|
||||
do_name ("Smith, John Jr");
|
||||
do_name ("Nick Glennie-Smith");
|
||||
do_name ("Dr von Johnson, Albert Roderick Jr");
|
||||
|
||||
/* create a name of the form:
|
||||
|
||||
<Prefix> <First name> <Nickname> <Middle> <Last name> <Suffix>
|
||||
|
||||
composed almost entirely of multibyte utf8 sequences.
|
||||
*/
|
||||
str = g_string_new ("Dr. ");
|
||||
|
||||
str = g_string_append_unichar (str, 0x5341);
|
||||
str = g_string_append_unichar (str, 0x57CE);
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
|
||||
str = g_string_append (str, " \"");
|
||||
str = g_string_append_unichar (str, 0x5341);
|
||||
str = g_string_append_unichar (str, 0x5341);
|
||||
str = g_string_append (str, "\" ");
|
||||
|
||||
str = g_string_append_unichar (str, 0x5341);
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
|
||||
str = g_string_append (str, " ");
|
||||
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
str = g_string_append (str, ", Esquire");
|
||||
|
||||
do_name (str->str);
|
||||
|
||||
str = g_string_assign (str, "");
|
||||
|
||||
/* Now try a utf8 sequence of the form:
|
||||
|
||||
Prefix Last, First Middle Suffix
|
||||
*/
|
||||
|
||||
str = g_string_new ("Dr. ");
|
||||
|
||||
/* last */
|
||||
str = g_string_append_unichar (str, 0x5341);
|
||||
str = g_string_append_unichar (str, 0x57CE);
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
|
||||
str = g_string_append (str, ", ");
|
||||
|
||||
/* first */
|
||||
str = g_string_append_unichar (str, 0x5341);
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
str = g_string_append_unichar (str, 0x57CE);
|
||||
|
||||
str = g_string_append (str, " ");
|
||||
|
||||
/* middle */
|
||||
str = g_string_append_unichar (str, 0x5341);
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
str = g_string_append_unichar (str, 0x76EE);
|
||||
|
||||
str = g_string_append (str, ", Esquire");
|
||||
|
||||
do_name (str->str);
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user