#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# compose-parse.py, version 1.3
#
# Multifunction script that helps manage the compose sequence table in GTK+ (gtk/gtkimcontextsimple.c).
# The script produces statistics and information about the whole process; run with --help for more.
#
# You may need to switch your Python installation to UTF-8 if you get 'ascii' codec errors.
#
# Complain to Simos Xenitellis (simos@gnome.org, http://simos.info/blog) for this craft.

from re			import findall, match, split, sub
from string		import atoi
from unicodedata	import normalize
from urllib 		import urlretrieve
from os.path		import isfile, getsize
from copy 		import copy

import sys
import getopt

# We grab files off the web, left and right.
URL_COMPOSE = 'http://gitweb.freedesktop.org/?p=xorg/lib/libX11.git;a=blob_plain;f=nls/en_US.UTF-8/Compose.pre'
URL_KEYSYMSTXT = "http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt"
URL_GDKKEYSYMSH = "http://svn.gnome.org/svn/gtk%2B/trunk/gdk/gdkkeysyms.h"
URL_UNICODEDATATXT = 'http://www.unicode.org/Public/5.0.0/ucd/UnicodeData.txt'
FILENAME_COMPOSE_SUPPLEMENTARY = 'gtk-compose-lookaside.txt'

# We currently support keysyms of size 2; once upstream xorg gets sorted,
# we might produce some tables with size 2 and some with size 4.
SIZEOFINT = 2

# Current max compose sequence length; in case it gets increased.
WIDTHOFCOMPOSETABLE = 5

keysymdatabase = {}
keysymunicodedatabase = {}
unicodedatabase = {}
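# keysymdatabase maps keysym names to keysym values (filled in from gdkkeysyms.h),
# keysymunicodedatabase maps keysym names to Unicode code points (from keysyms.txt),
# and unicodedatabase maps code points to [name, decomposition, combining class]
# entries taken from UnicodeData.txt.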

headerfile_start = """/* GTK - The GIMP Tool Kit
 * Copyright (C) 2007, 2008 GNOME Foundation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * File auto-generated from script found at http://bugzilla.gnome.org/show_bug.cgi?id=321896
 * using the input files
 *  Input   : http://gitweb.freedesktop.org/?p=xorg/lib/libX11.git;a=blob_plain;f=nls/en_US.UTF-8/Compose.pre
 *  Input   : http://www.cl.cam.ac.uk/~mgk25/ucs/keysyms.txt
 *  Input   : http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
 *
 * This table is optimised for space and requires special handling to access the content.
 * This table is used solely by http://svn.gnome.org/viewcvs/gtk%2B/trunk/gtk/gtkimcontextsimple.c
 *
 * The resulting file is placed at http://svn.gnome.org/viewcvs/gtk%2B/trunk/gtk/gtkimcontextsimpleseqs.h
 * This file is described in bug report http://bugzilla.gnome.org/show_bug.cgi?id=321896
 */

/*
 * Modified by the GTK+ Team and others 2007, 2008.  See the AUTHORS
 * file for a list of people on the GTK+ Team.  See the ChangeLog
 * files for a list of changes.  These files are distributed with
 * GTK+ at ftp://ftp.gtk.org/pub/gtk/.
 */

#ifndef __GTK_IM_CONTEXT_SIMPLE_SEQS_H__
#define __GTK_IM_CONTEXT_SIMPLE_SEQS_H__

/* === These are the original comments of the file; we keep for historical purposes ===
 *
 * The following table was generated from the X compose tables include with
 * XFree86 4.0 using a set of Perl scripts. Contact Owen Taylor <otaylor@redhat.com>
 * to obtain the relevant perl scripts.
 *
 * The following compose letter letter sequences confliced
 *   Dstroke/dstroke and ETH/eth; resolved to Dstroke (Croation, Vietnamese, Lappish), over
 *                                ETH (Icelandic, Faroese, old English, IPA)  [ D- -D d- -d ]
 *   Amacron/amacron and ordfeminine; resolved to ordfeminine                 [ _A A_ a_ _a ]
 *   Amacron/amacron and Atilde/atilde; resolved to atilde                    [ -A A- a- -a ]
 *   Omacron/Omacron and masculine; resolved to masculine                     [ _O O_ o_ _o ]
 *   Omacron/omacron and Otilde/atilde; resolved to otilde                    [ -O O- o- -o ]
 *
 * [ Amacron and Omacron are in Latin-4 (Baltic). ordfeminine and masculine are used for
 *   spanish. atilde and otilde are used at least for Portuguese ]
 *
 *   at and Aring; resolved to Aring                                          [ AA ]
 *   guillemotleft and caron; resolved to guillemotleft                       [ << ]
 *   ogonek and cedilla; resolved to cedilla                                  [ ,, ]
 *
 * This probably should be resolved by first checking an additional set of compose tables
 * that depend on the locale or selected input method.
 */

static const guint16 gtk_compose_seqs_compact[] = {"""

headerfile_end = """};

#endif /* __GTK_IM_CONTEXT_SIMPLE_SEQS_H__ */
"""

def stringtohex(str): return atoi(str, 16)

def factorial(n):
	if n <= 1:
		return 1
	else:
		return n * factorial(n-1)

def uniq(*args):
	""" Performs a uniq operation on a list or lists """
	theInputList = []
	for theList in args:
		theInputList += theList
	theFinalList = []
	for elem in theInputList:
		if elem not in theFinalList:
			theFinalList.append(elem)
	return theFinalList


def all_permutations(seq):
	""" Borrowed from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/252178 """
	""" Produces all permutations of the items of a list """
	if len(seq) <= 1:
		yield seq
	else:
		for perm in all_permutations(seq[1:]):
			for i in range(len(perm)+1):
				# nb str[0:1] works in both string and list contexts
				yield perm[:i] + seq[0:1] + perm[i:]
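
# Both helpers above are used when recomposing characters: uniq() removes duplicates
# while preserving order (e.g. uniq([1, 2, 2], [2, 3]) returns [1, 2, 3]), and
# all_permutations() is a generator yielding every ordering of a list
# (e.g. list(all_permutations([1, 2])) gives [[1, 2], [2, 1]]), so that every order
# of the combining marks in a sequence can be tried against NFC normalisation.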

def usage():
	print """compose-parse available parameters:
	-h, --help		this craft
	-s, --statistics	show overall statistics (both algorithmic, non-algorithmic)
	-a, --algorithmic	show sequences saved with algorithmic optimisation
	-g, --gtk		show entries that go to GTK+
	-u, --unicodedatatxt	show compose sequences derived from UnicodeData.txt (from unicode.org)
	-v, --verbose		show verbose output
	-p, --plane1		show plane1 compose sequences
	-n, --numeric		when used with --gtk, create file with numeric values only
	-e, --gtk-expanded	when used with --gtk, create file that repeats first column; not usable in GTK+

	Default is to show statistics.
	"""

try:
	opts, args = getopt.getopt(sys.argv[1:], "pvgashune", ["help", "algorithmic", "statistics", "unicodedatatxt",
		"stats", "gtk", "verbose", "plane1", "numeric", "gtk-expanded"])
except:
	usage()
	sys.exit(2)

opt_statistics = False
opt_algorithmic = False
opt_gtk = False
opt_unicodedatatxt = False
opt_verbose = False
opt_quiet = False
opt_plane1 = False
opt_numeric = False
opt_gtkexpanded = False

for o, a in opts:
	if o in ("-h", "--help"):
		usage()
		sys.exit()
	if o in ("-s", "--statistics"):
		opt_statistics = True
	if o in ("-a", "--algorithmic"):
		opt_algorithmic = True
	if o in ("-g", "--gtk"):
		opt_gtk = True
	if o in ("-u", "--unicodedatatxt"):
		opt_unicodedatatxt = True
	if o in ("-v", "--verbose"):
		opt_verbose = True
	if o in ("-p", "--plane1"):
		opt_plane1 = True
	if o in ("-n", "--numeric"):
		opt_numeric = True
	if o in ("-e", "--gtk-expanded"):
		opt_gtkexpanded = True

if not opt_algorithmic and not opt_gtk and not opt_unicodedatatxt:
	opt_statistics = True
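
# Overall flow of the script: download the input files (Compose.pre, keysyms.txt,
# gdkkeysyms.h and, for statistics, UnicodeData.txt), build the keysym lookup tables,
# parse the compose sequences, split them into those that NFC normalisation can
# reproduce algorithmically and those that must be kept in the compact table, and
# finally print the generated header (--gtk), listings (--algorithmic, -u) or
# statistics (the default).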

def download_hook(blocks_transferred, block_size, file_size):
	""" A download hook to provide some feedback when downloading """
	if blocks_transferred == 0:
		if file_size > 0:
			if opt_verbose:
				print "Downloading", file_size, "bytes: ",
		else:
			if opt_verbose:
				print "Downloading: ",
	sys.stdout.write('#')
	sys.stdout.flush()


def download_file(url):
	""" Downloads a file provided a URL. Returns the filename. """
	""" Borks on failure """
	localfilename = url.split('/')[-1]
	if not isfile(localfilename) or getsize(localfilename) <= 0:
		if opt_verbose:
			print "Downloading ", url, "..."
		try:
			urlretrieve(url, localfilename, download_hook)
		except IOError, (errno, strerror):
			print "I/O error(%s): %s" % (errno, strerror)
			sys.exit(-1)
		except:
			print "Unexpected error: ", sys.exc_info()[0]
			sys.exit(-1)
		print " done."
	else:
		if opt_verbose:
			print "Using cached file for ", url
	return localfilename

def process_gdkkeysymsh():
	""" Opens the gdkkeysyms.h file from GTK+/gdk/gdkkeysyms.h """
	""" Fills up keysymdb with contents """
	filename_gdkkeysymsh = download_file(URL_GDKKEYSYMSH)
	try:
		gdkkeysymsh = open(filename_gdkkeysymsh, 'r')
	except IOError, (errno, strerror):
		print "I/O error(%s): %s" % (errno, strerror)
		sys.exit(-1)
	except:
		print "Unexpected error: ", sys.exc_info()[0]
		sys.exit(-1)

	""" Parse the gdkkeysyms.h file and place contents in keysymdb """
	linenum_gdkkeysymsh = 0
	keysymdb = {}
	for line in gdkkeysymsh.readlines():
		linenum_gdkkeysymsh += 1
		line = line.strip()
		if line == "" or not match('^#define GDK_', line):
			continue
		components = split('\s+', line)
		if len(components) < 3:
			print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
			% {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
			print "Was expecting 3 items in the line"
			sys.exit(-1)
		if not match('^GDK_', components[1]):
			print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
			% {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
			print "Was expecting a keysym starting with GDK_"
			sys.exit(-1)
		if components[2][:2] == '0x' and match('[0-9a-fA-F]+$', components[2][2:]):
			unival = atoi(components[2][2:], 16)
			if unival == 0:
				continue
			keysymdb[components[1][4:]] = unival
		else:
			print "Invalid line %(linenum)d in %(filename)s: %(line)s"\
			% {'linenum': linenum_gdkkeysymsh, 'filename': filename_gdkkeysymsh, 'line': line}
			print "Was expecting a hexadecimal number at the end of the line"
			sys.exit(-1)
	gdkkeysymsh.close()

	""" Patch up the keysymdb with some of our own stuff """

	""" This is for a missing keysym from the currently upstream file """
	keysymdb['dead_stroke'] = 0x338

	""" This is for a missing keysym from the currently upstream file """
	###keysymdb['dead_belowring'] = 0x323
	###keysymdb['dead_belowmacron'] = 0x331
	###keysymdb['dead_belowcircumflex'] = 0x32d
	###keysymdb['dead_belowtilde'] = 0x330
	###keysymdb['dead_belowbreve'] = 0x32e
	###keysymdb['dead_belowdiaeresis'] = 0x324

	""" This is^Wwas preferential treatment for Greek """
	# keysymdb['dead_tilde'] = 0x342
	""" This is^Wwas preferential treatment for Greek """
	#keysymdb['combining_tilde'] = 0x342

	""" Fixing VoidSymbol """
	keysymdb['VoidSymbol'] = 0xFFFF

	return keysymdb

def process_keysymstxt():
	""" Grabs and opens the keysyms.txt file that Markus Kuhn maintains """
	""" This file keeps a record between keysyms <-> unicode chars """
	filename_keysymstxt = download_file(URL_KEYSYMSTXT)
	try:
		keysymstxt = open(filename_keysymstxt, 'r')
	except IOError, (errno, strerror):
		print "I/O error(%s): %s" % (errno, strerror)
		sys.exit(-1)
	except:
		print "Unexpected error: ", sys.exc_info()[0]
		sys.exit(-1)

	""" Parse the keysyms.txt file and place content in keysymdb """
	linenum_keysymstxt = 0
	keysymdb = {}
	for line in keysymstxt.readlines():
		linenum_keysymstxt += 1
		line = line.strip()
		if line == "" or match('^#', line):
			continue
		components = split('\s+', line)
		if len(components) < 5:
			print "Invalid line %(linenum)d in %(filename)s: %(line)s'"\
			% {'linenum': linenum_keysymstxt, 'filename': filename_keysymstxt, 'line': line}
			print "Was expecting 5 items in the line"
			sys.exit(-1)
		if components[1][0] == 'U' and match('[0-9a-fA-F]+$', components[1][1:]):
			unival = atoi(components[1][1:], 16)
			if unival == 0:
				continue
			keysymdb[components[4]] = unival
	keysymstxt.close()

	""" Patch up the keysymdb with some of our own stuff """
	""" This is for a missing keysym from the currently upstream file """
	###keysymdb['dead_belowring'] = 0x323
	###keysymdb['dead_belowmacron'] = 0x331
	###keysymdb['dead_belowcircumflex'] = 0x32d
	###keysymdb['dead_belowtilde'] = 0x330
	###keysymdb['dead_belowbreve'] = 0x32e
	###keysymdb['dead_belowdiaeresis'] = 0x324

	""" This is preferential treatment for Greek """
	""" => we get more savings if used for Greek """
	# keysymdb['dead_tilde'] = 0x342
	""" This is preferential treatment for Greek """
	# keysymdb['combining_tilde'] = 0x342

	""" This is for a missing keysym from Markus Kuhn's db """
	keysymdb['dead_stroke'] = 0x338
	""" This is for a missing keysym from Markus Kuhn's db """
	keysymdb['Oslash'] = 0x0d8

	""" This is for a missing (recently added) keysym """
	keysymdb['dead_psili'] = 0x313
	""" This is for a missing (recently added) keysym """
	keysymdb['dead_dasia'] = 0x314

	""" Allows importing Multi_key sequences """
	keysymdb['Multi_key'] = 0xff20

	return keysymdb
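
# keysymvalue() and keysymunicodevalue() resolve a keysym name to a number using the
# databases built above; both also accept literal "Uxxxx" and "0x..." forms.
# keysymvalue() returns -1 for an unknown keysym, while keysymunicodevalue() treats
# an unknown keysym as a fatal error.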
def keysymvalue(keysym, file = "n/a", linenum = 0):
	""" Extracts a value from the keysym """
	""" Find the value of keysym, using the data from keysyms """
	""" Use file and linenum when reporting errors """
	if keysym == "":
		return 0
	if keysymdatabase.has_key(keysym):
		return keysymdatabase[keysym]
	elif keysym[0] == 'U' and match('[0-9a-fA-F]+$', keysym[1:]):
		return atoi(keysym[1:], 16)
	elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
		return atoi(keysym[2:], 16)
	else:
		#print 'UNKNOWN{%(keysym)s}' % { "keysym": keysym }
		return -1
		#sys.exit(-1)


def keysymunicodevalue(keysym, file = "n/a", linenum = 0):
	""" Extracts a value from the keysym """
	""" Find the value of keysym, using the data from keysyms """
	""" Use file and linenum when reporting errors """
	if keysym == "":
		return 0
	if keysymunicodedatabase.has_key(keysym):
		return keysymunicodedatabase[keysym]
	elif keysym[0] == 'U' and match('[0-9a-fA-F]+$', keysym[1:]):
		return atoi(keysym[1:], 16)
	elif keysym[:2] == '0x' and match('[0-9a-fA-F]+$', keysym[2:]):
		return atoi(keysym[2:], 16)
	else:
		print 'UNKNOWN{%(keysym)s}' % { "keysym": keysym }
		sys.exit(-1)


def rename_combining(seq):
	filtered_sequence = []
	for ks in seq:
		if findall('^combining_', ks):
			filtered_sequence.append(sub('^combining_', 'dead_', ks))
		else:
			filtered_sequence.append(ks)
	return filtered_sequence
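
# rename_combining() rewrites combining_* keysyms from the Compose file to their
# dead_* equivalents, so both spellings end up sharing the same table entries.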

keysymunicodedatabase = process_keysymstxt()
keysymdatabase = process_gdkkeysymsh()

""" Grab and open the compose file from upstream """
filename_compose = download_file(URL_COMPOSE)
try:
	composefile = open(filename_compose, 'r')
except IOError, (errno, strerror):
	print "I/O error(%s): %s" % (errno, strerror)
	sys.exit(-1)
except:
	print "Unexpected error: ", sys.exc_info()[0]
	sys.exit(-1)

""" Look if there is a lookaside (supplementary) compose file in the current
    directory, and if so, open, then merge with upstream Compose file.
"""
composefile_lookaside = None
try:
	composefile_lookaside = open(FILENAME_COMPOSE_SUPPLEMENTARY, 'r')
except IOError, (errno, strerror):
	if not opt_quiet:
		print "I/O error(%s): %s" % (errno, strerror)
		print "Did not find lookaside compose file. Continuing..."
except:
	print "Unexpected error: ", sys.exc_info()[0]
	sys.exit(-1)

xorg_compose_sequences_raw = []
for seq in composefile.readlines():
	xorg_compose_sequences_raw.append(seq)
if composefile_lookaside:
	for seq in composefile_lookaside.readlines():
		xorg_compose_sequences_raw.append(seq)
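
# Each remaining Compose line has the form '<keysym> <keysym> ... : "<char>" <value>',
# where <value> is a Uxxxx code point or a keysym name. The loop below resolves the
# keysyms, drops sequences it cannot represent (sequences containing plane-1 or
# unknown keysyms, and certain dead/combining marks), and then checks whether NFC
# normalisation of some permutation of a non-Multi_key sequence collapses to a single
# precomposed character: those go to xorg_compose_sequences_algorithmic, everything
# else is appended (with its resulting code point) to xorg_compose_sequences for the
# compact table.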
""" Parse the compose file in xorg_compose_sequences """
xorg_compose_sequences = []
xorg_compose_sequences_algorithmic = []
linenum_compose = 0
for line in xorg_compose_sequences_raw:
	linenum_compose += 1
	line = line.strip()
	if line == "" or match("^XCOMM", line) or match("^#", line):
		continue

	#line = line[:-1]
	components = split(':', line)
	if len(components) != 2:
		print "Invalid line %(linenum_compose)d in %(filename)s: No sequence\
		/value pair found" % { "linenum_compose": linenum_compose, "filename": filename_compose }
		exit(-1)
	(seq, val) = split(':', line)
	seq = seq.strip()
	val = val.strip()
	raw_sequence = findall('\w+', seq)
	values = split('\s+', val)
	unichar_temp = split('"', values[0])
	unichar = unichar_temp[1]
	if len(values) == 1:
		continue
	codepointstr = values[1]
	if values[1] == '#':
		# No codepoints that are >1 characters yet.
		continue
	if raw_sequence[0][0] == 'U' and match('[0-9a-fA-F]+$', raw_sequence[0][1:]):
		raw_sequence[0] = '0x' + raw_sequence[0][1:]
	if codepointstr[0] == 'U' and match('[0-9a-fA-F]+$', codepointstr[1:]):
		codepoint = atoi(codepointstr[1:], 16)
	elif keysymunicodedatabase.has_key(codepointstr):
		if keysymdatabase[codepointstr] != keysymunicodedatabase[codepointstr]:
			print "DIFFERENCE: 0x%(a)X 0x%(b)X" % { "a": keysymdatabase[codepointstr], "b": keysymunicodedatabase[codepointstr]},
			print raw_sequence, codepointstr
		codepoint = keysymunicodedatabase[codepointstr]
	else:
		print
		print "Invalid codepoint at line %(linenum_compose)d in %(filename)s:\
		 %(line)s" % { "linenum_compose": linenum_compose, "filename": filename_compose, "line": line }
		exit(-1)
	sequence = rename_combining(raw_sequence)
	reject_this = False
	for i in sequence:
		if keysymvalue(i) > 0xFFFF:
			reject_this = True
			if opt_plane1:
				print sequence
			break
		if keysymvalue(i) < 0:
			reject_this = True
			break
	if reject_this:
		continue
	if "U0342" in sequence or \
		"U0313" in sequence or \
		"U0314" in sequence or \
		"0x0313" in sequence or \
		"0x0342" in sequence or \
		"0x0314" in sequence:
		continue
	if "dead_belowring" in sequence or\
		"dead_belowcomma" in sequence or\
		"dead_belowmacron" in sequence or\
		"dead_belowtilde" in sequence or\
		"dead_belowbreve" in sequence or\
		"dead_belowdiaeresis" in sequence or\
		"dead_belowcircumflex" in sequence:
		continue
	#for i in range(len(sequence)):
	#	if sequence[i] == "0x0342":
	#		sequence[i] = "dead_tilde"
	if "Multi_key" not in sequence:
		""" Ignore for now >0xFFFF keysyms """
		if codepoint < 0xFFFF:
			original_sequence = copy(sequence)
			stats_sequence = copy(sequence)
			base = sequence.pop()
			basechar = keysymvalue(base, filename_compose, linenum_compose)

			if basechar < 0xFFFF:
				counter = 1
				unisequence = []
				not_normalised = True
				skipping_this = False
				for i in range(0, len(sequence)):
					""" If the sequence has dead_tilde and is for Greek, we don't do algorithmically
					    because of lack of dead_perispomeni (i.e. conflict)
					"""
					bc = basechar
					"""if sequence[-1] == "dead_tilde" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
						skipping_this = True
						break
					if sequence[-1] == "dead_horn" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
						skipping_this = True
						break
					if sequence[-1] == "dead_ogonek" and (bc >= 0x370 and bc <= 0x3ff) or (bc >= 0x1f00 and bc <= 0x1fff):
						skipping_this = True
						break
					if sequence[-1] == "dead_psili":
						sequence[i] = "dead_horn"
					if sequence[-1] == "dead_dasia":
						sequence[-1] = "dead_ogonek"
					"""
					unisequence.append(unichr(keysymunicodevalue(sequence.pop(), filename_compose, linenum_compose)))

				if skipping_this:
					unisequence = []
				for perm in all_permutations(unisequence):
					# print counter, original_sequence, unichr(basechar) + "".join(perm)
					# print counter, map(unichr, perm)
					normalized = normalize('NFC', unichr(basechar) + "".join(perm))
					if len(normalized) == 1:
						# print 'Base: %(base)s [%(basechar)s], produces [%(unichar)s] (0x%(codepoint)04X)' \
						# % { "base": base, "basechar": unichr(basechar), "unichar": unichar, "codepoint": codepoint },
						# print "Normalized: [%(normalized)s] SUCCESS %(c)d" % { "normalized": normalized, "c": counter }
						stats_sequence_data = map(keysymunicodevalue, stats_sequence)
						stats_sequence_data.append(normalized)
						xorg_compose_sequences_algorithmic.append(stats_sequence_data)
						not_normalised = False
						break
					counter += 1
				if not_normalised:
					original_sequence.append(codepoint)
					xorg_compose_sequences.append(original_sequence)
					""" print xorg_compose_sequences[-1] """

			else:
				print "Error in base char !?!"
				exit(-2)
		else:
			print "OVER", sequence
			exit(-1)
	else:
		sequence.append(codepoint)
		xorg_compose_sequences.append(sequence)
		""" print xorg_compose_sequences[-1] """
def sequence_cmp(x, y):
	if keysymvalue(x[0]) > keysymvalue(y[0]):
		return 1
	elif keysymvalue(x[0]) < keysymvalue(y[0]):
		return -1
	elif len(x) > len(y):
		return 1
	elif len(x) < len(y):
		return -1
	elif keysymvalue(x[1]) > keysymvalue(y[1]):
		return 1
	elif keysymvalue(x[1]) < keysymvalue(y[1]):
		return -1
	elif len(x) < 4:
		return 0
	elif keysymvalue(x[2]) > keysymvalue(y[2]):
		return 1
	elif keysymvalue(x[2]) < keysymvalue(y[2]):
		return -1
	elif len(x) < 5:
		return 0
	elif keysymvalue(x[3]) > keysymvalue(y[3]):
		return 1
	elif keysymvalue(x[3]) < keysymvalue(y[3]):
		return -1
	elif len(x) < 6:
		return 0
	elif keysymvalue(x[4]) > keysymvalue(y[4]):
		return 1
	elif keysymvalue(x[4]) < keysymvalue(y[4]):
		return -1
	else:
		return 0


def sequence_unicode_cmp(x, y):
	if keysymunicodevalue(x[0]) > keysymunicodevalue(y[0]):
		return 1
	elif keysymunicodevalue(x[0]) < keysymunicodevalue(y[0]):
		return -1
	elif len(x) > len(y):
		return 1
	elif len(x) < len(y):
		return -1
	elif keysymunicodevalue(x[1]) > keysymunicodevalue(y[1]):
		return 1
	elif keysymunicodevalue(x[1]) < keysymunicodevalue(y[1]):
		return -1
	elif len(x) < 4:
		return 0
	elif keysymunicodevalue(x[2]) > keysymunicodevalue(y[2]):
		return 1
	elif keysymunicodevalue(x[2]) < keysymunicodevalue(y[2]):
		return -1
	elif len(x) < 5:
		return 0
	elif keysymunicodevalue(x[3]) > keysymunicodevalue(y[3]):
		return 1
	elif keysymunicodevalue(x[3]) < keysymunicodevalue(y[3]):
		return -1
	elif len(x) < 6:
		return 0
	elif keysymunicodevalue(x[4]) > keysymunicodevalue(y[4]):
		return 1
	elif keysymunicodevalue(x[4]) < keysymunicodevalue(y[4]):
		return -1
	else:
		return 0


def sequence_algorithmic_cmp(x, y):
	if len(x) < len(y):
		return -1
	elif len(x) > len(y):
		return 1
	else:
		for i in range(len(x)):
			if x[i] < y[i]:
				return -1
			elif x[i] > y[i]:
				return 1
	return 0
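
# Sort the table sequences and drop consecutive duplicates; two sequences count as
# duplicates when sequence_unicode_cmp() sees them as equal, i.e. when their keysyms
# map to the same Unicode values.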
xorg_compose_sequences.sort(sequence_cmp)

xorg_compose_sequences_uniqued = []
first_time = True
item = None
for next_item in xorg_compose_sequences:
	if first_time:
		first_time = False
		item = next_item
	if sequence_unicode_cmp(item, next_item) != 0:
		xorg_compose_sequences_uniqued.append(item)
	item = next_item

xorg_compose_sequences = copy(xorg_compose_sequences_uniqued)

counter_multikey = 0
for item in xorg_compose_sequences:
	if findall('Multi_key', "".join(item[:-1])) != []:
		counter_multikey += 1

xorg_compose_sequences_algorithmic.sort(sequence_algorithmic_cmp)
xorg_compose_sequences_algorithmic_uniqued = uniq(xorg_compose_sequences_algorithmic)

firstitem = ""
num_first_keysyms = 0
zeroes = 0
num_entries = 0
num_algorithmic_greek = 0
for sequence in xorg_compose_sequences:
	if keysymvalue(firstitem) != keysymvalue(sequence[0]):
		firstitem = sequence[0]
		num_first_keysyms += 1
	zeroes += 6 - len(sequence) + 1
	num_entries += 1

for sequence in xorg_compose_sequences_algorithmic_uniqued:
	ch = ord(sequence[-1:][0])
	if ch >= 0x370 and ch <= 0x3ff or ch >= 0x1f00 and ch <= 0x1fff:
		num_algorithmic_greek += 1


if opt_algorithmic:
	for sequence in xorg_compose_sequences_algorithmic_uniqued:
		letter = "".join(sequence[-1:])
		print '0x%(cp)04X, %(uni)c, seq: [ <0x%(base)04X>,' % { 'cp': ord(unicode(letter)), 'uni': letter, 'base': sequence[-2] },
		for elem in sequence[:-2]:
			print "<0x%(keysym)04X>," % { 'keysym': elem },
		""" Yeah, verified... We just want to keep the output similar to -u, so we can compare/sort easily """
		print "], recomposed as", letter, "verified"


def num_of_keysyms(seq):
	return len(seq) - 1


def convert_UnotationToHex(arg):
	if isinstance(arg, str):
		if match('^U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]$', arg):
			return sub('^U', '0x', arg)
	return arg


def addprefix_GDK(arg):
	if match('^0x', arg):
		return '%(arg)s, ' % { 'arg': arg }
	else:
		return 'GDK_%(arg)s, ' % { 'arg': arg }
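
# Layout of the compact table emitted below: a header block of num_first_keysyms
# rows, each WIDTHOFCOMPOSETABLE+1 (= 6) guint16 wide, holding the first keysym of a
# group followed by five indices; index j (j = 1..5) points at where the packed data
# for sequences of j+1 keysyms starting with that keysym begins. Each packed entry
# there stores the j trailing keysyms followed by the composed Unicode code point.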
if opt_gtk:
	first_keysym = ""
	sequence = []
	compose_table = []
	ct_second_part = []
	ct_sequence_width = 2
	start_offset = num_first_keysyms * (WIDTHOFCOMPOSETABLE+1)
	we_finished = False
	counter = 0

	sequence_iterator = iter(xorg_compose_sequences)
	sequence = sequence_iterator.next()
	while True:
		first_keysym = sequence[0]					# Set the first keysym
		compose_table.append([first_keysym, 0, 0, 0, 0, 0])
		while sequence[0] == first_keysym:
			compose_table[counter][num_of_keysyms(sequence)-1] += 1
			try:
				sequence = sequence_iterator.next()
			except StopIteration:
				we_finished = True
				break
		if we_finished:
			break
		counter += 1

	ct_index = start_offset
	for line_num in range(len(compose_table)):
		for i in range(WIDTHOFCOMPOSETABLE):
			occurrences = compose_table[line_num][i+1]
			compose_table[line_num][i+1] = ct_index
			ct_index += occurrences * (i+2)

	for sequence in xorg_compose_sequences:
		ct_second_part.append(map(convert_UnotationToHex, sequence))

	print headerfile_start
	for i in compose_table:
		if opt_gtkexpanded:
			print "0x%(ks)04X," % { "ks": keysymvalue(i[0]) },
			print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i[1:])) }
		elif not match('^0x', i[0]):
			print 'GDK_%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
		else:
			print '%(str)s' % { 'str': "".join(map(lambda x : str(x) + ", ", i)) }
	for i in ct_second_part:
		if opt_numeric:
			for ks in i[1:][:-1]:
				print '0x%(seq)04X, ' % { 'seq': keysymvalue(ks) },
			print '0x%(cp)04X, ' % { 'cp':i[-1] }
			"""
			for ks in i[:-1]:
				print '0x%(seq)04X, ' % { 'seq': keysymvalue(ks) },
			print '0x%(cp)04X, ' % { 'cp':i[-1] }
			"""
		elif opt_gtkexpanded:
			print '%(seq)s0x%(cp)04X, ' % { 'seq': "".join(map(addprefix_GDK, i[:-1])), 'cp':i[-1] }
		else:
			print '%(seq)s0x%(cp)04X, ' % { 'seq': "".join(map(addprefix_GDK, i[:-1][1:])), 'cp':i[-1] }
	print headerfile_end
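
# redecompose() recursively expands a code point into its full canonical
# decomposition using unicodedatabase; process_unicodedata_file() walks
# UnicodeData.txt with it to count how many precomposed characters (and how many
# sequence orderings) NFC recomposition could produce, feeding the statistics
# shown by --statistics and the listing shown by -u.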
def redecompose(codepoint):
	(name, decomposition, combiningclass) = unicodedatabase[codepoint]
	if decomposition[0] == '' or decomposition[0] == '0':
		return [codepoint]
	if match('<\w+>', decomposition[0]):
		numdecomposition = map(stringtohex, decomposition[1:])
		return map(redecompose, numdecomposition)
	numdecomposition = map(stringtohex, decomposition)
	return map(redecompose, numdecomposition)


def process_unicodedata_file(verbose = False):
	""" Grab from wget http://www.unicode.org/Public/UNIDATA/UnicodeData.txt """
	filename_unicodedatatxt = download_file(URL_UNICODEDATATXT)
	try:
		unicodedatatxt = open(filename_unicodedatatxt, 'r')
	except IOError, (errno, strerror):
		print "I/O error(%s): %s" % (errno, strerror)
		sys.exit(-1)
	except:
		print "Unexpected error: ", sys.exc_info()[0]
		sys.exit(-1)
	for line in unicodedatatxt.readlines():
		if line.strip() == "" or line[0] == '#':
			continue
		line = line[:-1]
		uniproperties = split(';', line)
		codepoint = stringtohex(uniproperties[0])
		""" We don't do Plane 1 or CJK blocks. The latter require reading additional files. """
		if codepoint > 0xFFFF or (codepoint >= 0x4E00 and codepoint <= 0x9FFF) or (codepoint >= 0xF900 and codepoint <= 0xFAFF):
			continue
		name = uniproperties[1]
		category = uniproperties[2]
		combiningclass = uniproperties[3]
		decomposition = uniproperties[5]
		unicodedatabase[codepoint] = [name, split('\s+', decomposition), combiningclass]

	counter_combinations = 0
	counter_combinations_greek = 0
	counter_entries = 0
	counter_entries_greek = 0

	for item in unicodedatabase.keys():
		(name, decomposition, combiningclass) = unicodedatabase[item]
		if decomposition[0] == '':
			continue
			print name, "is empty"
		elif match('<\w+>', decomposition[0]):
			continue
			print name, "has weird", decomposition[0]
		else:
			sequence = map(stringtohex, decomposition)
			chrsequence = map(unichr, sequence)
			normalized = normalize('NFC', "".join(chrsequence))

			""" print name, sequence, "Combining: ", "".join(chrsequence), normalized, len(normalized),  """
			decomposedsequence = []
			for subseq in map(redecompose, sequence):
				for seqitem in subseq:
					if isinstance(seqitem, list):
						for i in seqitem:
							if isinstance(i, list):
								for j in i:
									decomposedsequence.append(j)
							else:
								decomposedsequence.append(i)
					else:
						decomposedsequence.append(seqitem)
			recomposedchar = normalize('NFC', "".join(map(unichr, decomposedsequence)))
			if len(recomposedchar) == 1 and len(decomposedsequence) > 1:
				counter_entries += 1
				counter_combinations += factorial(len(decomposedsequence)-1)
				ch = item
				if ch >= 0x370 and ch <= 0x3ff or ch >= 0x1f00 and ch <= 0x1fff:
					counter_entries_greek += 1
					counter_combinations_greek += factorial(len(decomposedsequence)-1)
				if verbose:
					print "0x%(cp)04X, %(uni)c, seq:" % { 'cp':item, 'uni':unichr(item) },
					print "[",
					for elem in decomposedsequence:
						print '<0x%(hex)04X>,' % { 'hex': elem },
					print "], recomposed as", recomposedchar,
					if unichr(item) == recomposedchar:
						print "verified"

	if verbose == False:
		print "Unicode statistics from UnicodeData.txt"
		print "Number of entries that can be algorithmically produced     :", counter_entries
		print "  of which are for Greek                                   :", counter_entries_greek
		print "Number of compose sequence combinations requiring          :", counter_combinations
		print "  of which are for Greek                                   :", counter_combinations_greek
		print "Note: We do not include partial compositions, "
		print "thus the slight discrepancy in the figures"
		print

if opt_unicodedatatxt:
	process_unicodedata_file(True)

if opt_statistics:
	print
	print "Total number of compose sequences (from file)              :", len(xorg_compose_sequences) + len(xorg_compose_sequences_algorithmic)
	print "  of which can be expressed algorithmically                :", len(xorg_compose_sequences_algorithmic)
	print "  of which cannot be expressed algorithmically             :", len(xorg_compose_sequences)
	print "    of which have Multi_key                                :", counter_multikey
	print
	print "Algorithmic (stats for Xorg Compose file)"
	print "Number of sequences off due to algo from file (len(array)) :", len(xorg_compose_sequences_algorithmic)
	print "Number of sequences off due to algo (uniq(sort(array)))    :", len(xorg_compose_sequences_algorithmic_uniqued)
	print "  of which are for Greek                                   :", num_algorithmic_greek
	print
	process_unicodedata_file()
	print "Not algorithmic (stats from Xorg Compose file)"
	print "Number of sequences                                        :", len(xorg_compose_sequences)
	print "Flat array looks like                                      :", len(xorg_compose_sequences), "rows of 6 integers (2 bytes per int, or 12 bytes per row)"
	print "Flat array would have taken up (in bytes)                  :", num_entries * 2 * 6, "bytes from the GTK+ library"
	print "Number of items in flat array                              :", len(xorg_compose_sequences) * 6
	print "  of which are zeroes                                      :", zeroes, "or ", (100 * zeroes) / (len(xorg_compose_sequences) * 6), " per cent"
	print "Number of different first items                            :", num_first_keysyms
	print "Number of max bytes (if using flat array)                  :", num_entries * 2 * 6
	print "Number of savings                                          :", zeroes * 2 - num_first_keysyms * 2 * 5
	print
	print "Memory needs if both algorithmic+optimised table in latest Xorg compose file"
	print "                                                           :", num_entries * 2 * 6 - zeroes * 2 + num_first_keysyms * 2 * 5
	print
	print "Existing (old) implementation in GTK+"
	print "Number of sequences in old gtkimcontextsimple.c            :", 691
	print "The existing (old) implementation in GTK+ takes up         :", 691 * 2 * 12, "bytes"