UnicodeData.py
1 #!/usr/local/bin/python 2 # -*- coding: iso-8859-1 -*- 3 4 # $Id$ 5 6 # Copyright (c) 2004 Kungliga Tekniska Högskolan 7 # (Royal Institute of Technology, Stockholm, Sweden). 8 # All rights reserved. 9 # 10 # Redistribution and use in source and binary forms, with or without 11 # modification, are permitted provided that the following conditions 12 # are met: 13 # 14 # 1. Redistributions of source code must retain the above copyright 15 # notice, this list of conditions and the following disclaimer. 16 # 17 # 2. Redistributions in binary form must reproduce the above copyright 18 # notice, this list of conditions and the following disclaimer in the 19 # documentation and/or other materials provided with the distribution. 20 # 21 # 3. Neither the name of the Institute nor the names of its contributors 22 # may be used to endorse or promote products derived from this software 23 # without specific prior written permission. 24 # 25 # THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND 26 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 # ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE 29 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 # SUCH DAMAGE. 
import re
import string


def read(filename):
    """Return a dict of the records in a ';'-separated data file.

    Every line has any '#' comment (from the '#' to the end of the line)
    removed.  Lines that are empty or whitespace-only after that are
    skipped.  Each remaining line is split on ';': the first field is
    parsed as a hexadecimal integer and used as the dict key, and the
    list of the remaining fields (as strings) is the value.

    Field text is not otherwise modified, so the last field of a record
    still carries the line's trailing newline when the line had one.

    Arguments:
    filename -- path of the file to read (opened in text mode).

    Raises:
    Exception -- if the same key appears on more than one line.
    ValueError -- if the first field of a line is not a hexadecimal number.
    """
    strip_comment = re.compile(r'#.*$').sub
    ret = {}
    # 'with' closes the file even when a malformed line raises below.
    with open(filename, 'r') as ud:
        for line in ud:
            line = strip_comment('', line)
            # Comparing against "\n" alone is not enough: an indented
            # comment, a line of spaces, or a comment-only final line with
            # no newline would otherwise reach int() and fail.
            if not line.strip():
                continue
            fields = line.split(';')
            key = int(fields[0], 0x10)
            if key in ret:
                raise Exception('Duplicate key in UnicodeData')
            ret[key] = fields[1:]
    return ret