/ lib / wind / UnicodeData.py
UnicodeData.py
 1  #!/usr/local/bin/python
 2  # -*- coding: iso-8859-1 -*-
 3  
 4  # $Id$
 5  
 6  # Copyright (c) 2004 Kungliga Tekniska Högskolan
 7  # (Royal Institute of Technology, Stockholm, Sweden). 
 8  # All rights reserved. 
 9  # 
10  # Redistribution and use in source and binary forms, with or without 
11  # modification, are permitted provided that the following conditions 
12  # are met: 
13  # 
14  # 1. Redistributions of source code must retain the above copyright 
15  #    notice, this list of conditions and the following disclaimer. 
16  # 
17  # 2. Redistributions in binary form must reproduce the above copyright 
18  #    notice, this list of conditions and the following disclaimer in the 
19  #    documentation and/or other materials provided with the distribution. 
20  # 
21  # 3. Neither the name of the Institute nor the names of its contributors 
22  #    may be used to endorse or promote products derived from this software 
23  #    without specific prior written permission. 
24  # 
25  # THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND 
26  # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
27  # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
28  # ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE 
29  # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
30  # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
31  # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
32  # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
33  # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
34  # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
35  # SUCH DAMAGE. 
36  
37  import re
38  import string
39  
def read(filename):
    """Parse a semicolon-separated, UnicodeData-style file into a dict.

    Everything from a '#' to the end of a line is treated as a comment
    and removed.  Lines that are empty (or contain only whitespace) once
    the comment has been removed are skipped.

    filename -- path of the file to read.

    Returns a dict mapping the first field of every remaining line,
    parsed as a hexadecimal integer, to the list of the other fields
    (strings).  Fields are not stripped, so the last field of a line
    still carries that line's trailing newline.

    Raises Exception if the same key occurs on more than one line, and
    ValueError (from int()) if the first field is not a hexadecimal
    number.
    """
    ret = {}
    # The with-block closes the file even when a malformed or duplicate
    # line makes us raise half-way through.
    with open(filename, 'r') as ud:
        for line in ud:
            # '$' also matches just before a trailing newline, so the
            # newline itself survives the comment removal.
            line = re.sub(r'#.*$', '', line)
            # Skip blank lines, including indented comment-only lines
            # that leave nothing but whitespace behind.
            if not line.strip():
                continue
            fields = line.split(';')
            key = int(fields[0], 0x10)
            if key in ret:
                raise Exception('Duplicate key in UnicodeData')
            ret[key] = fields[1:]
    return ret