|
|
https://github.com/google/mozc/issues/462
|
|
|
|
|
|
--- /src/dictionary/gen_pos_map.py
|
|
|
+++ /src/dictionary/gen_pos_map.py
|
|
|
@@ -39,7 +39,7 @@
|
|
|
from build_tools import code_generator_util
|
|
|
|
|
|
|
|
|
-HEADER = """// Copyright 2009 Google Inc. All Rights Reserved.
|
|
|
+HEADER = b"""// Copyright 2009 Google Inc. All Rights Reserved.
|
|
|
// Author: keni
|
|
|
|
|
|
#ifndef MOZC_DICTIONARY_POS_MAP_H_
|
|
|
@@ -48,13 +48,13 @@
|
|
|
// POS conversion rules
|
|
|
const POSMap kPOSMap[] = {
|
|
|
"""
|
|
|
-FOOTER = """};
|
|
|
+FOOTER = b"""};
|
|
|
|
|
|
#endif // MOZC_DICTIONARY_POS_MAP_H_
|
|
|
"""
|
|
|
|
|
|
def ParseUserPos(user_pos_file):
|
|
|
- with open(user_pos_file, 'r') as stream:
|
|
|
+ with open(user_pos_file, 'rb') as stream:
|
|
|
stream = code_generator_util.SkipLineComment(stream)
|
|
|
stream = code_generator_util.ParseColumnStream(stream, num_column=2)
|
|
|
return dict((key, enum_value) for key, enum_value in stream)
|
|
|
@@ -64,7 +64,7 @@
|
|
|
user_pos_map = ParseUserPos(user_pos_file)
|
|
|
|
|
|
result = {}
|
|
|
- with open(third_party_pos_map_file, 'r') as stream:
|
|
|
+ with open(third_party_pos_map_file, 'rb') as stream:
|
|
|
stream = code_generator_util.SkipLineComment(stream)
|
|
|
for columns in code_generator_util.ParseColumnStream(stream, num_column=2):
|
|
|
third_party_pos_name, mozc_pos = (columns + [None])[:2]
|
|
|
@@ -78,7 +78,7 @@
|
|
|
result[third_party_pos_name] = mozc_pos
|
|
|
|
|
|
# Create mozc_pos to mozc_pos map.
|
|
|
- for key, value in user_pos_map.iteritems():
|
|
|
+ for key, value in user_pos_map.items():
|
|
|
if key in result:
|
|
|
assert (result[key] == value)
|
|
|
continue
|
|
|
@@ -94,10 +94,10 @@
|
|
|
if value is None:
|
|
|
# Invalid PosType.
|
|
|
value = (
|
|
|
- 'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
|
|
|
+ b'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
|
|
|
else:
|
|
|
- value = '::mozc::user_dictionary::UserDictionary::' + value
|
|
|
- output.write(' { %s, %s },\n' % (key, value))
|
|
|
+ value = b'::mozc::user_dictionary::UserDictionary::' + value
|
|
|
+ output.write(b' { %s, %s },\n' % (key, value))
|
|
|
output.write(FOOTER)
|
|
|
|
|
|
|
|
|
@@ -121,7 +121,7 @@
|
|
|
pos_map = GeneratePosMap(options.third_party_pos_map_file,
|
|
|
options.user_pos_file)
|
|
|
|
|
|
- with open(options.output, 'w') as stream:
|
|
|
+ with open(options.output, 'wb') as stream:
|
|
|
OutputPosMap(pos_map, stream)
|
|
|
|
|
|
|
|
|
--- /src/dictionary/gen_pos_rewrite_rule.py
|
|
|
+++ /src/dictionary/gen_pos_rewrite_rule.py
|
|
|
@@ -46,29 +46,34 @@
|
|
|
|
|
|
|
|
|
def LoadRewriteMapRule(filename):
|
|
|
- fh = open(filename)
|
|
|
+ fh = open(filename, 'rb')
|
|
|
rule = []
|
|
|
for line in fh:
|
|
|
- line = line.rstrip('\n')
|
|
|
- if not line or line.startswith('#'):
|
|
|
+ line = line.rstrip(b'\n')
|
|
|
+ if not line or line.startswith(b'#'):
|
|
|
continue
|
|
|
fields = line.split()
|
|
|
rule.append([fields[0], fields[1]])
|
|
|
+ fh.close()
|
|
|
return rule
|
|
|
|
|
|
|
|
|
def ReadPOSID(id_file, special_pos_file):
|
|
|
pos_list = []
|
|
|
|
|
|
- for line in open(id_file, 'r'):
|
|
|
+ fh = open(id_file, 'rb')
|
|
|
+ for line in fh:
|
|
|
fields = line.split()
|
|
|
pos_list.append(fields[1])
|
|
|
+ fh.close()
|
|
|
|
|
|
- for line in open(special_pos_file, 'r'):
|
|
|
- if len(line) <= 1 or line[0] == '#':
|
|
|
+ fh = open(special_pos_file, 'rb')
|
|
|
+ for line in fh:
|
|
|
+ if len(line) <= 1 or line[0:1] == b'#':
|
|
|
continue
|
|
|
fields = line.split()
|
|
|
pos_list.append(fields[0])
|
|
|
+ fh.close()
|
|
|
|
|
|
return pos_list
|
|
|
|
|
|
@@ -112,7 +117,7 @@
|
|
|
ids.append(id)
|
|
|
|
|
|
with open(opts.output, 'wb') as f:
|
|
|
- f.write(''.join(chr(id) for id in ids))
|
|
|
+ f.write(''.join(chr(id) for id in ids).encode('utf-8'))
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
--- /src/dictionary/gen_suffix_data.py
|
|
|
+++ /src/dictionary/gen_suffix_data.py
|
|
|
@@ -52,10 +52,10 @@
|
|
|
opts = _ParseOptions()
|
|
|
|
|
|
result = []
|
|
|
- with open(opts.input, 'r') as stream:
|
|
|
+ with open(opts.input, 'rb') as stream:
|
|
|
for line in stream:
|
|
|
- line = line.rstrip('\r\n')
|
|
|
- fields = line.split('\t')
|
|
|
+ line = line.rstrip(b'\r\n')
|
|
|
+ fields = line.split(b'\t')
|
|
|
key = fields[0]
|
|
|
lid = int(fields[1])
|
|
|
rid = int(fields[2])
|
|
|
@@ -63,7 +63,7 @@
|
|
|
value = fields[4]
|
|
|
|
|
|
if key == value:
|
|
|
- value = ''
|
|
|
+ value = b''
|
|
|
|
|
|
result.append((key, value, lid, rid, cost))
|
|
|
|
|
|
--- /src/dictionary/gen_user_pos_data.py
|
|
|
+++ /src/dictionary/gen_user_pos_data.py
|
|
|
@@ -64,7 +64,7 @@
|
|
|
f.write(struct.pack('<H', conjugation_id))
|
|
|
|
|
|
serialized_string_array_builder.SerializeToFile(
|
|
|
- sorted(string_index.iterkeys()), output_string_array)
|
|
|
+ sorted(x.encode('utf-8') for x in string_index.keys()), output_string_array)
|
|
|
|
|
|
|
|
|
def ParseOptions():
|
|
|
@@ -100,7 +100,7 @@
|
|
|
|
|
|
if options.output_pos_list:
|
|
|
serialized_string_array_builder.SerializeToFile(
|
|
|
- [pos for (pos, _) in user_pos.data], options.output_pos_list)
|
|
|
+ [pos.encode('utf-8') for (pos, _) in user_pos.data], options.output_pos_list)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
--- /src/dictionary/gen_zip_code_seed.py
|
|
|
+++ /src/dictionary/gen_zip_code_seed.py
|
|
|
@@ -83,7 +83,7 @@
|
|
|
address = unicodedata.normalize('NFKC', self.address)
|
|
|
line = '\t'.join([zip_code, '0', '0', str(ZIP_CODE_COST),
|
|
|
address, ZIP_CODE_LABEL])
|
|
|
- print line.encode('utf-8')
|
|
|
+ print(line.encode('utf-8'))
|
|
|
|
|
|
|
|
|
def ProcessZipCodeCSV(file_name):
|
|
|
@@ -105,26 +105,26 @@
|
|
|
|
|
|
def ReadZipCodeEntries(zip_code, level1, level2, level3):
|
|
|
"""Read zip code entries."""
|
|
|
- return [ZipEntry(zip_code, u''.join([level1, level2, town]))
|
|
|
+ return [ZipEntry(zip_code, ''.join([level1, level2, town]))
|
|
|
for town in ParseTownName(level3)]
|
|
|
|
|
|
|
|
|
def ReadJigyosyoEntry(zip_code, level1, level2, level3, name):
|
|
|
"""Read jigyosyo entry."""
|
|
|
return ZipEntry(zip_code,
|
|
|
- u''.join([level1, level2, level3, u' ', name]))
|
|
|
+ ''.join([level1, level2, level3, ' ', name]))
|
|
|
|
|
|
|
|
|
def ParseTownName(level3):
|
|
|
"""Parse town name."""
|
|
|
- if level3.find(u'以下に掲載がない場合') != -1:
|
|
|
+ if level3.find('以下に掲載がない場合') != -1:
|
|
|
return ['']
|
|
|
|
|
|
assert CanParseAddress(level3), ('failed to be merged %s'
|
|
|
% level3.encode('utf-8'))
|
|
|
|
|
|
# We ignore additional information here.
|
|
|
- level3 = re.sub(u'(.*)', u'', level3, re.U)
|
|
|
+ level3 = re.sub('(.*)', '', level3, re.U)
|
|
|
|
|
|
# For 地割, we have these cases.
|
|
|
# XX1地割
|
|
|
@@ -134,7 +134,7 @@
|
|
|
# XX第1地割、XX第2地割、
|
|
|
# XX第1地割〜XX第2地割、
|
|
|
# We simply use XX for them.
|
|
|
- chiwari_match = re.match(u'(\D*?)第?\d+地割.*', level3, re.U)
|
|
|
+ chiwari_match = re.match('(\D*?)第?\d+地割.*', level3, re.U)
|
|
|
if chiwari_match:
|
|
|
town = chiwari_match.group(1)
|
|
|
return [town]
|
|
|
@@ -144,21 +144,21 @@
|
|
|
# -> XX町YY and (XX町)ZZ
|
|
|
# YY、ZZ
|
|
|
# -> YY and ZZ
|
|
|
- chou_match = re.match(u'(.*町)?(.*)', level3, re.U)
|
|
|
+ chou_match = re.match('(.*町)?(.*)', level3, re.U)
|
|
|
if chou_match:
|
|
|
- chou = u''
|
|
|
+ chou = ''
|
|
|
if chou_match.group(1):
|
|
|
chou = chou_match.group(1)
|
|
|
rests = chou_match.group(2)
|
|
|
- return [chou + rest for rest in rests.split(u'、')]
|
|
|
+ return [chou + rest for rest in rests.split('、')]
|
|
|
|
|
|
return [level3]
|
|
|
|
|
|
|
|
|
def CanParseAddress(address):
|
|
|
"""Return true for valid address."""
|
|
|
- return (address.find(u'(') == -1 or
|
|
|
- address.find(u')') != -1)
|
|
|
+ return (address.find('(') == -1 or
|
|
|
+ address.find(')') != -1)
|
|
|
|
|
|
|
|
|
def ParseOptions():
|
|
|
--- /src/dictionary/zip_code_util.py
|
|
|
+++ /src/dictionary/zip_code_util.py
|
|
|
@@ -86,11 +86,11 @@
|
|
|
|
|
|
|
|
|
_SPECIAL_CASES = [
|
|
|
- SpecialMergeZip(u'5900111', u'大阪府', u'堺市中区', [u'三原台']),
|
|
|
- SpecialMergeZip(u'8710046', u'大分県', u'中津市',
|
|
|
- [u'金谷', u'西堀端', u'東堀端', u'古金谷']),
|
|
|
- SpecialMergeZip(u'9218046', u'石川県', u'金沢市',
|
|
|
- [u'大桑町', u'三小牛町']),
|
|
|
+ SpecialMergeZip('5900111', '大阪府', '堺市中区', ['三原台']),
|
|
|
+ SpecialMergeZip('8710046', '大分県', '中津市',
|
|
|
+ ['金谷', '西堀端', '東堀端', '古金谷']),
|
|
|
+ SpecialMergeZip('9218046', '石川県', '金沢市',
|
|
|
+ ['大桑町', '三小牛町']),
|
|
|
]
|
|
|
|
|
|
|
|
|
--- /src/gui/character_pad/data/gen_cp932_map.py
|
|
|
+++ /src/gui/character_pad/data/gen_cp932_map.py
|
|
|
@@ -32,7 +32,6 @@
|
|
|
|
|
|
import re
|
|
|
import sys
|
|
|
-import string
|
|
|
|
|
|
kUnicodePat = re.compile(r'0x[0-9A-Fa-f]{2,4}')
|
|
|
def IsValidUnicode(n):
|
|
|
@@ -42,28 +41,29 @@
|
|
|
fh = open(sys.argv[1])
|
|
|
result = {}
|
|
|
for line in fh.readlines():
|
|
|
- if line[0] is '#':
|
|
|
+ if line[0] == '#':
|
|
|
continue
|
|
|
- array = string.split(line)
|
|
|
+ array = line.split()
|
|
|
sjis = array[0]
|
|
|
ucs2 = array[1]
|
|
|
if eval(sjis) < 32 or not IsValidUnicode(ucs2):
|
|
|
continue
|
|
|
result.setdefault(ucs2, sjis)
|
|
|
+ fh.close()
|
|
|
|
|
|
keys = sorted(result.keys())
|
|
|
|
|
|
- print "struct CP932MapData {"
|
|
|
- print " unsigned int ucs4;"
|
|
|
- print " unsigned short int sjis;"
|
|
|
- print "};"
|
|
|
- print ""
|
|
|
- print "static const size_t kCP932MapDataSize = %d;" % (len(keys))
|
|
|
- print "static const CP932MapData kCP932MapData[] = {"
|
|
|
+ print("struct CP932MapData {")
|
|
|
+ print(" unsigned int ucs4;")
|
|
|
+ print(" unsigned short int sjis;")
|
|
|
+ print("};")
|
|
|
+ print("")
|
|
|
+ print("static const size_t kCP932MapDataSize = %d;" % (len(keys)))
|
|
|
+ print("static const CP932MapData kCP932MapData[] = {")
|
|
|
for n in keys:
|
|
|
- print " { %s, %s }," % (n ,result[n])
|
|
|
- print " { 0, 0 }";
|
|
|
- print "};"
|
|
|
+ print(" { %s, %s }," % (n ,result[n]))
|
|
|
+ print(" { 0, 0 }");
|
|
|
+ print("};")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main()
|
|
|
--- /src/gui/character_pad/data/gen_local_character_map.py
|
|
|
+++ /src/gui/character_pad/data/gen_local_character_map.py
|
|
|
@@ -30,7 +30,6 @@
|
|
|
|
|
|
__author__ = "taku"
|
|
|
|
|
|
-import string
|
|
|
import re
|
|
|
import sys
|
|
|
|
|
|
@@ -43,9 +42,9 @@
|
|
|
fh = open(filename)
|
|
|
result = []
|
|
|
for line in fh.readlines():
|
|
|
- if line[0] is '#':
|
|
|
+ if line[0] == '#':
|
|
|
continue
|
|
|
- array = string.split(line)
|
|
|
+ array = line.split()
|
|
|
jis = array[0].replace('0x', '')
|
|
|
ucs2 = array[1].replace('0x', '')
|
|
|
if len(jis) == 2:
|
|
|
@@ -53,6 +52,7 @@
|
|
|
|
|
|
if IsValidUnicode(ucs2):
|
|
|
result.append([jis, ucs2])
|
|
|
+ fh.close()
|
|
|
|
|
|
return ["JISX0201", result]
|
|
|
|
|
|
@@ -60,13 +60,14 @@
|
|
|
fh = open(filename)
|
|
|
result = []
|
|
|
for line in fh.readlines():
|
|
|
- if line[0] is '#':
|
|
|
+ if line[0] == '#':
|
|
|
continue
|
|
|
array = line.split()
|
|
|
jis = array[1].replace('0x', '')
|
|
|
ucs2 = array[2].replace('0x', '')
|
|
|
if IsValidUnicode(ucs2):
|
|
|
result.append([jis, ucs2])
|
|
|
+ fh.close()
|
|
|
|
|
|
return ["JISX0208", result]
|
|
|
|
|
|
@@ -74,13 +75,14 @@
|
|
|
fh = open(filename)
|
|
|
result = []
|
|
|
for line in fh.readlines():
|
|
|
- if line[0] is '#':
|
|
|
+ if line[0] == '#':
|
|
|
continue
|
|
|
array = line.split()
|
|
|
jis = array[0].replace('0x', '')
|
|
|
ucs2 = array[1].replace('0x', '')
|
|
|
if IsValidUnicode(ucs2):
|
|
|
result.append([jis, ucs2])
|
|
|
+ fh.close()
|
|
|
|
|
|
return ["JISX0212", result]
|
|
|
|
|
|
@@ -88,7 +90,7 @@
|
|
|
fh = open(filename)
|
|
|
result = []
|
|
|
for line in fh.readlines():
|
|
|
- if line[0] is '#':
|
|
|
+ if line[0] == '#':
|
|
|
continue
|
|
|
array = line.split()
|
|
|
sjis = array[0].replace('0x', '')
|
|
|
@@ -100,19 +102,20 @@
|
|
|
|
|
|
if IsValidUnicode(ucs2):
|
|
|
result.append([sjis, ucs2])
|
|
|
+ fh.close()
|
|
|
|
|
|
return ["CP932", result]
|
|
|
|
|
|
def Output(arg):
|
|
|
name = arg[0]
|
|
|
result = arg[1]
|
|
|
- print "static const size_t k%sMapSize = %d;" % (name, len(result))
|
|
|
- print "static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name)
|
|
|
+ print("static const size_t k%sMapSize = %d;" % (name, len(result)))
|
|
|
+ print("static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name))
|
|
|
for n in result:
|
|
|
- print " { 0x%s, 0x%s }," % (n[0] ,n[1])
|
|
|
- print " { 0, 0 }";
|
|
|
- print "};"
|
|
|
- print ""
|
|
|
+ print(" { 0x%s, 0x%s }," % (n[0] ,n[1]))
|
|
|
+ print(" { 0, 0 }");
|
|
|
+ print("};")
|
|
|
+ print("")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
Output(LoadJISX0201(sys.argv[1]))
|
|
|
--- /src/gui/character_pad/data/gen_unicode_blocks.py
|
|
|
+++ /src/gui/character_pad/data/gen_unicode_blocks.py
|
|
|
@@ -33,13 +33,13 @@
|
|
|
import sys
|
|
|
import re
|
|
|
|
|
|
-re = re.compile('^(.....?)\.\.(.....?); (.+)')
|
|
|
+re = re.compile(r'^(.....?)\.\.(.....?); (.+)')
|
|
|
|
|
|
def main():
|
|
|
- print "static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {"
|
|
|
+ print("static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {")
|
|
|
fh = open(sys.argv[1])
|
|
|
for line in fh.readlines():
|
|
|
- if line[0] is '#':
|
|
|
+ if line[0] == '#':
|
|
|
continue
|
|
|
m = re.match(line)
|
|
|
if m is not None:
|
|
|
@@ -47,11 +47,12 @@
|
|
|
end = int(m.group(2), 16)
|
|
|
name = m.group(3)
|
|
|
if start <= 0x2FFFF and end <= 0x2FFFF:
|
|
|
- print " { \"%s\", { %d, %d } }," % (name, start, end)
|
|
|
+ print(" { \"%s\", { %d, %d } }," % (name, start, end))
|
|
|
+ fh.close()
|
|
|
|
|
|
- print " { NULL, { 0, 0 } }"
|
|
|
- print "};"
|
|
|
- print ""
|
|
|
+ print(" { NULL, { 0, 0 } }")
|
|
|
+ print("};")
|
|
|
+ print("")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main()
|
|
|
--- /src/gui/character_pad/data/gen_unicode_data.py
|
|
|
+++ /src/gui/character_pad/data/gen_unicode_data.py
|
|
|
@@ -46,18 +46,19 @@
|
|
|
code = int(code, 16)
|
|
|
if code < 0x2FFFF:
|
|
|
results.append(" { %d, \"%s\" }," % (code, desc))
|
|
|
+ fh.close()
|
|
|
|
|
|
- print "struct UnicodeData {";
|
|
|
- print " char32 ucs4;";
|
|
|
- print " const char *description;";
|
|
|
- print "};";
|
|
|
- print ""
|
|
|
- print "static const size_t kUnicodeDataSize = %d;" % (len(results))
|
|
|
- print "static const UnicodeData kUnicodeData[] = {";
|
|
|
+ print("struct UnicodeData {");
|
|
|
+ print(" char32 ucs4;");
|
|
|
+ print(" const char *description;");
|
|
|
+ print("};");
|
|
|
+ print("")
|
|
|
+ print("static const size_t kUnicodeDataSize = %d;" % (len(results)))
|
|
|
+ print("static const UnicodeData kUnicodeData[] = {");
|
|
|
for line in results:
|
|
|
- print line;
|
|
|
- print " { 0, NULL }";
|
|
|
- print "};";
|
|
|
+ print(line);
|
|
|
+ print(" { 0, NULL }");
|
|
|
+ print("};");
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main()
|
|
|
--- /src/gui/character_pad/data/gen_unihan_data.py
|
|
|
+++ /src/gui/character_pad/data/gen_unihan_data.py
|
|
|
@@ -31,35 +31,34 @@
|
|
|
__author__ = "taku"
|
|
|
|
|
|
import re
|
|
|
-import string
|
|
|
import sys
|
|
|
rs = {}
|
|
|
|
|
|
def Escape(n):
|
|
|
- if n is not "NULL":
|
|
|
+ if n != "NULL":
|
|
|
return "\"%s\"" % (n)
|
|
|
else:
|
|
|
return "NULL"
|
|
|
|
|
|
def GetCode(n):
|
|
|
- if n is not "NULL":
|
|
|
- n = string.replace(n, '0-', 'JIS X 0208: 0x')
|
|
|
- n = string.replace(n, '1-', 'JIS X 0212: 0x')
|
|
|
- n = string.replace(n, '3-', 'JIS X 0213: 0x')
|
|
|
- n = string.replace(n, '4-', 'JIS X 0213: 0x')
|
|
|
- n = string.replace(n, 'A-', 'Vendors Ideographs: 0x')
|
|
|
- n = string.replace(n, '3A', 'JIS X 0213 2000: 0x')
|
|
|
+ if n != "NULL":
|
|
|
+ n = n.replace('0-', 'JIS X 0208: 0x')
|
|
|
+ n = n.replace('1-', 'JIS X 0212: 0x')
|
|
|
+ n = n.replace('3-', 'JIS X 0213: 0x')
|
|
|
+ n = n.replace('4-', 'JIS X 0213: 0x')
|
|
|
+ n = n.replace('A-', 'Vendors Ideographs: 0x')
|
|
|
+ n = n.replace('3A', 'JIS X 0213 2000: 0x')
|
|
|
return "\"%s\"" % n
|
|
|
else:
|
|
|
return "NULL"
|
|
|
|
|
|
def GetRadical(n):
|
|
|
pat = re.compile(r'^(\d+)\.')
|
|
|
- if n is not "NULL":
|
|
|
+ if n != "NULL":
|
|
|
m = pat.match(n)
|
|
|
if m:
|
|
|
result = rs[m.group(1)]
|
|
|
- return "\"%s\"" % (result.encode('string_escape'))
|
|
|
+ return "\"%s\"" % result
|
|
|
else:
|
|
|
return "NULL"
|
|
|
else:
|
|
|
@@ -73,6 +72,7 @@
|
|
|
id = array[1]
|
|
|
radical = array[2]
|
|
|
rs[id] = radical
|
|
|
+ fh.close()
|
|
|
|
|
|
dic = {}
|
|
|
pat = re.compile(r'^U\+(\S+)\s+(kTotalStrokes|kJapaneseKun|kJapaneseOn|kRSUnicode|kIRG_JSource)\t(.+)')
|
|
|
@@ -86,23 +86,24 @@
|
|
|
n = int(m.group(1), 16)
|
|
|
if n <= 65536:
|
|
|
dic.setdefault(key, {}).setdefault(field, value)
|
|
|
+ fh.close()
|
|
|
|
|
|
keys = sorted(dic.keys())
|
|
|
|
|
|
- print "struct UnihanData {";
|
|
|
- print " unsigned int ucs4;";
|
|
|
+ print("struct UnihanData {");
|
|
|
+ print(" unsigned int ucs4;");
|
|
|
# Since the total strokes defined in Unihan data is Chinese-based
|
|
|
# number, we can't use it.
|
|
|
# print " unsigned char total_strokes;";
|
|
|
- print " const char *japanese_kun;";
|
|
|
- print " const char *japanese_on;";
|
|
|
+ print(" const char *japanese_kun;");
|
|
|
+ print(" const char *japanese_on;");
|
|
|
# Since the radical information defined in Unihan data is Chinese-based
|
|
|
# number, we can't use it.
|
|
|
# print " const char *radical;";
|
|
|
- print " const char *IRG_jsource;";
|
|
|
- print "};"
|
|
|
- print "static const size_t kUnihanDataSize = %d;" % (len(keys))
|
|
|
- print "static const UnihanData kUnihanData[] = {"
|
|
|
+ print(" const char *IRG_jsource;");
|
|
|
+ print("};")
|
|
|
+ print("static const size_t kUnihanDataSize = %d;" % (len(keys)))
|
|
|
+ print("static const UnihanData kUnihanData[] = {")
|
|
|
|
|
|
for key in keys:
|
|
|
total_strokes = dic[key].get("kTotalStrokes", "0")
|
|
|
@@ -111,9 +112,9 @@
|
|
|
rad = GetRadical(dic[key].get("kRSUnicode", "NULL"))
|
|
|
code = GetCode(dic[key].get("kIRG_JSource", "NULL"))
|
|
|
# print " { 0x%s, %s, %s, %s, %s, %s }," % (key, total_strokes, kun, on, rad, code)
|
|
|
- print " { 0x%s, %s, %s, %s }," % (key, kun, on, code)
|
|
|
+ print(" { 0x%s, %s, %s, %s }," % (key, kun, on, code))
|
|
|
|
|
|
- print "};"
|
|
|
+ print("};")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
main()
|