gentoo-overlay/app-i18n/mozc/files/mozc-2.23.2815.102-python-3...

https://github.com/google/mozc/issues/462
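
Port of the mozc build-time code generators (dictionary and character-pad data) to Python 3: file I/O switches to binary mode with bytes literals, dict.iteritems()/iterkeys() become items()/keys(), string module helpers become str methods, and print statements become print() calls. The sketch below only illustrates the byte-oriented pattern the hunks apply; it is not part of the patch, and the generate() helper and the two-column input layout are assumptions.

# Minimal sketch, assuming a tab-separated input with at least two columns
# and bytes %-formatting (available since Python 3.5, PEP 461).
def generate(input_path, output_path):
    with open(input_path, 'rb') as stream, open(output_path, 'wb') as out:
        out.write(b'// generated file\n')
        for line in stream:
            line = line.rstrip(b'\r\n')
            if not line or line.startswith(b'#'):
                continue
            key, value = line.split(b'\t')[:2]
            out.write(b'  { %s, %s },\n' % (key, value))
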
--- /src/dictionary/gen_pos_map.py
+++ /src/dictionary/gen_pos_map.py
@@ -39,7 +39,7 @@
from build_tools import code_generator_util
-HEADER = """// Copyright 2009 Google Inc. All Rights Reserved.
+HEADER = b"""// Copyright 2009 Google Inc. All Rights Reserved.
// Author: keni
#ifndef MOZC_DICTIONARY_POS_MAP_H_
@@ -48,13 +48,13 @@
// POS conversion rules
const POSMap kPOSMap[] = {
"""
-FOOTER = """};
+FOOTER = b"""};
#endif // MOZC_DICTIONARY_POS_MAP_H_
"""
def ParseUserPos(user_pos_file):
- with open(user_pos_file, 'r') as stream:
+ with open(user_pos_file, 'rb') as stream:
stream = code_generator_util.SkipLineComment(stream)
stream = code_generator_util.ParseColumnStream(stream, num_column=2)
return dict((key, enum_value) for key, enum_value in stream)
@@ -64,7 +64,7 @@
user_pos_map = ParseUserPos(user_pos_file)
result = {}
- with open(third_party_pos_map_file, 'r') as stream:
+ with open(third_party_pos_map_file, 'rb') as stream:
stream = code_generator_util.SkipLineComment(stream)
for columns in code_generator_util.ParseColumnStream(stream, num_column=2):
third_party_pos_name, mozc_pos = (columns + [None])[:2]
@@ -78,7 +78,7 @@
result[third_party_pos_name] = mozc_pos
# Create mozc_pos to mozc_pos map.
- for key, value in user_pos_map.iteritems():
+ for key, value in user_pos_map.items():
if key in result:
assert (result[key] == value)
continue
@@ -94,10 +94,10 @@
if value is None:
# Invalid PosType.
value = (
- 'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
+ b'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
else:
- value = '::mozc::user_dictionary::UserDictionary::' + value
- output.write(' { %s, %s },\n' % (key, value))
+ value = b'::mozc::user_dictionary::UserDictionary::' + value
+ output.write(b' { %s, %s },\n' % (key, value))
output.write(FOOTER)
@@ -121,7 +121,7 @@
pos_map = GeneratePosMap(options.third_party_pos_map_file,
options.user_pos_file)
- with open(options.output, 'w') as stream:
+ with open(options.output, 'wb') as stream:
OutputPosMap(pos_map, stream)
--- /src/dictionary/gen_pos_rewrite_rule.py
+++ /src/dictionary/gen_pos_rewrite_rule.py
@@ -46,29 +46,34 @@
def LoadRewriteMapRule(filename):
- fh = open(filename)
+ fh = open(filename, 'rb')
rule = []
for line in fh:
- line = line.rstrip('\n')
- if not line or line.startswith('#'):
+ line = line.rstrip(b'\n')
+ if not line or line.startswith(b'#'):
continue
fields = line.split()
rule.append([fields[0], fields[1]])
+ fh.close()
return rule
def ReadPOSID(id_file, special_pos_file):
pos_list = []
- for line in open(id_file, 'r'):
+ fh = open(id_file, 'rb')
+ for line in fh:
fields = line.split()
pos_list.append(fields[1])
+ fh.close()
- for line in open(special_pos_file, 'r'):
- if len(line) <= 1 or line[0] == '#':
+ fh = open(special_pos_file, 'rb')
+ for line in fh:
+ if len(line) <= 1 or line[0:1] == b'#':
continue
fields = line.split()
pos_list.append(fields[0])
+ fh.close()
return pos_list
@@ -112,7 +117,7 @@
ids.append(id)
with open(opts.output, 'wb') as f:
- f.write(''.join(chr(id) for id in ids))
+ f.write(''.join(chr(id) for id in ids).encode('utf-8'))
if __name__ == '__main__':
--- /src/dictionary/gen_suffix_data.py
+++ /src/dictionary/gen_suffix_data.py
@@ -52,10 +52,10 @@
opts = _ParseOptions()
result = []
- with open(opts.input, 'r') as stream:
+ with open(opts.input, 'rb') as stream:
for line in stream:
- line = line.rstrip('\r\n')
- fields = line.split('\t')
+ line = line.rstrip(b'\r\n')
+ fields = line.split(b'\t')
key = fields[0]
lid = int(fields[1])
rid = int(fields[2])
@@ -63,7 +63,7 @@
value = fields[4]
if key == value:
- value = ''
+ value = b''
result.append((key, value, lid, rid, cost))
--- /src/dictionary/gen_user_pos_data.py
+++ /src/dictionary/gen_user_pos_data.py
@@ -64,7 +64,7 @@
f.write(struct.pack('<H', conjugation_id))
serialized_string_array_builder.SerializeToFile(
- sorted(string_index.iterkeys()), output_string_array)
+ sorted(x.encode('utf-8') for x in string_index.keys()), output_string_array)
def ParseOptions():
@@ -100,7 +100,7 @@
if options.output_pos_list:
serialized_string_array_builder.SerializeToFile(
- [pos for (pos, _) in user_pos.data], options.output_pos_list)
+ [pos.encode('utf-8') for (pos, _) in user_pos.data], options.output_pos_list)
if __name__ == '__main__':
--- /src/dictionary/gen_zip_code_seed.py
+++ /src/dictionary/gen_zip_code_seed.py
@@ -83,7 +83,7 @@
address = unicodedata.normalize('NFKC', self.address)
line = '\t'.join([zip_code, '0', '0', str(ZIP_CODE_COST),
address, ZIP_CODE_LABEL])
- print line.encode('utf-8')
+ print(line.encode('utf-8'))
def ProcessZipCodeCSV(file_name):
@@ -105,26 +105,26 @@
def ReadZipCodeEntries(zip_code, level1, level2, level3):
"""Read zip code entries."""
- return [ZipEntry(zip_code, u''.join([level1, level2, town]))
+ return [ZipEntry(zip_code, ''.join([level1, level2, town]))
for town in ParseTownName(level3)]
def ReadJigyosyoEntry(zip_code, level1, level2, level3, name):
"""Read jigyosyo entry."""
return ZipEntry(zip_code,
- u''.join([level1, level2, level3, u' ', name]))
+ ''.join([level1, level2, level3, ' ', name]))
def ParseTownName(level3):
"""Parse town name."""
- if level3.find(u'以下に掲載がない場合') != -1:
+ if level3.find('以下に掲載がない場合') != -1:
return ['']
assert CanParseAddress(level3), ('failed to be merged %s'
% level3.encode('utf-8'))
# We ignore additional information here.
- level3 = re.sub(u'（.*）', u'', level3, re.U)
+ level3 = re.sub('（.*）', '', level3, re.U)
# For 地割, we have these cases.
# XX1地割
@@ -134,7 +134,7 @@
# XX第1地割、XX第2地割、
# XX第1地割〜XX第2地割、
# We simply use XX for them.
- chiwari_match = re.match(u'(\D*?)第?\d+地割.*', level3, re.U)
+ chiwari_match = re.match('(\D*?)第?\d+地割.*', level3, re.U)
if chiwari_match:
town = chiwari_match.group(1)
return [town]
@@ -144,21 +144,21 @@
# -> XX町YY and (XX町)ZZ
# YY、ZZ
# -> YY and ZZ
- chou_match = re.match(u'(.*町)?(.*)', level3, re.U)
+ chou_match = re.match('(.*町)?(.*)', level3, re.U)
if chou_match:
- chou = u''
+ chou = ''
if chou_match.group(1):
chou = chou_match.group(1)
rests = chou_match.group(2)
- return [chou + rest for rest in rests.split(u'、')]
+ return [chou + rest for rest in rests.split('、')]
return [level3]
def CanParseAddress(address):
"""Return true for valid address."""
- return (address.find(u'（') == -1 or
- address.find(u'）') != -1)
+ return (address.find('（') == -1 or
+ address.find('）') != -1)
def ParseOptions():
--- /src/dictionary/zip_code_util.py
+++ /src/dictionary/zip_code_util.py
@@ -86,11 +86,11 @@
_SPECIAL_CASES = [
- SpecialMergeZip(u'5900111', u'大阪府', u'堺市中区', [u'三原台']),
- SpecialMergeZip(u'8710046', u'大分県', u'中津市',
- [u'金谷', u'西堀端', u'東堀端', u'古金谷']),
- SpecialMergeZip(u'9218046', u'石川県', u'金沢市',
- [u'大桑町', u'三小牛町']),
+ SpecialMergeZip('5900111', '大阪府', '堺市中区', ['三原台']),
+ SpecialMergeZip('8710046', '大分県', '中津市',
+ ['金谷', '西堀端', '東堀端', '古金谷']),
+ SpecialMergeZip('9218046', '石川県', '金沢市',
+ ['大桑町', '三小牛町']),
]
--- /src/gui/character_pad/data/gen_cp932_map.py
+++ /src/gui/character_pad/data/gen_cp932_map.py
@@ -32,7 +32,6 @@
import re
import sys
-import string
kUnicodePat = re.compile(r'0x[0-9A-Fa-f]{2,4}')
def IsValidUnicode(n):
@@ -42,28 +41,29 @@
fh = open(sys.argv[1])
result = {}
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
- array = string.split(line)
+ array = line.split()
sjis = array[0]
ucs2 = array[1]
if eval(sjis) < 32 or not IsValidUnicode(ucs2):
continue
result.setdefault(ucs2, sjis)
+ fh.close()
keys = sorted(result.keys())
- print "struct CP932MapData {"
- print " unsigned int ucs4;"
- print " unsigned short int sjis;"
- print "};"
- print ""
- print "static const size_t kCP932MapDataSize = %d;" % (len(keys))
- print "static const CP932MapData kCP932MapData[] = {"
+ print("struct CP932MapData {")
+ print(" unsigned int ucs4;")
+ print(" unsigned short int sjis;")
+ print("};")
+ print("")
+ print("static const size_t kCP932MapDataSize = %d;" % (len(keys)))
+ print("static const CP932MapData kCP932MapData[] = {")
for n in keys:
- print " { %s, %s }," % (n ,result[n])
- print " { 0, 0 }";
- print "};"
+ print(" { %s, %s }," % (n ,result[n]))
+ print(" { 0, 0 }");
+ print("};")
if __name__ == "__main__":
main()
--- /src/gui/character_pad/data/gen_local_character_map.py
+++ /src/gui/character_pad/data/gen_local_character_map.py
@@ -30,7 +30,6 @@
__author__ = "taku"
-import string
import re
import sys
@@ -43,9 +42,9 @@
fh = open(filename)
result = []
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
- array = string.split(line)
+ array = line.split()
jis = array[0].replace('0x', '')
ucs2 = array[1].replace('0x', '')
if len(jis) == 2:
@@ -53,6 +52,7 @@
if IsValidUnicode(ucs2):
result.append([jis, ucs2])
+ fh.close()
return ["JISX0201", result]
@@ -60,13 +60,14 @@
fh = open(filename)
result = []
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
array = line.split()
jis = array[1].replace('0x', '')
ucs2 = array[2].replace('0x', '')
if IsValidUnicode(ucs2):
result.append([jis, ucs2])
+ fh.close()
return ["JISX0208", result]
@@ -74,13 +75,14 @@
fh = open(filename)
result = []
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
array = line.split()
jis = array[0].replace('0x', '')
ucs2 = array[1].replace('0x', '')
if IsValidUnicode(ucs2):
result.append([jis, ucs2])
+ fh.close()
return ["JISX0212", result]
@@ -88,7 +90,7 @@
fh = open(filename)
result = []
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
array = line.split()
sjis = array[0].replace('0x', '')
@@ -100,19 +102,20 @@
if IsValidUnicode(ucs2):
result.append([sjis, ucs2])
+ fh.close()
return ["CP932", result]
def Output(arg):
name = arg[0]
result = arg[1]
- print "static const size_t k%sMapSize = %d;" % (name, len(result))
- print "static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name)
+ print("static const size_t k%sMapSize = %d;" % (name, len(result)))
+ print("static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name))
for n in result:
- print " { 0x%s, 0x%s }," % (n[0] ,n[1])
- print " { 0, 0 }";
- print "};"
- print ""
+ print(" { 0x%s, 0x%s }," % (n[0] ,n[1]))
+ print(" { 0, 0 }");
+ print("};")
+ print("")
if __name__ == "__main__":
Output(LoadJISX0201(sys.argv[1]))
--- /src/gui/character_pad/data/gen_unicode_blocks.py
+++ /src/gui/character_pad/data/gen_unicode_blocks.py
@@ -33,13 +33,13 @@
import sys
import re
-re = re.compile('^(.....?)\.\.(.....?); (.+)')
+re = re.compile(r'^(.....?)\.\.(.....?); (.+)')
def main():
- print "static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {"
+ print("static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {")
fh = open(sys.argv[1])
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
m = re.match(line)
if m is not None:
@@ -47,11 +47,12 @@
end = int(m.group(2), 16)
name = m.group(3)
if start <= 0x2FFFF and end <= 0x2FFFF:
- print " { \"%s\", { %d, %d } }," % (name, start, end)
+ print(" { \"%s\", { %d, %d } }," % (name, start, end))
+ fh.close()
- print " { NULL, { 0, 0 } }"
- print "};"
- print ""
+ print(" { NULL, { 0, 0 } }")
+ print("};")
+ print("")
if __name__ == "__main__":
main()
--- /src/gui/character_pad/data/gen_unicode_data.py
+++ /src/gui/character_pad/data/gen_unicode_data.py
@@ -46,18 +46,19 @@
code = int(code, 16)
if code < 0x2FFFF:
results.append(" { %d, \"%s\" }," % (code, desc))
+ fh.close()
- print "struct UnicodeData {";
- print " char32 ucs4;";
- print " const char *description;";
- print "};";
- print ""
- print "static const size_t kUnicodeDataSize = %d;" % (len(results))
- print "static const UnicodeData kUnicodeData[] = {";
+ print("struct UnicodeData {");
+ print(" char32 ucs4;");
+ print(" const char *description;");
+ print("};");
+ print("")
+ print("static const size_t kUnicodeDataSize = %d;" % (len(results)))
+ print("static const UnicodeData kUnicodeData[] = {");
for line in results:
- print line;
- print " { 0, NULL }";
- print "};";
+ print(line);
+ print(" { 0, NULL }");
+ print("};");
if __name__ == "__main__":
main()
--- /src/gui/character_pad/data/gen_unihan_data.py
+++ /src/gui/character_pad/data/gen_unihan_data.py
@@ -31,35 +31,34 @@
__author__ = "taku"
import re
-import string
import sys
rs = {}
def Escape(n):
- if n is not "NULL":
+ if n != "NULL":
return "\"%s\"" % (n)
else:
return "NULL"
def GetCode(n):
- if n is not "NULL":
- n = string.replace(n, '0-', 'JIS X 0208: 0x')
- n = string.replace(n, '1-', 'JIS X 0212: 0x')
- n = string.replace(n, '3-', 'JIS X 0213: 0x')
- n = string.replace(n, '4-', 'JIS X 0213: 0x')
- n = string.replace(n, 'A-', 'Vendors Ideographs: 0x')
- n = string.replace(n, '3A', 'JIS X 0213 2000: 0x')
+ if n != "NULL":
+ n = n.replace('0-', 'JIS X 0208: 0x')
+ n = n.replace('1-', 'JIS X 0212: 0x')
+ n = n.replace('3-', 'JIS X 0213: 0x')
+ n = n.replace('4-', 'JIS X 0213: 0x')
+ n = n.replace('A-', 'Vendors Ideographs: 0x')
+ n = n.replace('3A', 'JIS X 0213 2000: 0x')
return "\"%s\"" % n
else:
return "NULL"
def GetRadical(n):
pat = re.compile(r'^(\d+)\.')
- if n is not "NULL":
+ if n != "NULL":
m = pat.match(n)
if m:
result = rs[m.group(1)]
- return "\"%s\"" % (result.encode('string_escape'))
+ return "\"%s\"" % result
else:
return "NULL"
else:
@@ -73,6 +72,7 @@
id = array[1]
radical = array[2]
rs[id] = radical
+ fh.close()
dic = {}
pat = re.compile(r'^U\+(\S+)\s+(kTotalStrokes|kJapaneseKun|kJapaneseOn|kRSUnicode|kIRG_JSource)\t(.+)')
@@ -86,23 +86,24 @@
n = int(m.group(1), 16)
if n <= 65536:
dic.setdefault(key, {}).setdefault(field, value)
+ fh.close()
keys = sorted(dic.keys())
- print "struct UnihanData {";
- print " unsigned int ucs4;";
+ print("struct UnihanData {");
+ print(" unsigned int ucs4;");
# Since the total strokes defined in Unihan data is Chinese-based
# number, we can't use it.
# print " unsigned char total_strokes;";
- print " const char *japanese_kun;";
- print " const char *japanese_on;";
+ print(" const char *japanese_kun;");
+ print(" const char *japanese_on;");
# Since the radical information defined in Unihan data is Chinese-based
# number, we can't use it.
# print " const char *radical;";
- print " const char *IRG_jsource;";
- print "};"
- print "static const size_t kUnihanDataSize = %d;" % (len(keys))
- print "static const UnihanData kUnihanData[] = {"
+ print(" const char *IRG_jsource;");
+ print("};")
+ print("static const size_t kUnihanDataSize = %d;" % (len(keys)))
+ print("static const UnihanData kUnihanData[] = {")
for key in keys:
total_strokes = dic[key].get("kTotalStrokes", "0")
@@ -111,9 +112,9 @@
rad = GetRadical(dic[key].get("kRSUnicode", "NULL"))
code = GetCode(dic[key].get("kIRG_JSource", "NULL"))
# print " { 0x%s, %s, %s, %s, %s, %s }," % (key, total_strokes, kun, on, rad, code)
- print " { 0x%s, %s, %s, %s }," % (key, kun, on, code)
+ print(" { 0x%s, %s, %s, %s }," % (key, kun, on, code))
- print "};"
+ print("};")
if __name__ == "__main__":
main()