|
|
|
@ -1865,6 +1865,163 @@ class _file(_error):
|
|
|
|
|
self.__closeOldFile()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class utfBin:
    """Reversible converter between "dirty" byte text and clean UTF-8.

    encode() takes text that is binary, or a mix of binary and UTF-8, and
    replaces every byte that is not part of a well-formed UTF-8 sequence
    with an ASCII marker of the form ``__hex__?<hex digits>?__hex__``.
    decode() turns such markers back into the original bytes.

    The text is processed one character at a time through ord(), so it is
    expected to hold one character per byte
    (NOTE(review): i.e. a Python 2 ``str`` -- confirm against callers).
    """

    def _retUTF(self, char):
        """Classify a single byte by its role in a UTF-8 stream.

        Returns a tuple (kind, length):
          '_ch_' - ASCII character (0..127), length 1
          '_nb_' - continuation byte (128..191), length 1
          '_fb_' - lead byte; length is the announced size of the whole
                   sequence: 2 (192..223), 3 (224..239) or 4 (240..247)
          '_er_' - a value that is none of the above, length 1

        NOTE: the classification is by value range only; overlong forms
        (for example lead bytes 0xC0/0xC1) are not rejected here.
        """
        byte = ord(char)
        if byte <= 127:
            return ('_ch_', 1)
        if byte <= 191:
            return ('_nb_', 1)
        if byte <= 223:
            return ('_fb_', 2)
        if byte <= 239:
            return ('_fb_', 3)
        if byte <= 247:
            return ('_fb_', 4)
        return ('_er_', 1)

    def _sumbUtf(self, symbols, lenTail):
        """Check whether ``symbols`` starts with a complete UTF-8 sequence.

        symbols - list of kinds produced by _retUTF, starting at the byte
                  under test (only the first ``lenTail`` items are read)
        lenTail - sequence length announced by the lead byte

        Returns (True, lenTail) when the first symbol is a lead byte
        followed by exactly lenTail-1 continuation bytes, so the caller can
        skip the whole sequence.  Returns (False, 1) when the sequence is
        broken or truncated, so the caller escapes one byte and moves on.
        An empty list gives (False, 0); a single symbol is valid only when
        it is an ASCII character.
        """
        if not symbols:
            return (False, 0)
        if len(symbols) == 1:
            # A lone lead byte at the very end of the text is truncated.
            return (symbols[0] == '_ch_', 1)
        if symbols[0] != '_fb_' or lenTail < 2:
            return (False, 1)
        # Every announced continuation byte must be present.  The previous
        # loop-index based check was off by one and rejected all 4-byte
        # sequences and any sequence ending exactly at the end of the text.
        tail = symbols[1:lenTail]
        if len(tail) == lenTail - 1 and all(s == '_nb_' for s in tail):
            return (True, lenTail)
        return (False, 1)

    def _intToChar(self, x):
        """Return the one-byte character for the low 8 bits of ``x``.

        Uses chr() instead of exec() on a generated literal, which failed
        for values below 16 (hex() yields a single digit there).
        """
        return chr(x & 0xFF)

    def _hexToChar(self, he):
        """Return the character whose code is the hex string ``he``.

        ``he`` is expected to be two hex digits as written by encode().
        Raises ValueError if ``he`` is not valid hexadecimal; it is parsed
        with int() rather than executed as code.
        """
        return chr(int(he, 16))

    def _findInvalid(self, text):
        """Return the sorted indexes of bytes that are not valid UTF-8."""
        kinds = []
        lengths = []
        for ch in text:
            kind, length = self._retUTF(ch)
            kinds.append(kind)
            lengths.append(length)
        invalid = []
        total = len(kinds)
        pos = 0
        while pos < total:
            if kinds[pos] == '_fb_':
                # A sequence is at most 4 bytes long, so a short window is
                # enough; slicing to the end of the list is quadratic.
                valid, step = self._sumbUtf(kinds[pos:pos + 4],
                                            lengths[pos])
                if not valid:
                    invalid.append(pos)
                # Always advance, even if the helper reports a zero step.
                pos += max(step, 1)
            else:
                # Stray continuation bytes and out-of-range values.
                if kinds[pos] != '_ch_':
                    invalid.append(pos)
                pos += 1
        return invalid

    def _escapeRuns(self, text, invalid):
        """Replace each run of adjacent invalid bytes with one hex marker."""
        pieces = []
        cursor = 0
        runStart = 0
        count = len(invalid)
        while runStart < count:
            # Extend the run while the indexes stay consecutive.
            runEnd = runStart
            while (runEnd + 1 < count and
                   invalid[runEnd + 1] == invalid[runEnd] + 1):
                runEnd += 1
            first = invalid[runStart]
            last = invalid[runEnd]
            pieces.append(text[cursor:first])
            hexDump = "".join(["%02x" % ord(ch)
                               for ch in text[first:last + 1]])
            pieces.append("__hex__?%s?__hex__" % hexDump)
            cursor = last + 1
            runStart = runEnd + 1
        pieces.append(text[cursor:])
        return "".join(pieces)

    def encode(self, text):
        """Encode mixed binary/UTF-8 text into clean UTF-8.

        Every run of adjacent bytes that does not belong to a well-formed
        UTF-8 sequence is replaced by ``__hex__?<hex digits>?__hex__``
        with two lowercase hex digits per byte.  Text that needs no
        escaping, including empty text, is returned unchanged.

        NOTE: marker-shaped text already present in the input is not
        escaped, so decode() would convert it as well.
        """
        if not text:
            return text
        invalid = self._findInvalid(text)
        if not invalid:
            return text
        return self._escapeRuns(text, invalid)

    def decode(self, text):
        """Decode text produced by encode() back into the mixed format.

        Each ``__hex__?<hex digits>?__hex__`` marker is replaced by the
        bytes it describes.  Digits are consumed in pairs; an unpaired
        trailing digit is dropped.  The text is processed in a single
        pass, so restored bytes are never re-interpreted as markers.
        """
        reVar = re.compile(r"__hex__\?([a-f0-9]+)\?__hex__")

        def restore(match):
            digits = match.group(1)
            return "".join([self._hexToChar(digits[k:k + 2])
                            for k in range(0, len(digits) - 1, 2)])

        return reVar.sub(restore, text)
|
|
|
|
|
|
|
|
|
|
class profile(_file, _terms):
|
|
|
|
|
"""Класс для работы с профилями
|
|
|
|
|
|
|
|
|
@ -2425,11 +2582,6 @@ class profile(_file, _terms):
|
|
|
|
|
self.closeNewFile()
|
|
|
|
|
copyFile = True
|
|
|
|
|
if self.getFileType() != "bin":
|
|
|
|
|
# проверяем кодировку файлов
|
|
|
|
|
if not self.fileIsUtf(newFile):
|
|
|
|
|
self.setError(\
|
|
|
|
|
_("Can not decode file in UTF-8:") + " " + newFile)
|
|
|
|
|
return False
|
|
|
|
|
# Вычисляем условные блоки
|
|
|
|
|
self.newProfile = self.applyTermsProfile(self.newProfile,
|
|
|
|
|
newFile)
|
|
|
|
@ -2445,12 +2597,20 @@ class profile(_file, _terms):
|
|
|
|
|
copyFile)
|
|
|
|
|
if not objHeadNew:
|
|
|
|
|
return filesApply
|
|
|
|
|
# Флаг - кодировка с бинарными примесями у файла профиля включаем при
|
|
|
|
|
# условии текстового файла и кодировки отличной от UTF-8
|
|
|
|
|
flagNotUtf8New = False
|
|
|
|
|
# Флаг - кодировка с бинарными примесями у оригинального файла
|
|
|
|
|
flagNotUtf8Old = False
|
|
|
|
|
if not copyFile:
|
|
|
|
|
# проверяем кодировку профиля
|
|
|
|
|
if not self.fileIsUtf(newFile):
|
|
|
|
|
flagNotUtf8New = True
|
|
|
|
|
if not (objHeadNew.params.has_key("link") and\
|
|
|
|
|
objHeadNew.params.has_key("symbolic")):
|
|
|
|
|
# проверяем кодировку оригинального файла
|
|
|
|
|
if not self.fileIsUtf(oldFile):
|
|
|
|
|
self.setError(\
|
|
|
|
|
_("Can not decode file in UTF-8:") + " " + oldFile)
|
|
|
|
|
return False
|
|
|
|
|
flagNotUtf8Old = True
|
|
|
|
|
self.newProfile = objHeadNew.body
|
|
|
|
|
#if objHeadNew.fileType != "bin":
|
|
|
|
|
#self.newProfile = self.applyTermsProfile(self.newProfile,
|
|
|
|
@ -2477,6 +2637,10 @@ class profile(_file, _terms):
|
|
|
|
|
objHeadNew.typeAppend == "after") and\
|
|
|
|
|
not (objHeadNew.fileType == "bin" or\
|
|
|
|
|
objHeadNew.fileType == "raw"):
|
|
|
|
|
# Преобразовываем бинарные файлы
|
|
|
|
|
if flagNotUtf8New:
|
|
|
|
|
objTxtCoder = utfBin()
|
|
|
|
|
self.newProfile = objTxtCoder.encode(self.newProfile)
|
|
|
|
|
try:
|
|
|
|
|
exec ("objProfNew=%s(self.newProfile)"%\
|
|
|
|
|
(objHeadNew.fileType))
|
|
|
|
@ -2507,6 +2671,9 @@ class profile(_file, _terms):
|
|
|
|
|
objProfNew.postXML()
|
|
|
|
|
# Получение текстового файла из XML документа
|
|
|
|
|
self.newProfile = objProfNew.getConfig().encode("UTF-8")
|
|
|
|
|
# Если не UTF-8 производим преобразование
|
|
|
|
|
if flagNotUtf8New:
|
|
|
|
|
self.newProfile = objTxtCoder.decode(self.newProfile)
|
|
|
|
|
# Титл для объединения
|
|
|
|
|
if ListOptTitle:
|
|
|
|
|
title = self.getTitle(objProfNew._comment,
|
|
|
|
@ -2556,6 +2723,9 @@ class profile(_file, _terms):
|
|
|
|
|
return filesApply
|
|
|
|
|
# Объединение
|
|
|
|
|
elif objHeadNew.typeAppend == "join":
|
|
|
|
|
if flagNotUtf8New:
|
|
|
|
|
objTxtCoder = utfBin()
|
|
|
|
|
self.newProfile = objTxtCoder.encode(self.newProfile)
|
|
|
|
|
try:
|
|
|
|
|
exec ("objProfNew=%s(self.newProfile)"%\
|
|
|
|
|
(objHeadNew.fileType))
|
|
|
|
@ -2599,6 +2769,9 @@ class profile(_file, _terms):
|
|
|
|
|
self.oldProfile = objHeadOld.body
|
|
|
|
|
else:
|
|
|
|
|
self.oldProfile = ""
|
|
|
|
|
if flagNotUtf8Old:
|
|
|
|
|
objTxtCoder = utfBin()
|
|
|
|
|
self.oldProfile = objTxtCoder.encode(self.oldProfile)
|
|
|
|
|
if self.newObjProt.has_key(objHeadNew.fileType):
|
|
|
|
|
objProfOldCl = self.createNewClass(\
|
|
|
|
|
objHeadNew.fileType,
|
|
|
|
@ -2620,6 +2793,10 @@ class profile(_file, _terms):
|
|
|
|
|
else:
|
|
|
|
|
self.oldProfile = title +\
|
|
|
|
|
objProfOld.getConfig().encode("UTF-8")
|
|
|
|
|
# Декодируем если кодировка не UTF-8
|
|
|
|
|
if flagNotUtf8New or flagNotUtf8Old:
|
|
|
|
|
self.newProfile = objTxtCoder.decode(self.newProfile)
|
|
|
|
|
self.oldProfile = objTxtCoder.decode(self.oldProfile)
|
|
|
|
|
self.saveOldFile()
|
|
|
|
|
return filesApply
|
|
|
|
|
else:
|
|
|
|
|