From b22a18cc498f03b67e11b85b927d9a12acb21488 Mon Sep 17 00:00:00 2001 From: Teng-Yi Tseng Date: Thu, 20 Nov 2014 10:58:15 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E7=82=BA=E7=9B=AE=E5=89=8D?= =?UTF-8?q?=E7=89=88=E6=9C=ACg2butf8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nnyConvert.xcodeproj/project.pbxproj | 4 +- nnyConvert/dic_tw.py | 1 + nnyConvert/g2butf8.py | 107 ++++++++------------------- nnyConvert/userdic.txt | 4 +- 4 files changed, 37 insertions(+), 79 deletions(-) diff --git a/nnyConvert.xcodeproj/project.pbxproj b/nnyConvert.xcodeproj/project.pbxproj index 9f9edf9..a05c8a0 100644 --- a/nnyConvert.xcodeproj/project.pbxproj +++ b/nnyConvert.xcodeproj/project.pbxproj @@ -243,7 +243,6 @@ GCC_WARN_ABOUT_RETURN_TYPE = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = ""; ONLY_ACTIVE_ARCH = YES; SDKROOT = macosx; }; @@ -264,7 +263,6 @@ GCC_WARN_ABOUT_RETURN_TYPE = YES; GCC_WARN_UNINITIALIZED_AUTOS = YES; GCC_WARN_UNUSED_VARIABLE = YES; - MACOSX_DEPLOYMENT_TARGET = ""; ONLY_ACTIVE_ARCH = YES; SDKROOT = macosx; }; @@ -273,6 +271,7 @@ 34E6684315EA40A200B42A4A /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { + ARCHS = "$(NATIVE_ARCH_ACTUAL)"; COMBINE_HIDPI_IMAGES = YES; GCC_PRECOMPILE_PREFIX_HEADER = YES; GCC_PREFIX_HEADER = "nnyConvert/nnyConvert-Prefix.pch"; @@ -286,6 +285,7 @@ 34E6684415EA40A200B42A4A /* Release */ = { isa = XCBuildConfiguration; buildSettings = { + ARCHS = "$(NATIVE_ARCH_ACTUAL)"; COMBINE_HIDPI_IMAGES = YES; GCC_PRECOMPILE_PREFIX_HEADER = YES; GCC_PREFIX_HEADER = "nnyConvert/nnyConvert-Prefix.pch"; diff --git a/nnyConvert/dic_tw.py b/nnyConvert/dic_tw.py index 194959f..23ad9c3 100644 --- a/nnyConvert/dic_tw.py +++ b/nnyConvert/dic_tw.py @@ -1410,4 +1410,5 @@ def dic_tw(): u"索馬裏":u"索馬利亞", u"肯尼亞":u"肯亞", u"肯雅":u"肯亞", +u"註意":u"注意" }; diff --git a/nnyConvert/g2butf8.py b/nnyConvert/g2butf8.py index 8acd305..ec76136 100755 --- a/nnyConvert/g2butf8.py +++ b/nnyConvert/g2butf8.py @@ -42,33 +42,6 @@ def getEncodingByContent(content): detector.feed(content) detector.close() return detector.result["encoding"] - -# get user define dictionary -def getUserDic(filename): - user_dic= {} - if os.path.exists(filename): - f_encoding = getEncoding(filename) - if f_encoding == None: - print (u"抱歉, 未能正確判斷自定字典編碼!\n\n"); - else: - fpr = open(filename, 'r'); - lines = fpr.readlines(); - fpr.close(); - - if lines[0].startswith(codecs.BOM_UTF8 ): - lines[0] = lines[0].lstrip(codecs.BOM_UTF8 ); - - for line in lines: - line = line.decode(f_encoding); - words = line.split('=') - key = words[0].lstrip().rstrip(); - value = words[1].lstrip().rstrip(); - user_dic[key] = value; - - return user_dic - -#user dictionary file -user_dic_file = 'userdic.txt' # start error message @@ -91,55 +64,39 @@ def getUserDic(filename): def convertFile(target_file): - user_dic= {} - if os.path.exists(target_file): - f_encoding = getEncoding(target_file) - print u"正在轉換", target_file, u" 編碼為: ", f_encoding - if f_encoding == None: - print (u"抱歉, 未能正確判斷編碼!\n\n"); - else: - result_content = u'' - original_content = u'' - fp = open(target_file, 'r') - original_content = fp.read() - fp.close() + f_encoding = getEncoding(target_file) + print u"正在轉換", target_file, u" 編碼為: ", f_encoding + if f_encoding == None: + print (u"抱歉, 未能正確判斷編碼!\n\n"); + else: + result_content = u'' + original_content = u'' + fp = open(target_file, 'r') + original_content = fp.read() + fp.close() + + if original_content.startswith( codecs.BOM_UTF8 ): + original_content.lstrip( codecs.BOM_UTF8); - if original_content.startswith( codecs.BOM_UTF8 ): - original_content = original_content.lstrip( codecs.BOM_UTF8); - - utf8content=original_content.decode(f_encoding) - - newcontent = jtof(utf8content) - lines = newcontent.splitlines(); - for line in lines: - line = convertVocabulary(line, dic_tw()); - - if os.path.getsize(target_file) > 0: - # do backup - backup_file = target_file + '.bak' - shutil.copy2(target_file, backup_file) - fpw = open(target_file, 'w') - if not newcontent.startswith(codecs.BOM_UTF8.decode( "utf8" )): - fpw.write(codecs.BOM_UTF8) - - pathdir =os.path.dirname(os.path.abspath(target_file)); - user_dic_pathname = pathdir +os.path.sep+user_dic_file; - - if os.path.exists(user_dic_pathname): - user_dic = getUserDic(user_dic_pathname); - if len(user_dic) > 0: - for line in lines: - line = convertVocabulary(line, user_dic); - - for line in lines: - line = convertVocabulary(line, user_dic); - fpw.write(line.encode('UTF-8')) - fpw.write("\n"); - fpw.close(); - - print (MSG_CONVERT_FINISH) - else: - print MSG_NO_CONVERT + utf8content=original_content.decode(f_encoding, 'ignore') + + + + newcontent = jtof(utf8content) + newcontent = convertVocabulary(newcontent, dic_tw()); + if os.path.getsize(target_file) > 0: + # do backup + backup_file = target_file + '.bak' + shutil.copy2(target_file, backup_file) + fpw = open(target_file, 'w') + if not newcontent.startswith(codecs.BOM_UTF8.decode( "utf8" )): + fpw.write(codecs.BOM_UTF8) + fpw.write(newcontent.encode('UTF-8')) + fpw.close(); + + print (MSG_CONVERT_FINISH) + else: + print MSG_NO_CONVERT if __name__ == "__main__": diff --git a/nnyConvert/userdic.txt b/nnyConvert/userdic.txt index 47d747e..3b81b45 100644 --- a/nnyConvert/userdic.txt +++ b/nnyConvert/userdic.txt @@ -1,2 +1,2 @@ -= -Yo=Yv \ No newline at end of file +李潤成=李潤城 +頭發=頭髮 \ No newline at end of file