#!/usr/bin/env python
# -*- coding: utf-8 -*-
#format python
# NOTE: the "#format python" line above is kept from the original header
# (presumably a wiki processing instruction).  It was moved below the shebang
# and the coding line because Python only honours an encoding declaration on
# the first or second line of a file.
"""Encoding converter by yong27, 2005-06-09.

Converts the character encoding of file contents, or of file and directory
names, from one encoding to another (euc-kr to utf-8 by default on the
command line).

The unit tests build their fixtures from explicit UTF-8 byte strings and run
inside a temporary directory.
"""

import os
import shutil
import sys
import tempfile
import unittest

__all__ = ["EcConverter", "main"]


def _toBytesPath(path):
    """Return *path* as a byte string so its raw name can be re-encoded.

    Byte strings are returned unchanged.  Text paths are encoded with the
    file system encoding (``os.fsencode`` where it exists).
    """
    if isinstance(path, bytes):
        return path
    if hasattr(os, 'fsencode'):
        return os.fsencode(path)
    return path.encode(sys.getfilesystemencoding() or 'utf-8')


class EcConverter:
    """Convert byte strings, path names and file contents between encodings.

    from_ -- name of the encoding the input is currently in
    to_   -- name of the encoding to produce
    """

    def __init__(self, from_, to_):
        self.fromEnc = from_
        self.toEnc = to_

    def convert(self, aStr):
        """Return *aStr* re-encoded from ``fromEnc`` to ``toEnc``.

        A byte string is decoded with ``fromEnc`` first; text that is already
        decoded is only encoded.  Raises UnicodeDecodeError or
        UnicodeEncodeError when the data does not fit the encodings.
        """
        if isinstance(aStr, bytes):
            aStr = aStr.decode(self.fromEnc)
        return aStr.encode(self.toEnc)

    def convertPathName(self, aPath):
        """Rename every file and directory under *aPath*, then *aPath* itself.

        Nothing happens when *aPath* is not an existing directory.  Conversion
        errors raised by convert() propagate to the caller.
        """
        top = _toBytesPath(aPath)
        if not os.path.isdir(top):
            return
        # Walk bottom-up and rename entries inside their (not yet renamed)
        # parent.  Renaming a directory during a top-down walk invalidates
        # the paths os.walk descends into, so deeper levels would be skipped.
        for root, dirs, files in os.walk(top, topdown=False):
            for name in files + dirs:
                cvt_name = self.convert(name)
                if cvt_name != name:
                    os.rename(os.path.join(root, name),
                              os.path.join(root, cvt_name))
        # The top path is converted as a whole string and renamed last, once
        # nothing below it needs the old name any more.
        cvt_top = self.convert(top)
        if cvt_top != top:
            os.renames(top, cvt_top)

    def convertPathNames(self, aPaths):
        """Apply convertPathName() to every path in *aPaths*."""
        for path in aPaths:
            self.convertPathName(path)

    def convertFile(self, fname):
        """Re-encode the contents of the file *fname* in place.

        A file whose contents cannot be converted is reported on stderr and
        left untouched.
        """
        # Binary mode: the data is re-encoded byte for byte and must not go
        # through newline or text translation.
        with open(fname, 'rb') as ifile:
            original = ifile.read()
        try:
            converted = self.convert(original)
        except UnicodeError as e:
            # Covers decode and encode failures alike, so one bad file does
            # not abort a whole batch.
            sys.stderr.write("%s failed : %s\n" % (fname, e))
            return
        temp_fname = fname + (b'_' if isinstance(fname, bytes) else '_')
        # os.replace (where available) also overwrites on Windows.
        replace = getattr(os, 'replace', os.rename)
        try:
            with open(temp_fname, 'wb') as ofile:
                ofile.write(converted)
            replace(temp_fname, fname)
        except (IOError, OSError):
            # Do not leave a half-written temporary file behind.
            if os.path.exists(temp_fname):
                os.remove(temp_fname)
            raise

    def convertFiles(self, fnames):
        """Apply convertFile() to every file name in *fnames*."""
        for fname in fnames:
            self.convertFile(fname)


class EcConverterTest(unittest.TestCase):
    """Round-trip tests between utf-8 and euc-kr in a scratch directory."""

    DIR_NAME = u'디렉토리'.encode('utf-8')
    FILE_NAME = u'파일'.encode('utf-8')
    CONTENT = u'파일내용'.encode('utf-8')

    def setUp(self):
        self.ec1 = EcConverter('utf-8', 'euc-kr')
        self.ec2 = EcConverter('euc-kr', 'utf-8')
        # Work with the relative path 'ectest' inside a private temporary
        # directory so the tests never touch the caller's working directory.
        self.cwd = os.getcwd()
        self.base = tempfile.mkdtemp()
        os.chdir(self.base)
        tdir = os.path.join(b'ectest', self.DIR_NAME)
        os.makedirs(tdir)
        with open(os.path.join(tdir, self.FILE_NAME), 'wb') as ofile:
            ofile.write(self.CONTENT)

    def tearDown(self):
        os.chdir(self.cwd)
        shutil.rmtree(self.base, ignore_errors=True)

    def testConvertString(self):
        euckr = b'\xc5\xd7\xbd\xba\xc6\xae'
        utf8 = u'테스트'.encode('utf-8')
        self.assertEqual(euckr, self.ec1.convert(utf8))
        self.assertEqual(utf8, self.ec2.convert(euckr))

    def testConvertPathName(self):
        self.ec1.convertPathName(b'ectest')
        htest = os.listdir(b'ectest')[0]
        self.assertEqual(self.ec1.convert(self.DIR_NAME), htest)
        hfile = os.listdir(os.path.join(b'ectest', htest))[0]
        self.assertEqual(self.ec1.convert(self.FILE_NAME), hfile)

        self.ec2.convertPathName(b'ectest')
        htest = os.listdir(b'ectest')[0]
        self.assertEqual(self.DIR_NAME, htest)
        hfile = os.listdir(os.path.join(b'ectest', htest))[0]
        self.assertEqual(self.FILE_NAME, hfile)

    def testConvertNestedPathName(self):
        # A directory nested below another directory that also gets renamed.
        os.makedirs(os.path.join(b'ectest', self.DIR_NAME, self.DIR_NAME))
        self.ec1.convertPathName(b'ectest')
        cvt_dir = self.ec1.convert(self.DIR_NAME)
        cvt_file = self.ec1.convert(self.FILE_NAME)
        self.assertEqual([cvt_dir], os.listdir(b'ectest'))
        self.assertEqual(sorted([cvt_dir, cvt_file]),
                         sorted(os.listdir(os.path.join(b'ectest', cvt_dir))))

    def testConvertFile(self):
        ifileName = os.path.join(b'ectest', self.DIR_NAME, self.FILE_NAME)
        self.ec1.convertFile(ifileName)
        with open(ifileName, 'rb') as ifile:
            self.assertEqual(self.ec1.convert(self.CONTENT), ifile.read())


def main(argv=None):
    """Command line entry point.

    argv -- argument list without the program name; None means sys.argv[1:].
    """
    import optparse
    usage = "%prog [options] arg1 arg2..."
    op = optparse.OptionParser(usage, version="%prog 0.1")
    op.add_option("-u", "--unittest", action="store_true", dest="test",
                  default=False, help="doing unittest")
    op.add_option("-p", "--path-names", action="store_true", dest="isPath",
                  default=False, help="is path names converting recursively")
    op.add_option("-f", "--from", dest="encfrom", default="euc-kr",
                  help="encoding of original directory (default:euc-kr)")
    op.add_option("-t", "--to", dest="encto", default="utf-8",
                  help="expected encoding to convert (default:utf-8)")
    options, args = op.parse_args(argv)
    if options.test:
        # unittest.main() runs the tests of this module and exits the
        # process itself; its result must not be handed to a TextTestRunner.
        unittest.main(module=__name__, argv=[sys.argv[0], '-v'])
        return
    if not args:
        op.error("insert argument files or directories")
    ec = EcConverter(options.encfrom, options.encto)
    if options.isPath:
        ec.convertPathNames(args)
    else:
        ec.convertFiles(args)


if __name__ == '__main__':
    main()