Spaces:
Runtime error
Runtime error
File size: 637 Bytes
e50fe35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import sys
import codecs
def clean_vocab(in_vocab_fname, out_vocab_fname):
    """Copy well-formed vocabulary lines and report the malformed ones.

    A line is considered well-formed when, after stripping leading and
    trailing carriage returns, newlines and spaces, splitting it on a
    single space yields exactly two fields. Well-formed lines are written
    to the output file unchanged (original line ending included).

    Every other line is dropped from the output and reported on stdout:
    first its zero-based line index and stripped text, then one line per
    character showing the character and its code point in hex, which makes
    invisible or unexpected characters easy to spot.

    Args:
        in_vocab_fname: Path of the UTF-8 vocabulary file to read.
        out_vocab_fname: Path of the UTF-8 file to write; it is overwritten.
    """
    with codecs.open(in_vocab_fname, "r", encoding="utf-8") as infile, codecs.open(
        out_vocab_fname, "w", encoding="utf-8"
    ) as outfile:
        for i, line in enumerate(infile):
            # Split on a literal single space (not arbitrary whitespace), so
            # consecutive spaces produce empty fields and fail the check.
            fields = line.strip("\r\n ").split(" ")
            if len(fields) == 2:
                # Write the raw line, not the stripped one, to keep its
                # original line ending.
                outfile.write(line)
            else:
                print("{}: {}".format(i, line.strip()))
                # Dump each character with its code point to expose hidden
                # characters that broke the two-field layout.
                for c in line:
                    print("{}:{}".format(c, hex(ord(c))))
if __name__ == "__main__":
    # Script entry point: argv[1] is the input vocabulary file and argv[2]
    # the output file. Fail with a usage message instead of a bare
    # IndexError when either is missing; extra arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit("usage: {} <in_vocab_file> <out_vocab_file>".format(sys.argv[0]))
    clean_vocab(sys.argv[1], sys.argv[2])
|