NMTKD / translation /merge_bpe_word.py
sakharamg's picture
Uploading all files
158b61b
# -*- coding: utf-8 -*-
import re
import sys
import os
def add_word(f1, f2, w):
    """Annotate every token of ``f1`` with a word taken from ``f2``.

    ``f1`` and ``f2`` are iterated in lockstep, one line each per step
    (iteration stops at the shorter input).  Both lines are split on
    whitespace.  For each token of the ``f1`` line, ``<token>│<word>``
    followed by a single space is written to ``w``; a newline terminates
    every output line.

    Word selection, per line:

    * a token that does NOT contain ``▁`` advances to the next word of the
      ``f2`` line and is paired with it;
    * a token that contains ``▁`` is paired with the current word without
      advancing;
    * once the ``f2`` line has no word left (or is empty), the placeholder
      ``।`` is written instead of a word.

    NOTE(review): a ``▁`` token that appears before any marker-free token is
    paired with the *last* word of the line (index -1), exactly as the
    previous implementation did -- confirm that this is intended.

    Args:
        f1: iterable of text lines with a ``close()`` method (subword file).
        f2: iterable of text lines with a ``close()`` method (word file).
        w: object with ``write(str)`` and ``close()`` (output).

    All three objects are closed before returning, including on error.
    """
    marker = "▁"
    separator = "│"
    placeholder = "।"

    try:
        for line1, line2 in zip(f1, f2):
            words = line2.strip().split()
            n_words = len(words)
            i = -1
            pieces = []
            for tok in line1.strip().split():
                if marker not in tok:
                    # Advance to the next word, but never past n_words:
                    # i == n_words is the "words exhausted" state.  The old
                    # code incremented first and indexed afterwards, which
                    # raised IndexError instead of using the placeholder.
                    i = min(i + 1, n_words)
                if n_words and i != n_words:
                    word = words[i]
                else:
                    # Word line is empty or already used up.
                    word = placeholder
                pieces.append(tok + separator + word + " ")
            pieces.append("\n")
            w.write("".join(pieces))
    finally:
        f1.close()
        f2.close()
        w.close()
def main():
    """Command-line entry point.

    Usage: ``<script> BPE_FILE WORD_FILE OUT_FILE``

    Opens the BPE file and the word file for reading and the output file for
    writing, then hands all three to ``add_word``.  Extra arguments are
    ignored, as before.
    """
    if len(sys.argv) < 4:
        sys.exit("usage: %s BPE_FILE WORD_FILE OUT_FILE" % sys.argv[0])

    bpe_path = sys.argv[1]   # bpe_file
    word_path = sys.argv[2]  # word_file
    out_path = sys.argv[3]

    # The program writes non-ASCII separators, so the encoding is pinned to
    # UTF-8 instead of depending on the platform's locale default.  The
    # ``with`` block guarantees the handles are released even if a later
    # open() fails; add_word closing them as well is harmless.
    with open(bpe_path, 'r', encoding='utf-8') as f1, \
            open(word_path, 'r', encoding='utf-8') as f2, \
            open(out_path, 'w', encoding='utf-8') as w:
        add_word(f1, f2, w)


if __name__ == "__main__":
    main()