NMTKD / translation /sents.py
sakharamg's picture
Uploading all files
158b61b
raw
history blame
521 Bytes
from mosestokenizer import *
# Split a text file into sentences: every non-blank input line is treated as
# one paragraph, run through the Moses sentence splitter, and the resulting
# sentences are written out one per line.

# Read the whole input up front; the context manager closes the handle even
# if reading fails. UTF-8 is stated explicitly so the result does not depend
# on the platform's locale default.
# NOTE(review): assumes the corpus file is UTF-8 encoded -- confirm.
with open("ed-tech/WARANGAL-FORT-FINAL.txt", "r", encoding="utf-8") as input_file:
    input_paras = input_file.readlines()

output = []
# Build the splitter once for the whole run instead of once per paragraph:
# it is loop-invariant, and mosestokenizer wraps the Moses Perl scripts, so
# every construction/teardown presumably starts and stops a helper process.
with MosesSentenceSplitter('en') as splitsents:
    for para in input_paras:
        # Strip first, then test: a whitespace-only line must be skipped just
        # like an empty one, because the splitter does not accept blank lines.
        para = para.strip()
        if not para:
            continue
        output.extend(splitsents([para]))

# Plain "w" is enough -- the file is only written, never read back.
with open("ed-tech/warangal-fort-final-sents.txt", "w", encoding="utf-8") as output_sents:
    for line in output:
        output_sents.write(line.strip() + "\n")