Spaces:
Runtime error
Runtime error
import sys
def add_token(sent, tag_infos):
    """Prepend special tag tokens to a sentence.

    Each ``(tag_type, tag)`` pair in ``tag_infos`` becomes one token of the
    form ``__{tag_type}__{tag}__``. The tokens are placed before ``sent`` in
    the order given, with single spaces between the tokens and the sentence.

    Args:
        sent: the sentence (a string) to prefix.
        tag_infos: iterable of ``(tag_type, tag)`` tuples.

    Returns:
        The tokens followed by ``sent``, space-separated. When ``tag_infos``
        is empty, ``sent`` is returned unchanged (no leading space).
    """
    tokens = ['__{}__{}__'.format(tag_type, tag) for tag_type, tag in tag_infos]
    # Joining tokens and sentence together avoids a stray leading space when
    # there are no tokens, while keeping the "tokens + ' ' + sent" layout
    # (including the trailing space for an empty sentence) otherwise.
    return ' '.join(tokens + [sent])
if __name__ == '__main__':
    # Usage: <script> <infname> <outfname> <src_lang> <tgt_lang>
    # Reads infname line by line and writes each line to outfname prefixed
    # with the __src__<src_lang>__ and __tgt__<tgt_lang>__ tokens.
    if len(sys.argv) < 5:
        # Fail with a readable message instead of an IndexError.
        sys.exit(
            'Usage: {} <infname> <outfname> <src_lang> <tgt_lang>'.format(
                sys.argv[0]))

    # Extra trailing arguments are ignored.
    infname, outfname, src_lang, tgt_lang = sys.argv[1:5]

    # The tag list is the same for every line, so build it once.
    tag_infos = [('src', src_lang), ('tgt', tgt_lang)]

    with open(infname, 'r', encoding='utf-8') as infile, \
            open(outfname, 'w', encoding='utf-8') as outfile:
        for line in infile:
            # strip() removes the newline and any surrounding whitespace;
            # a single '\n' is written back after the tagged sentence.
            outfile.write(add_token(line.strip(), tag_infos) + '\n')