Spaces:
Runtime error
Runtime error
File size: 853 Bytes
e50fe35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import sys
def add_token(sent, tag_infos):
    """Prepend special tag tokens to a sentence.

    Each ``(tag_type, tag)`` pair in ``tag_infos`` becomes a token of the
    form ``__{tag_type}__{tag}__``. The tokens are placed, in order, in
    front of ``sent``, separated from each other and from the sentence by
    a single space.

    Args:
        sent: the sentence (a string) to prefix.
        tag_infos: iterable of ``(tag_type, tag)`` string tuples.

    Returns:
        The space-joined tokens followed by ``sent``. When ``tag_infos``
        is empty, ``sent`` is returned unchanged (no leading space).
    """
    parts = [
        '__' + tag_type + '__' + tag + '__'
        for tag_type, tag in tag_infos
    ]
    # Join the sentence together with the tokens so that an empty
    # tag_infos does not produce a stray leading space.
    parts.append(sent)
    return ' '.join(parts)
if __name__ == '__main__':
    # Command line: <infname> <outfname> <src_lang> <tgt_lang>
    # Exit with a usage message instead of an IndexError traceback when
    # arguments are missing; extra arguments are ignored, as before.
    if len(sys.argv) < 5:
        sys.exit(
            'usage: {} <infname> <outfname> <src_lang> <tgt_lang>'.format(
                sys.argv[0]))

    infname, outfname, src_lang, tgt_lang = sys.argv[1:5]

    # The same pair of tags is prepended to every line, so build it once.
    tag_infos = [('src', src_lang), ('tgt', tgt_lang)]

    with open(infname, 'r', encoding='utf-8') as infile, \
            open(outfname, 'w', encoding='utf-8') as outfile:
        for line in infile:
            # Surrounding whitespace (including the newline) is stripped
            # before tagging; exactly one newline is written back per line.
            outstr = add_token(line.strip(), tag_infos)
            outfile.write(outstr + '\n')
|