Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| # | |
| # Copyright (c) Facebook, Inc. and its affiliates. | |
| # | |
| # This source code is licensed under the MIT license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| import sys | |
| import sacremoses | |
def main(args):
    """Tokenize stdin line by line, keeping the tab-separated layout.

    Each input line is split on tab characters, every resulting field is
    run through a Moses tokenizer built for ``args.lang``, and the
    tokenized fields are written to stdout joined by tabs again.

    Args:
        args: Parsed command-line namespace; only ``args.lang`` is read here.
    """
    tokenizer = sacremoses.MosesTokenizer(lang=args.lang)

    for raw_line in sys.stdin:
        tokenized_fields = [
            tokenizer.tokenize(field, return_str=True)
            for field in raw_line.split("\t")
        ]
        # Flush after every line so output is available to a downstream
        # consumer as soon as the corresponding input line is processed.
        print("\t".join(tokenized_fields), flush=True)
if __name__ == "__main__":
    import argparse

    # Command-line interface for running this file as a script.
    cli = argparse.ArgumentParser()
    # Language code passed to the tokenizer; defaults to "en".
    cli.add_argument("--lang", "-l", default="en")
    # NOTE(review): --penn and --fields are accepted here, but main() as
    # visible in this file only reads args.lang -- confirm whether they
    # are meant to be wired in.
    cli.add_argument("--penn", "-p", action="store_true")
    cli.add_argument("--fields", "-f", help="fields to tokenize")
    args = cli.parse_args()

    main(args)