|
|
|
|
|
|
|
|
|
|
|
r'''Generic Moses Wrapper

Run moses, wrapping various inputs and outputs
(useful as decoder-executable for mert-moses.pl)

mert-moses.pl \
    --decoder wrap_moses.py --input src --refs ref --config moses.ini \
    --decoder-flags="--wrap-input-file my_preproc_script.sh \
                     --wrap-n-best-list my_postproc_script.sh"

Commands are run through the shell, so they may contain multiple piped commands

Anything not in the following list is passed through to moses as decoder flags
'''
|
|
|
import argparse |
|
import os |
|
import shutil |
|
import subprocess |
|
import sys |
|
import tempfile |
|
|
|
|
|
# Default moses executable: <root>/bin/moses, where <root> is three directory
# levels above this script's absolute path.
_SCRIPT_PATH = os.path.abspath(__file__)
MOSES = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(_SCRIPT_PATH))),
    'bin',
    'moses',
)
|
|
|
|
|
def popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE):
    '''Start cmd as a child process and return its Popen handle.

    By default the command is interpreted by the shell and both its stdin and
    stdout are pipes, so the caller can stream data through the process.
    Pass shell=False with an argument list to run an executable directly, or
    pass file objects for stdin/stdout to chain processes together.
    '''
    stream_kwargs = {'stdin': stdin, 'stdout': stdout}
    child = subprocess.Popen(cmd, shell=shell, **stream_kwargs)
    return child
|
|
|
|
|
def _flag_value_index(argv, names):
    '''Return the index in argv of the value following one of the flags.

    The spellings in names are tried in order, so earlier ones take
    precedence.  Returns None when no spelling occurs in argv, or when the
    flag is the last token and therefore has no value.
    '''
    for name in names:
        if name in argv:
            value_i = argv.index(name) + 1
            return value_i if value_i < len(argv) else None
    return None


def _run_filter(cmd, src_path, dst_path):
    '''Run shell command cmd with stdin from src_path and stdout to dst_path.

    The files are attached to the shell process itself instead of being
    spliced into the command string as "<src >dst".  That way the first stage
    of a piped command reads the input and the last stage writes the output,
    and paths containing spaces or shell metacharacters are safe.

    Returns the exit status of the shell.
    '''
    with open(src_path, 'rb') as src, open(dst_path, 'wb') as dst:
        return subprocess.call(cmd, shell=True, stdin=src, stdout=dst)


def main():
    '''Run moses with optional filter commands wrapped around its I/O.

    Options known to this wrapper (--moses, --tmp, --wrap-input-file,
    --wrap-n-best-list, --wrap-stdin, --wrap-stdout) are consumed here; every
    other command-line token is passed to moses unchanged.

    Exits with status 2 (after printing usage) when called without arguments,
    0 after a --show-weights pass-through, and 1 on a configuration error.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--moses', help='Moses executable (default: {})'.format(MOSES),
        default=MOSES)
    parser.add_argument(
        '--tmp', help='Temp directory parent (default: /tmp)', default='/tmp')
    parser.add_argument(
        '--wrap-input-file', metavar='CMD',
        help='Pipe input file through this command')
    parser.add_argument(
        '--wrap-n-best-list', metavar='CMD',
        help='Pipe n-best list through this command')
    parser.add_argument(
        '--wrap-stdin', metavar='CMD', help='Pipe stdin through this command')
    parser.add_argument(
        '--wrap-stdout', metavar='CMD', help='Pipe stdout through this command')

    # No arguments at all: show the module description plus option help.
    if len(sys.argv) == 1:
        sys.stderr.write(__doc__ or '')
        parser.print_help()
        sys.exit(2)

    # Unrecognised tokens are collected as the moses command line.
    (args, moses_args) = parser.parse_known_args()
    moses_arg_set = set(moses_args)

    # Child processes exchange bytes.  On Python 3 the byte streams live
    # behind .buffer; on Python 2 the std streams already carry bytes.
    stdin_bytes = getattr(sys.stdin, 'buffer', sys.stdin)
    stdout_bytes = getattr(sys.stdout, 'buffer', sys.stdout)

    # --show-weights is a plain pass-through: no wrapping, just relay output.
    if '--show-weights' in moses_arg_set or '-show-weights' in moses_arg_set:
        stdout_bytes.write(subprocess.check_output([args.moses] + moses_args))
        stdout_bytes.flush()
        sys.exit(0)

    # All spellings of the moses input-file flag this wrapper recognises.
    input_flags = ('--input-file', '-input-file', '-i')
    n_best_flags = ('--n-best-list', '-n-best-list')

    if not os.path.exists(args.moses):
        sys.stderr.write(
            'Error: cannot find moses executable at "{}", '
            'specify with --moses\n'.format(args.moses))
        sys.exit(1)
    if args.wrap_input_file and args.wrap_stdin:
        sys.stderr.write(
            'Error: cannot use both --wrap-input-file and --wrap-stdin\n')
        sys.exit(1)

    # Locate the file arguments that have to be redirected to temp files.
    input_file_i = None
    if args.wrap_input_file:
        input_file_i = _flag_value_index(moses_args, input_flags)
        if input_file_i is None:
            sys.stderr.write(
                'Error: --wrap-input-file requires --input-file\n')
            sys.exit(1)
    n_best_list_i = None
    if args.wrap_n_best_list:
        n_best_list_i = _flag_value_index(moses_args, n_best_flags)
        if n_best_list_i is None:
            sys.stderr.write(
                'Error: --wrap-n-best-list requires --n-best-list\n')
            sys.exit(1)

    # Our own stdin is forwarded only when moses is not reading from a file.
    stream_input = not moses_arg_set.intersection(input_flags)

    tmp = tempfile.mkdtemp(prefix='moses.', dir=args.tmp)
    try:
        moses_cmd = moses_args[:]

        # Pre-process the input file and point moses at the filtered copy.
        if args.wrap_input_file:
            moses_input_file = os.path.join(tmp, 'input_file')
            _run_filter(
                args.wrap_input_file, moses_args[input_file_i],
                moses_input_file)
            moses_cmd[input_file_i] = moses_input_file

        # Let moses write its n-best list to a temp file; the filtered result
        # is written to the path the caller asked for once moses has finished.
        n_best_list = None
        moses_n_best_list = os.path.join(tmp, 'n_best_list')
        if args.wrap_n_best_list:
            n_best_list = moses_args[n_best_list_i]
            moses_cmd[n_best_list_i] = moses_n_best_list

        moses_cmd = [args.moses] + moses_cmd

        # Build the pipeline: [wrap_stdin |] moses [| wrap_stdout].
        wrap_stdin = None
        moses_stdin = subprocess.PIPE
        if args.wrap_stdin:
            wrap_stdin = popen(args.wrap_stdin)
            moses_stdin = wrap_stdin.stdout

        wrap_stdout = None
        if args.wrap_stdout:
            moses = popen(moses_cmd, shell=False, stdin=moses_stdin)
            wrap_stdout = popen(
                args.wrap_stdout, stdin=moses.stdout, stdout=sys.stdout)
            # Drop our copy of the read end so moses receives SIGPIPE if the
            # output filter exits early.
            moses.stdout.close()
        else:
            moses = popen(
                moses_cmd, shell=False, stdin=moses_stdin, stdout=sys.stdout)
        if wrap_stdin:
            # Same for the input filter: moses now owns the read end.
            wrap_stdin.stdout.close()

        # Feed our stdin to the head of the pipeline, flushing per line so
        # the decoder can start working before the input is exhausted.
        child_stdin = wrap_stdin.stdin if wrap_stdin else moses.stdin
        if stream_input:
            for line in iter(stdin_bytes.readline, b''):
                child_stdin.write(line)
                child_stdin.flush()
        child_stdin.close()

        # NOTE(review): exit statuses of moses and of the filter commands are
        # not propagated (unchanged from the original behavior).
        if wrap_stdin:
            wrap_stdin.wait()
        moses.wait()
        if wrap_stdout:
            wrap_stdout.wait()

        # Post-process the n-best list into the originally requested file.
        if args.wrap_n_best_list:
            _run_filter(args.wrap_n_best_list, moses_n_best_list, n_best_list)
    finally:
        # Always remove the temp directory, including on errors and sys.exit.
        shutil.rmtree(tmp)
|
|
|
|
|
# Script entry point: run the wrapper only when executed directly.
if __name__ == '__main__':
    sys.exit(main())
|
|