logicwong's picture
init
c9bb3f2
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
BLEU scoring of generated translations against reference translations.
"""
import argparse
import os
import sys
from fairseq.data import dictionary
from fairseq.scoring import bleu
def get_parser():
parser = argparse.ArgumentParser(
description="Command-line script for BLEU scoring."
)
# fmt: off
parser.add_argument('-s', '--sys', default='-', help='system output')
parser.add_argument('-r', '--ref', required=True, help='references')
parser.add_argument('-o', '--order', default=4, metavar='N',
type=int, help='consider ngrams up to this order')
parser.add_argument('--ignore-case', action='store_true',
help='case-insensitive scoring')
parser.add_argument('--sacrebleu', action='store_true',
help='score with sacrebleu')
parser.add_argument('--sentence-bleu', action='store_true',
help='report sentence-level BLEUs (i.e., with +1 smoothing)')
# fmt: on
return parser
def cli_main():
parser = get_parser()
args = parser.parse_args()
print(args)
assert args.sys == "-" or os.path.exists(
args.sys
), "System output file {} does not exist".format(args.sys)
assert os.path.exists(args.ref), "Reference file {} does not exist".format(args.ref)
dict = dictionary.Dictionary()
def readlines(fd):
for line in fd.readlines():
if args.ignore_case:
yield line.lower()
else:
yield line
if args.sacrebleu:
import sacrebleu
def score(fdsys):
with open(args.ref) as fdref:
print(sacrebleu.corpus_bleu(fdsys, [fdref]).format())
elif args.sentence_bleu:
def score(fdsys):
with open(args.ref) as fdref:
scorer = bleu.Scorer(dict.pad(), dict.eos(), dict.unk())
for i, (sys_tok, ref_tok) in enumerate(
zip(readlines(fdsys), readlines(fdref))
):
scorer.reset(one_init=True)
sys_tok = dict.encode_line(sys_tok)
ref_tok = dict.encode_line(ref_tok)
scorer.add(ref_tok, sys_tok)
print(i, scorer.result_string(args.order))
else:
def score(fdsys):
with open(args.ref) as fdref:
scorer = bleu.Scorer(
bleu.BleuConfig(
pad=dict.pad(),
eos=dict.eos(),
unk=dict.unk(),
)
)
for sys_tok, ref_tok in zip(readlines(fdsys), readlines(fdref)):
sys_tok = dict.encode_line(sys_tok)
ref_tok = dict.encode_line(ref_tok)
scorer.add(ref_tok, sys_tok)
print(scorer.result_string(args.order))
if args.sys == "-":
score(sys.stdin)
else:
with open(args.sys, "r") as f:
score(f)
if __name__ == "__main__":
cli_main()