| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
"""Python implementation of BLEU and smooth-BLEU.

This module provides a Python implementation of BLEU and smooth-BLEU.
Smooth BLEU is computed following the method outlined in the paper:
Chin-Yew Lin, Franz Josef Och. ORANGE: a method for evaluating automatic
evaluation metrics for machine translation. COLING 2004.
"""
| |
|
import collections
import math
| |
|
| |
|
| | def _get_ngrams(segment, max_order): |
| | """Extracts all n-grams upto a given maximum order from an input segment. |
| | |
| | Args: |
| | segment: text segment from which n-grams will be extracted. |
| | max_order: maximum length in tokens of the n-grams returned by this |
| | methods. |
| | |
| | Returns: |
| | The Counter containing all n-grams upto max_order in segment |
| | with a count of how many times each n-gram occurred. |
| | """ |
| | ngram_counts = collections.Counter() |
| | for order in range(1, max_order + 1): |
| | for i in range(0, len(segment) - order + 1): |
| | ngram = tuple(segment[i:i+order]) |
| | ngram_counts[ngram] += 1 |
| | return ngram_counts |
| |
|
| |
|
def compute_bleu(reference_corpus, translation_corpus, max_order=4,
                 smooth=False):
    """Compute the BLEU score of translations against one or more references.

    Args:
      reference_corpus: list of lists of references for each translation.
        Each reference should be tokenized into a list of tokens.
      translation_corpus: list of translations to score. Each translation
        should be tokenized into a list of tokens.
      max_order: maximum n-gram order to use when computing the BLEU score.
      smooth: whether or not to apply Lin et al. 2004 (add-one) smoothing
        to the n-gram precisions.

    Returns:
      6-tuple ``(bleu, precisions, bp, ratio, translation_length,
      reference_length)``:
        bleu: geometric mean of the n-gram precisions times the brevity
          penalty.
        precisions: list of max_order n-gram precisions (order 1 first).
        bp: brevity penalty.
        ratio: translation_length / reference_length.
        translation_length: total number of tokens in the translations.
        reference_length: sum over segments of the shortest reference
          length.
      If either total length is zero, no length ratio can be formed; in
      that case ratio and bp are 0.0 and therefore bleu is 0.0.

    Raises:
      ValueError: if a segment has an empty list of references, or if
        max_order is 0 (both come from calling min() on an empty sequence).
    """
    matches_by_order = [0] * max_order
    possible_matches_by_order = [0] * max_order
    reference_length = 0
    translation_length = 0
    for references, translation in zip(reference_corpus, translation_corpus):
        # The shortest reference is used as the effective reference length.
        reference_length += min(len(r) for r in references)
        translation_length += len(translation)

        # Union of Counters keeps, per n-gram, the maximum count seen in
        # any single reference; intersecting with the translation counts
        # then clips each translation n-gram count to that maximum.
        merged_ref_ngram_counts = collections.Counter()
        for reference in references:
            merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
        translation_ngram_counts = _get_ngrams(translation, max_order)
        overlap = translation_ngram_counts & merged_ref_ngram_counts
        for ngram, count in overlap.items():
            matches_by_order[len(ngram) - 1] += count
        for order in range(1, max_order + 1):
            possible_matches = len(translation) - order + 1
            if possible_matches > 0:
                possible_matches_by_order[order - 1] += possible_matches

    precisions = [0] * max_order
    for i in range(max_order):
        if smooth:
            precisions[i] = ((matches_by_order[i] + 1.) /
                             (possible_matches_by_order[i] + 1.))
        elif possible_matches_by_order[i] > 0:
            precisions[i] = (float(matches_by_order[i]) /
                             possible_matches_by_order[i])
        else:
            precisions[i] = 0.0

    if min(precisions) > 0:
        p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions)
        geo_mean = math.exp(p_log_sum)
    else:
        geo_mean = 0.0

    if reference_length > 0 and translation_length > 0:
        ratio = float(translation_length) / reference_length
        bp = 1. if ratio > 1.0 else math.exp(1 - 1. / ratio)
    else:
        # Dividing by reference_length, or by a zero ratio inside the
        # brevity penalty, would raise ZeroDivisionError. An empty
        # translation or empty reference side scores zero instead.
        ratio = 0.0
        bp = 0.0

    bleu = geo_mean * bp

    return (bleu, precisions, bp, ratio, translation_length, reference_length)
| |
|
| |
|
def _bleu(ref_file, trans_file, subword_option=None):
    """Compute smoothed BLEU-4 for a translation file against a reference file.

    Both files are read as UTF-8 text with one segment per line; each line
    is stripped and split on whitespace to obtain its tokens. The score is
    computed by compute_bleu with max_order=4 and smoothing enabled.

    Args:
      ref_file: path to the reference file.
      trans_file: path to the translation file.
      subword_option: accepted for interface compatibility; not used by
        this function.

    Returns:
      The BLEU score scaled to 0-100 and rounded to two decimal places.
    """
    max_order = 4
    smooth = True
    ref_files = [ref_file]
    reference_text = []
    for reference_filename in ref_files:
        # Explicit encoding so results do not depend on the platform locale.
        with open(reference_filename, encoding="utf-8") as fh:
            reference_text.append(fh.readlines())
    # Regroup from "one list of lines per reference file" into "one list of
    # tokenized references per segment".
    per_segment_references = [
        [reference.strip().split() for reference in references]
        for references in zip(*reference_text)
    ]
    with open(trans_file, encoding="utf-8") as fh:
        translations = [line.strip().split() for line in fh]
    bleu_score = compute_bleu(per_segment_references, translations,
                              max_order, smooth)[0]
    return round(100 * bleu_score, 2)