File size: 1,126 Bytes
ff02dc4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import json
import sys
from tqdm import tqdm
from categories.accuracy import get_bertscore
from categories.fluency import pseudo_perplexity, grammar_errors


def annotate_entries(entries):
    """Attach accuracy and fluency scores to every translation entry.

    Each entry is read through its ``"german"`` (source) and ``"english"``
    (target) keys and is modified in place with three extra keys:

    * ``"bertscore"``     - ``get_bertscore(source, target)`` converted to
      ``float`` and rounded to four decimal places.
    * ``"fluency_score"`` - the ``"score"`` item of
      ``pseudo_perplexity(target)`` as a ``float``.
    * ``"grammar_score"`` - the ``"score"`` item of
      ``grammar_errors(target)`` as a ``float``.

    Args:
        entries: Iterable of dict-like entries; it is wrapped in ``tqdm``
            so a progress bar is shown while iterating.

    Returns:
        The very same ``entries`` object that was passed in.

    Raises:
        KeyError: If an entry has no ``"german"`` or ``"english"`` key.
    """
    for record in tqdm(entries):
        german_text, english_text = record["german"], record["english"]

        # Run all three scorers first; the record is only written to once
        # every scorer has returned.
        similarity = get_bertscore(german_text, english_text)
        perplexity = pseudo_perplexity(english_text)
        grammar = grammar_errors(english_text)

        # Store the results next to the original fields.
        record["bertscore"] = round(float(similarity), 4)
        record["fluency_score"] = float(perplexity["score"])
        record["grammar_score"] = float(grammar["score"])

    return entries


def main(input_path, output_path):
    """Annotate the entries of a JSON file and save them to another file.

    Args:
        input_path: Path of the UTF-8 encoded JSON file to read. Its parsed
            content is handed to ``annotate_entries`` as-is.
        output_path: Path of the UTF-8 encoded JSON file to write. The
            result is pretty-printed with a two-space indent and non-ASCII
            characters are written unescaped.

    A one-line summary with the entry count and the output path is printed
    once the file has been written.
    """
    # Parse the whole input document up front.
    with open(input_path, "r", encoding="utf-8") as source:
        records = json.loads(source.read())

    result = annotate_entries(records)

    # The output file is opened only after annotation has completed.
    with open(output_path, "w", encoding="utf-8") as sink:
        json.dump(result, sink, indent=2, ensure_ascii=False)

    print(f"Annotated {len(result)} entries → {output_path}")


if __name__ == "__main__":
    # Exactly two positional arguments are expected: the input path and the
    # output path. Anything else prints the usage line and exits with 1.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python annotate_translations.py  input.json  output.json")
        sys.exit(1)
    main(*cli_args)