# derrobot / annotate.py
# Aidan Phillips
# training
# ff02dc4
import json
import sys
from tqdm import tqdm
from categories.accuracy import get_bertscore
from categories.fluency import pseudo_perplexity, grammar_errors
def annotate_entries(entries):
    """Attach accuracy and fluency metrics to each translation entry.

    Each entry is expected to be a mapping with a ``"german"`` source text
    and an ``"english"`` target text. Three keys are written back onto the
    same entry object:

    * ``"bertscore"``     -- ``get_bertscore(source, target)`` as a float,
      rounded to 4 decimal places.
    * ``"fluency_score"`` -- the ``"score"`` item of
      ``pseudo_perplexity(target)`` as a float.
    * ``"grammar_score"`` -- the ``"score"`` item of
      ``grammar_errors(target)`` as a float.

    The entries are modified in place; the same ``entries`` object is
    returned for convenience. Progress is reported through ``tqdm``.
    """
    progress = tqdm(entries)
    for record in progress:
        source_text = record["german"]
        target_text = record["english"]

        # Run all three scorers before touching the record.
        similarity = get_bertscore(source_text, target_text)
        perplexity_result = pseudo_perplexity(target_text)
        grammar_result = grammar_errors(target_text)

        # Write the derived metrics back onto the entry itself.
        record["bertscore"] = round(float(similarity), 4)
        record["fluency_score"] = float(perplexity_result["score"])
        record["grammar_score"] = float(grammar_result["score"])

    return entries
def main(input_path, output_path):
    """Annotate the entries in a JSON file and write them to another file.

    Args:
        input_path: Path of the UTF-8 JSON file to read; its parsed content
            is handed to ``annotate_entries``.
        output_path: Path of the UTF-8 JSON file to write. It is opened in
            ``"w"`` mode, so existing content is replaced.

    The output is pretty-printed with a 2-space indent and non-ASCII
    characters are written as-is rather than escaped. A one-line summary is
    printed when the file has been written.
    """
    with open(input_path, "r", encoding="utf-8") as source_file:
        loaded = json.load(source_file)

    results = annotate_entries(loaded)

    with open(output_path, "w", encoding="utf-8") as target_file:
        json.dump(results, target_file, indent=2, ensure_ascii=False)

    entry_count = len(results)
    print(f"Annotated {entry_count} entries → {output_path}")
if __name__ == "__main__":
    # Command-line entry point: expects exactly two positional arguments,
    # the input JSON path and the output JSON path.
    if len(sys.argv) != 3:
        # Report the name the script was actually invoked with instead of a
        # hard-coded file name, so the hint stays correct if the file is
        # renamed.
        print(f"Usage: python {sys.argv[0]} input.json output.json")
        sys.exit(1)
    main(sys.argv[1], sys.argv[2])