|
import json |
|
import sys |
|
from tqdm import tqdm |
|
from categories.accuracy import get_bertscore |
|
from categories.fluency import pseudo_perplexity, grammar_errors |
|
|
|
|
|
def annotate_entries(entries):
    """Score every entry and store the scores on the entry itself.

    For each entry the "german" and "english" values are read, then
    ``get_bertscore`` is called on the pair while ``pseudo_perplexity`` and
    ``grammar_errors`` are called on the English text alone. The results are
    written back under "bertscore" (rounded to 4 decimals), "fluency_score"
    and "grammar_score".

    Args:
        entries: Iterable of items supporting item get/set (e.g. dicts),
            each providing "german" and "english" keys.

    Returns:
        The same ``entries`` object that was passed in; items are modified
        in place.
    """
    for entry in tqdm(entries):
        german_text, english_text = entry["german"], entry["english"]

        # All three scorers run before any key is written to the entry.
        similarity = get_bertscore(german_text, english_text)
        perplexity_result = pseudo_perplexity(english_text)
        grammar_result = grammar_errors(english_text)

        # The fluency helpers return mappings; only their "score" is kept.
        entry["bertscore"] = round(float(similarity), 4)
        entry["fluency_score"] = float(perplexity_result["score"])
        entry["grammar_score"] = float(grammar_result["score"])

    return entries
|
|
|
|
|
def main(input_path, output_path):
    """Read entries from a JSON file, annotate them, and write the result.

    Args:
        input_path: Path of the JSON file to read (opened as UTF-8).
        output_path: Path of the JSON file to write (opened as UTF-8,
            overwritten if it exists).
    """
    # Load the whole input document up front.
    with open(input_path, "r", encoding="utf-8") as source:
        loaded = json.load(source)

    annotated = annotate_entries(loaded)

    # ensure_ascii=False keeps non-ASCII characters literal in the output.
    with open(output_path, "w", encoding="utf-8") as sink:
        json.dump(annotated, sink, indent=2, ensure_ascii=False)

    print(f"Annotated {len(annotated)} entries → {output_path}")
|
|
|
|
|
if __name__ == "__main__":
    # Exactly two positional arguments are required: input and output paths.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python annotate_translations.py input.json output.json")
        sys.exit(1)
    main(*cli_args)
|
|