Spaces:

ak5005
/

derrobot

Sleeping

derrobot / annotate.py

Aidan Phillips

training

ff02dc4 about 2 months ago

1.13 kB

	import json
	import sys
	from tqdm import tqdm
	from categories.accuracy import get_bertscore
	from categories.fluency import pseudo_perplexity, grammar_errors


	def annotate_entries(entries):
	    """Add three score fields to every entry and return ``entries``.

	    Each entry is indexed by the keys ``"german"`` and ``"english"`` and is
	    modified in place: ``"bertscore"``, ``"fluency_score"`` and
	    ``"grammar_score"`` are written onto it.

	    Args:
	        entries: Iterable of mutable mappings. Each must contain the keys
	            ``"german"`` and ``"english"`` (presumably source and target
	            sentences -- confirm against the data file).

	    Returns:
	        The same ``entries`` object that was passed in.

	    Raises:
	        KeyError: If an entry lacks ``"german"`` or ``"english"``.
	    """
	    for entry in tqdm(entries):
	        source_text, target_text = entry["german"], entry["english"]
	        # The three scorers are project helpers; only the target text is
	        # passed to the fluency and grammar checks.
	        similarity = get_bertscore(source_text, target_text)
	        perplexity = pseudo_perplexity(target_text)
	        grammar = grammar_errors(target_text)
	        # Store plain floats; only the similarity is rounded (4 decimals).
	        entry["bertscore"] = round(float(similarity), 4)
	        entry["fluency_score"] = float(perplexity["score"])
	        entry["grammar_score"] = float(grammar["score"])
	    return entries


	def main(input_path, output_path):
	    """Annotate the JSON file at ``input_path`` and write it to ``output_path``.

	    The input is parsed as UTF-8 JSON, passed through ``annotate_entries``,
	    and the result is written as UTF-8 JSON with a two-space indent and
	    non-ASCII characters left unescaped. A one-line summary is printed to
	    standard output afterwards.

	    Args:
	        input_path: Path of the JSON file to read.
	        output_path: Path of the JSON file to create or overwrite.
	    """
	    with open(input_path, "r", encoding="utf-8") as source_file:
	        loaded = json.load(source_file)
	    annotated = annotate_entries(loaded)
	    with open(output_path, "w", encoding="utf-8") as sink_file:
	        json.dump(annotated, sink_file, indent=2, ensure_ascii=False)
	    print(f"Annotated {len(annotated)} entries → {output_path}")


	if __name__ == "__main__":
	    # Script entry point: expects exactly two arguments, the input and
	    # output JSON paths.
	    if len(sys.argv) != 3:
	        # Take the script name from argv instead of hard-coding it, so the
	        # usage text cannot drift from the real file name. Usage errors go
	        # to stderr to keep stdout clean for normal output.
	        print(
	            f"Usage: python {sys.argv[0]} input.json output.json",
	            file=sys.stderr,
	        )
	        sys.exit(1)
	    main(sys.argv[1], sys.argv[2])