Spaces:

ak5005
/

derrobot

Sleeping

derrobot / categories /style.py

formality code added

9dda31e 2 months ago

1.39 kB

	from transformers import pipeline

	# Shared text-classification pipeline, built once when this module is
	# imported so that every call to formality() reuses the same object.
	pipe = pipeline(
	    task="text-classification",
	    model="LenDigLearn/formality-classifier-mdeberta-v3-base",
	)

	# Score table indexed first by the label predicted for the source sentence
	# and then by the label predicted for the target sentence.
	# NOTE(review): the origin of the numeric values is not visible in this
	# file -- presumably empirically derived; confirm before changing them.
	formality_score_map = dict(
	    formal=dict(formal=58, informal=0, neutral=22),
	    informal=dict(formal=0, informal=86, neutral=9.7),
	    neutral=dict(formal=20, informal=5.1, neutral=86),
	)


	def formality(src_sentence: str, trg_sentence: str) -> dict:
	    """
	    Score how well the formality of a source sentence is preserved in
	    its translation.

	    Both sentences are run through the module-level classifier ``pipe``;
	    the lower-cased top label of each is used to look up a score in
	    ``formality_score_map`` (source label selects the row, target label
	    selects the entry). The score is then rescaled so that the highest
	    value in the source label's row corresponds to 100.

	    NOTE(review): the original notes describe the source as German and
	    the translation as English; nothing in this function enforces that.

	    Args:
	        src_sentence: Source-language sentence.
	        trg_sentence: Translated sentence to evaluate.

	    Returns:
	        {
	            "raw_score": int | float,  # entry from formality_score_map,
	                                       # or 0.0 if a label is not in the table
	            "normalized": float,       # raw_score / row maximum * 100,
	                                       # rounded to 2 decimals
	            "src_label": str,          # lower-cased label of the source
	            "trg_label": str,          # lower-cased label of the target
	        }
	    """
	    # Top prediction for each sentence; source is classified first.
	    source_class = pipe(src_sentence)[0]["label"].lower()
	    target_class = pipe(trg_sentence)[0]["label"].lower()

	    # Row of scores for the source label; empty when the label is unknown.
	    scores_for_source = formality_score_map.get(source_class, {})
	    score = scores_for_source.get(target_class, 0.0)

	    # The row's best score is the normalization denominator; an unknown
	    # source label falls back to 1.0 so the result is simply 0.
	    if scores_for_source:
	        best_in_row = max(scores_for_source.values())
	    else:
	        best_in_row = 1.0
	    scaled = (score / best_in_row) * 100

	    result = {
	        "raw_score": score,
	        "normalized": round(scaled, 2),
	        "src_label": source_class,
	        "trg_label": target_class,
	    }
	    return result