File size: 1,386 Bytes
9dda31e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
from transformers import pipeline
# Shared text-classification pipeline, built once when the module is imported
# so that every formality() call reuses the same classifier object.
pipe = pipeline(
    task="text-classification",
    model="LenDigLearn/formality-classifier-mdeberta-v3-base",
)
# Score lookup table used by formality(): the outer key is the label predicted
# for the source sentence, the inner key is the label predicted for the target
# sentence, and the value is the raw score for that pair.
# NOTE(review): where the numeric values come from is not shown in this file;
# presumably derived empirically -- confirm before changing them.
formality_score_map = {
    "formal": {
        "formal": 58,
        "informal": 0,
        "neutral": 22,
    },
    "informal": {
        "formal": 0,
        "informal": 86,
        "neutral": 9.7,
    },
    "neutral": {
        "formal": 20,
        "informal": 5.1,
        "neutral": 86,
    },
}
def formality(src_sentence: str, trg_sentence: str) -> dict:
    """Score how well a translation preserves the formality of its source.

    Both sentences are labelled with the module-level ``pipe`` classifier.
    The lower-cased label pair is looked up in ``formality_score_map``
    (source label selects the row, target label the entry within it), and
    the raw value is rescaled so that the largest entry of that row maps
    to 100.

    Args:
        src_sentence: The source sentence (German, according to the original
            author; the code itself does not check the language).
        trg_sentence: The translated sentence (English, same caveat).

    Returns:
        A dict with the keys:

        * ``"raw_score"``: the value taken from ``formality_score_map``,
          or ``0.0`` when the label pair is not in the map.
        * ``"normalized"``: ``raw_score / row_max * 100``, rounded to two
          decimals.
        * ``"src_label"``: lower-cased label predicted for the source.
        * ``"trg_label"``: lower-cased label predicted for the target.
    """
    # Label the source first, then the target; only the top prediction's
    # label is used from each classifier result.
    src_label, trg_label = (
        pipe(sentence)[0]["label"].lower()
        for sentence in (src_sentence, trg_sentence)
    )

    # An unknown source label yields an empty row, so the raw score falls
    # back to 0.0 below.
    scores_for_src = formality_score_map.get(src_label, {})
    raw = scores_for_src.get(trg_label, 0.0)

    # Scale against the best achievable score for this source label; an empty
    # row uses 1.0 so the division stays defined.
    if scores_for_src:
        row_best = max(scores_for_src.values())
    else:
        row_best = 1.0

    return {
        "raw_score": raw,
        "normalized": round((raw / row_best) * 100, 2),
        "src_label": src_label,
        "trg_label": trg_label,
    }
|