from transformers import pipeline

# Shared text-classification pipeline, built once at import time.
# `formality` calls it on a sentence and reads the "label" field of the
# first result.
pipe = pipeline(
    "text-classification",
    model="LenDigLearn/formality-classifier-mdeberta-v3-base",
)


# Score table indexed as formality_score_map[source_label][target_label].
# `formality` looks up the raw score here and normalizes it by the largest
# value in the source label's row.
formality_score_map = {
    "formal": {"formal": 58, "informal": 0, "neutral": 22},
    "informal": {"formal": 0, "informal": 86, "neutral": 9.7},
    "neutral": {"formal": 20, "informal": 5.1, "neutral": 86},
}


def formality(
    src_sentence: str,
    trg_sentence: str,
    *,
    classifier=None,
    score_map=None,
) -> dict:
    """Score how well a translation matches the formality of its source.

    Both sentences are labelled by the classifier, and the
    (source label, target label) pair is looked up in the score table.
    The result is normalized so that the best possible match for the
    source label is 100.

    Args:
        src_sentence: Source sentence (German, per the original docs).
        trg_sentence: Translated sentence (English, per the original docs).
        classifier: Optional callable that takes a sentence and returns a
            sequence whose first item is a mapping with a ``"label"`` key.
            Defaults to the module-level ``pipe``.
        score_map: Optional nested mapping of the form
            ``{source_label: {target_label: score}}``. Defaults to the
            module-level ``formality_score_map``.

    Returns:
        A dict with the keys:
            "raw_score": the value found in the score table (0.0 when the
                label pair is not in the table),
            "normalized": ``raw_score / max(row) * 100`` rounded to two
                decimals (0.0 when the row's best score is not positive),
            "src_label": lower-cased label of the source sentence,
            "trg_label": lower-cased label of the target sentence.
    """
    if classifier is None:
        classifier = pipe
    if score_map is None:
        score_map = formality_score_map

    src_label = classifier(src_sentence)[0]["label"].lower()
    trg_label = classifier(trg_sentence)[0]["label"].lower()

    # Labels missing from the table are scored 0 rather than raising, so an
    # unexpected classifier label degrades the score instead of crashing.
    row = score_map.get(src_label, {})
    raw = row.get(trg_label, 0.0)

    best = max(row.values()) if row else 1.0
    # A row whose best score is zero (or negative) has no meaningful scale;
    # report 0 instead of dividing by zero.
    normalized = (raw / best) * 100 if best > 0 else 0.0

    return {
        "raw_score": raw,
        "normalized": round(normalized, 2),
        "src_label": src_label,
        "trg_label": trg_label,
    }