File size: 1,386 Bytes
9dda31e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
from transformers import pipeline

# Shared text-classification pipeline, built once when the module is imported
# so that formality() does not reload the model on every call.
pipe = pipeline(
    task="text-classification",
    model="LenDigLearn/formality-classifier-mdeberta-v3-base",
)

# Score for each label pairing.  The outer key is the label predicted for the
# source sentence; the inner key is the label predicted for the translation.
# formality() divides a score by the largest value in its row to normalize it.
formality_score_map = {
    "formal": {
        "formal": 58,
        "informal": 0,
        "neutral": 22,
    },
    "informal": {
        "formal": 0,
        "informal": 86,
        "neutral": 9.7,
    },
    "neutral": {
        "formal": 20,
        "informal": 5.1,
        "neutral": 86,
    },
}


def formality(src_sentence: str, trg_sentence: str) -> dict:
    """
    Score how well a translation preserves the formality of its source.

    Both sentences are labelled with the module-level classifier ``pipe``
    (per the original documentation, the source is German and the
    translation is English).  The resulting label pair is looked up in
    ``formality_score_map`` and rescaled so that the highest score
    available for the source label corresponds to 100.

    A source label that is not in the map, or a target label that is not
    in that source label's row, yields a score of 0.

    Returns:
        {
          "raw_score": number,       # entry from formality_score_map, 0.0 if absent
          "normalized": float,       # raw_score / row maximum * 100, rounded to 2 places
          "src_label": str,          # lower-cased label of the source sentence
          "trg_label": str           # lower-cased label of the translation
        }
    """

    def first_label(sentence):
        # Lower-cased label of the first prediction the classifier returns.
        return pipe(sentence)[0]["label"].lower()

    # Source is classified before the translation.
    source_tag = first_label(src_sentence)
    target_tag = first_label(trg_sentence)

    # Row of scores for this source label; empty when the label is unknown.
    scores_for_source = formality_score_map.get(source_tag, {})
    score = scores_for_source.get(target_tag, 0.0)

    # With an empty row there is nothing to take the maximum of, so divide
    # by 1.0 instead; the score is 0.0 in that case and stays 0 after scaling.
    if scores_for_source:
        row_best = max(scores_for_source.values())
    else:
        row_best = 1.0

    percent = (score / row_best) * 100

    return {
        "raw_score": score,
        "normalized": round(percent, 2),
        "src_label": source_tag,
        "trg_label": target_tag,
    }