Spaces:

ak5005
/

derrobot

Sleeping

File size: 1,386 Bytes

9dda31e

from transformers import pipeline

# Module-level text-classification pipeline shared by every call to
# formality().  It is constructed once, at import time, from the
# "LenDigLearn/formality-classifier-mdeberta-v3-base" checkpoint.
# formality() reads the "label" field of the first result it returns.
# NOTE(review): constructing the pipeline presumably loads (and may download)
# the model weights on import — confirm this is acceptable for callers.
pipe = pipeline(
    "text-classification", model="LenDigLearn/formality-classifier-mdeberta-v3-base"
)

# Score table indexed as formality_score_map[source_label][target_label].
# formality() selects the row with the source sentence's (lower-cased) label
# and the entry with the translation's label, then normalizes that entry by
# the largest value in the same row.
# NOTE(review): the origin and units of these numbers are not shown in this
# file — presumably empirically derived; confirm before editing them.
formality_score_map = {
    "formal": {"formal": 58, "informal": 0, "neutral": 22},
    "informal": {"formal": 0, "informal": 86, "neutral": 9.7},
    "neutral": {"formal": 20, "informal": 5.1, "neutral": 86},
}


def formality(src_sentence: str, trg_sentence: str) -> dict:
    """
    Score how well a translation preserves the formality of its source.

    The source (German) sentence and its translation (English) are each
    labelled by the module-level classifier ``pipe``.  The label pair is
    looked up in ``formality_score_map`` and the resulting score is
    rescaled against the largest entry of the source label's row, so the
    best attainable target label for a given source label yields 100.

    Args:
        src_sentence: The source sentence.
        trg_sentence: The translated sentence.

    Returns:
        A dict with the keys:
          "raw_score":  the entry taken from formality_score_map
                        (0.0 when either label is not in the table)
          "normalized": raw_score / row maximum * 100, rounded to 2 places
          "src_label":  lower-cased classifier label of the source
          "trg_label":  lower-cased classifier label of the translation
    """
    # Label both sentences, source first; only the top prediction is used.
    source_label, target_label = (
        pipe(text)[0]["label"].lower() for text in (src_sentence, trg_sentence)
    )

    scores_for_source = formality_score_map.get(source_label, {})

    if scores_for_source:
        raw_score = scores_for_source.get(target_label, 0.0)
        row_best = max(scores_for_source.values())
    else:
        # Unknown source label: nothing to look up, so score 0 against a
        # neutral divisor of 1.
        raw_score = 0.0
        row_best = 1.0

    percent_of_best = raw_score / row_best * 100

    return {
        "raw_score": raw_score,
        "normalized": round(percent_of_best, 2),
        "src_label": source_label,
        "trg_label": target_label,
    }