# derrobot/categories/accuracy.py
# NOTE: lines below were Hugging Face viewer residue, converted to comments:
# commit "Remove spacy download step; use pip-installed model" (3c6bfb5, ak5005)
import string
import numpy as np
import torch
from laser_encoders import LaserEncoderPipeline
from scipy.spatial.distance import cosine
from simalign import SentenceAligner
from transformers import AutoModel, AutoTokenizer
# setup global variables on import (bad practice, but whatever)
# --------------------------------------------------------------
# NOTE(review): these constructors download/load large models, so importing
# this module is slow and has network/filesystem side effects.
# Word aligner over XLM-R hidden states (layer 6); "inter" alignments are
# consumed by __get_alignment_score below.
aligner = SentenceAligner(model="xlm-roberta-base", layer=6, from_tf = True)
# LASER sentence-embedding pipelines — one per language, since the pipeline
# is language-specific (German source, English target).
de_encoder = LaserEncoderPipeline(lang="deu_Latn")
en_encoder = LaserEncoderPipeline(lang="eng_Latn")
def accuracy(src_sentence: str, trg_sentence: str) -> dict:
    """
    Score a translation by comparing the source sentence with the target.

    Both sentences are normalized (punctuation stripped, lowercased), then
    word-alignment issues and an embedding-similarity percentage are computed.

    Parameters:
        src_sentence (str): The source sentence.
        trg_sentence (str): The target sentence.

    Returns:
        dict: {"score": percentage score (0-100), "errors": list of
            word-level alignment issues}.
    """
    cleaned_src = __preprocess_text(src_sentence)
    cleaned_trg = __preprocess_text(trg_sentence)
    errors = __get_alignment_score(cleaned_src, cleaned_trg)
    similarity = __get_bertscore(cleaned_src, cleaned_trg)
    return {
        "score": __bertscore_to_percentage(similarity),
        "errors": errors,
    }
def __preprocess_text(text: str) -> str:
"""
Remove punctuation and convert text to lowercase.
Parameters:
text (str): The text to preprocess.
Returns:
str: The preprocessed text.
"""
# Remove punctuation
text = text.translate(str.maketrans("", "", string.punctuation))
# Convert to lowercase
text = text.lower()
return text
def __get_bertscore(src_sentence: str, trg_sentence: str) -> float:
    """
    Compute cross-lingual semantic similarity between the two sentences.

    Each sentence is embedded with its language's LASER pipeline (German
    source, English target) and compared by cosine similarity.

    Parameters:
        src_sentence (str): The source sentence.
        trg_sentence (str): The target sentence.

    Returns:
        float: Cosine similarity of the embeddings (1 - cosine distance).
    """
    src_vec = de_encoder.encode_sentences([src_sentence])[0]
    trg_vec = en_encoder.encode_sentences([trg_sentence])[0]
    # scipy's cosine() is a distance, so invert it to get similarity.
    return 1 - cosine(src_vec, trg_vec)
def __bertscore_to_percentage(similarity: float, debug: bool = False) -> float:
"""
Convert the BERTScore cosine similarity to a percentage score (0-100).
Parameters:
similarity (float): The cosine similarity from BERTScore.
Returns:
int: A score from 0 to 100.
"""
# Scale the similarity score from [-1, 1] range to [0, 100] (rarely negative)
# Logistic function: 100 / (1 + exp(-k * (x - 0.5))), where k controls steepness
# k = 35 # Steepness parameter - higher values create a sharper transition
if debug:
scaled_score = similarity
else:
scaled_score = max(
100 / (1 + np.exp(-11 * (similarity - 0.60))),
100 / (1 + np.exp(-5 * (similarity - 0.60))),
)
# scaled_score = similarity
return round(scaled_score, 2)
def __get_alignment_score(src_sentence: str, trg_sentence: str) -> list:
    """
    Flag likely mistranslated words via bidirectional word alignment.

    Every source word with no aligned target word (and vice versa) is
    reported as a potential mistranslation/omission/addition.

    Parameters:
        src_sentence (str): The source sentence.
        trg_sentence (str): The target sentence.

    Returns:
        list: Mistranslations — dicts with zero-based "start"/"end" word
            indexes and a human-readable "message".
    """
    src_words = src_sentence.split()
    trg_words = trg_sentence.split()
    # get_word_aligns returns a dict of matching methods, each mapping to a
    # list of (src_idx, trg_idx) pairs; alignments are zero-indexed.
    pairs = aligner.get_word_aligns(src_words, trg_words)["inter"]
    aligned_src = {pair[0] for pair in pairs}
    aligned_trg = {pair[1] for pair in pairs}
    issues = []
    for idx, word in enumerate(src_words):
        if idx not in aligned_src:
            issues.append(
                {
                    "start": idx,
                    "end": idx,
                    "message": f"Word {word} possibly mistranslated or omitted",
                }
            )
    for idx, word in enumerate(trg_words):
        if idx not in aligned_trg:
            issues.append(
                {
                    "start": idx,
                    "end": idx,
                    "message": f"Word {word} possibly mistranslated or added erroneously",
                }
            )
    return issues