formality code added
Browse files- app.py +11 -4
- categories/accuracy.py +6 -3
- categories/style.py +45 -0
- modules/nav.py +5 -4
- pages/scoring.py +4 -4
- utilities/test_accuracy.py +7 -5
app.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1 |
-
import time
|
2 |
import json
|
|
|
|
|
3 |
|
4 |
import streamlit as st
|
5 |
|
6 |
from categories.accuracy import *
|
7 |
from categories.fluency import *
|
8 |
-
import
|
9 |
-
|
10 |
from modules.nav import Navbar
|
11 |
|
12 |
Navbar()
|
13 |
|
|
|
14 |
# Load translations from a JSON file to be used by the bot
|
15 |
def load_translations():
|
16 |
try:
|
@@ -41,8 +42,14 @@ def response_generator(prompt):
|
|
41 |
acc = accuracy(source, prompt)
|
42 |
ppl = pseudo_perplexity(prompt)
|
43 |
gre = grammar_errors(prompt)
|
|
|
44 |
|
45 |
-
total_score =
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
if "scores" not in st.session_state:
|
48 |
st.session_state.scores = []
|
|
|
|
|
1 |
import json
|
2 |
+
import random
|
3 |
+
import time
|
4 |
|
5 |
import streamlit as st
|
6 |
|
7 |
from categories.accuracy import *
|
8 |
from categories.fluency import *
|
9 |
+
from categories.style import *
|
|
|
10 |
from modules.nav import Navbar
|
11 |
|
12 |
Navbar()
|
13 |
|
14 |
+
|
15 |
# Load translations from a JSON file to be used by the bot
|
16 |
def load_translations():
|
17 |
try:
|
|
|
42 |
acc = accuracy(source, prompt)
|
43 |
ppl = pseudo_perplexity(prompt)
|
44 |
gre = grammar_errors(prompt)
|
45 |
+
frm = formality(source, prompt)
|
46 |
|
47 |
+
total_score = (
|
48 |
+
0.5 * acc["score"]
|
49 |
+
+ 0.2 * gre["score"]
|
50 |
+
+ 0.3 * ppl["score"]
|
51 |
+
+ 0.005 * frm["score"]
|
52 |
+
)
|
53 |
|
54 |
if "scores" not in st.session_state:
|
55 |
st.session_state.scores = []
|
categories/accuracy.py
CHANGED
@@ -1,11 +1,11 @@
|
|
1 |
import string
|
2 |
|
3 |
-
import torch
|
4 |
import numpy as np
|
|
|
|
|
5 |
from scipy.spatial.distance import cosine
|
6 |
from simalign import SentenceAligner
|
7 |
from transformers import AutoModel, AutoTokenizer
|
8 |
-
from laser_encoders import LaserEncoderPipeline
|
9 |
|
10 |
# setup global variables on import (bad practice, but whatever)
|
11 |
# --------------------------------------------------------------
|
@@ -94,7 +94,10 @@ def __bertscore_to_percentage(similarity: float, debug: bool = False) -> float:
|
|
94 |
if debug:
|
95 |
scaled_score = similarity
|
96 |
else:
|
97 |
-
scaled_score = max(
|
|
|
|
|
|
|
98 |
|
99 |
# scaled_score = similarity
|
100 |
return round(scaled_score, 2)
|
|
|
1 |
import string
|
2 |
|
|
|
3 |
import numpy as np
|
4 |
+
import torch
|
5 |
+
from laser_encoders import LaserEncoderPipeline
|
6 |
from scipy.spatial.distance import cosine
|
7 |
from simalign import SentenceAligner
|
8 |
from transformers import AutoModel, AutoTokenizer
|
|
|
9 |
|
10 |
# setup global variables on import (bad practice, but whatever)
|
11 |
# --------------------------------------------------------------
|
|
|
94 |
if debug:
|
95 |
scaled_score = similarity
|
96 |
else:
|
97 |
+
scaled_score = max(
|
98 |
+
100 / (1 + np.exp(-11 * (similarity - 0.60))),
|
99 |
+
100 / (1 + np.exp(-5 * (similarity - 0.60))),
|
100 |
+
)
|
101 |
|
102 |
# scaled_score = similarity
|
103 |
return round(scaled_score, 2)
|
categories/style.py
CHANGED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transformers import pipeline
|
2 |
+
|
3 |
+
pipe = pipeline(
|
4 |
+
"text-classification", model="LenDigLearn/formality-classifier-mdeberta-v3-base"
|
5 |
+
)
|
6 |
+
|
7 |
+
formality_score_map = {
|
8 |
+
"formal": {"formal": 58, "informal": 0, "neutral": 22},
|
9 |
+
"informal": {"formal": 0, "informal": 86, "neutral": 9.7},
|
10 |
+
"neutral": {"formal": 20, "informal": 5.1, "neutral": 86},
|
11 |
+
}
|
12 |
+
|
13 |
+
|
14 |
+
def formality(src_sentence: str, trg_sentence: str) -> dict:
    """
    Evaluate how well the formality of the source (German) sentence is
    preserved in its translation (English). Scores are normalized so that
    the best possible match per source-label is 100.

    Both sentences are classified with the module-level `pipe`; the pair of
    predicted labels is then looked up in `formality_score_map`.

    Args:
        src_sentence: the source sentence.
        trg_sentence: the translation to evaluate.

    Returns:
        {
            "score": float,       # same value as "normalized"
            "raw_score": float,   # the value from formality_score_map
            "normalized": float,  # raw_score / max_row * 100, rounded to 2 places
            "src_label": str,
            "trg_label": str
        }
    """
    # classify source & target (only the first prediction's label is used)
    src_label = pipe(src_sentence)[0]["label"].lower()
    trg_label = pipe(trg_sentence)[0]["label"].lower()

    # get raw score from the map; labels missing from the map score 0
    row = formality_score_map.get(src_label, {})
    raw = row.get(trg_label, 0.0)

    # normalize by that row's max; guard against a row whose max is 0
    max_possible = max(row.values()) if row else 1.0
    normalized = round((raw / max_possible) * 100, 2) if max_possible else 0.0

    return {
        # The caller combines category results via their "score" entry, so
        # expose the normalized value under that key as well.
        "score": normalized,
        "raw_score": raw,
        "normalized": normalized,
        "src_label": src_label,
        "trg_label": trg_label,
    }
|
modules/nav.py
CHANGED
@@ -1,15 +1,16 @@
|
|
1 |
import streamlit as st
|
2 |
|
|
|
3 |
def Navbar():
|
4 |
with st.sidebar:
|
5 |
st.title("Der Roboterlehrer")
|
6 |
st.markdown("### Translation Bots")
|
7 |
-
st.page_link(
|
8 |
-
st.page_link(
|
9 |
st.markdown("### Analysis")
|
10 |
-
st.page_link(
|
11 |
st.divider()
|
12 |
st.markdown("### About")
|
13 |
st.markdown(
|
14 |
"This app is a translation bot that helps you practice your language skills. It uses machine learning models to evaluate your translations and provide feedback."
|
15 |
-
)
|
|
|
1 |
import streamlit as st
|
2 |
|
3 |
+
|
4 |
def Navbar():
    """Render the shared sidebar: title, page links, and the About text."""
    # (page path, link label, icon) for each translation bot page, in order
    translation_pages = (
        ("app.py", "German to English", "🇩🇪"),
        ("pages/to_german.py", "English to German", "🇬🇧"),
    )
    about_text = "This app is a translation bot that helps you practice your language skills. It uses machine learning models to evaluate your translations and provide feedback."

    with st.sidebar:
        st.title("Der Roboterlehrer")

        st.markdown("### Translation Bots")
        for page, label, icon in translation_pages:
            st.page_link(page, label=label, icon=icon)

        st.markdown("### Analysis")
        st.page_link("pages/scoring.py", label="Score Analysis", icon="📊")

        st.divider()
        st.markdown("### About")
        st.markdown(about_text)
|
pages/scoring.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
import streamlit as st
|
2 |
import numpy as np
|
3 |
import pandas as pd
|
|
|
4 |
|
5 |
from modules.nav import Navbar
|
6 |
|
@@ -10,7 +10,7 @@ Navbar()
|
|
10 |
st.title("Score Analysis")
|
11 |
|
12 |
# Initialize session state for scores if it doesn't exist
|
13 |
-
if
|
14 |
st.session_state.scores = []
|
15 |
|
16 |
# Display scores if they exist
|
@@ -20,9 +20,9 @@ if st.session_state.scores:
|
|
20 |
|
21 |
if average_score > 90:
|
22 |
st.balloons()
|
23 |
-
|
24 |
# Display the average
|
25 |
st.header("Score Results")
|
26 |
st.metric(label="Average Score", value=f"{average_score:.2f}")
|
27 |
else:
|
28 |
-
st.info("No scores have been entered yet. Please chat with the bot first!")
|
|
|
|
|
1 |
import numpy as np
|
2 |
import pandas as pd
|
3 |
+
import streamlit as st
|
4 |
|
5 |
from modules.nav import Navbar
|
6 |
|
|
|
10 |
st.title("Score Analysis")
|
11 |
|
12 |
# Initialize session state for scores if it doesn't exist
|
13 |
+
if "scores" not in st.session_state:
|
14 |
st.session_state.scores = []
|
15 |
|
16 |
# Display scores if they exist
|
|
|
20 |
|
21 |
if average_score > 90:
|
22 |
st.balloons()
|
23 |
+
|
24 |
# Display the average
|
25 |
st.header("Score Results")
|
26 |
st.metric(label="Average Score", value=f"{average_score:.2f}")
|
27 |
else:
|
28 |
+
st.info("No scores have been entered yet. Please chat with the bot first!")
|
utilities/test_accuracy.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
-
from categories.accuracy import *
|
2 |
-
import json
|
3 |
import csv
|
|
|
|
|
4 |
from tqdm import tqdm
|
5 |
|
|
|
|
|
6 |
try:
|
7 |
with open("../data/translations.json", "r") as f:
|
8 |
translations = json.loads(f.read())
|
@@ -17,13 +19,13 @@ for t in tqdm(translations):
|
|
17 |
accuracy_scores.append(acc_s["score"])
|
18 |
|
19 |
# Create a CSV file
|
20 |
-
with open(
|
21 |
writer = csv.writer(csvfile)
|
22 |
# Write the header
|
23 |
-
writer.writerow([
|
24 |
# Write the data
|
25 |
print("\nWriting to CSV...")
|
26 |
for i, t in tqdm(enumerate(translations)):
|
27 |
-
writer.writerow([t[
|
28 |
|
29 |
print(f"CSV file created with {len(translations)} entries.")
|
|
|
|
|
|
|
1 |
import csv
|
2 |
+
import json
|
3 |
+
|
4 |
from tqdm import tqdm
|
5 |
|
6 |
+
from categories.accuracy import *
|
7 |
+
|
8 |
try:
|
9 |
with open("../data/translations.json", "r") as f:
|
10 |
translations = json.loads(f.read())
|
|
|
19 |
accuracy_scores.append(acc_s["score"])
|
20 |
|
21 |
# Create a CSV file
|
22 |
+
with open("accuracy_scores.csv", "w", newline="") as csvfile:
|
23 |
writer = csv.writer(csvfile)
|
24 |
# Write the header
|
25 |
+
writer.writerow(["German", "English", "Accuracy Score"])
|
26 |
# Write the data
|
27 |
print("\nWriting to CSV...")
|
28 |
for i, t in tqdm(enumerate(translations)):
|
29 |
+
writer.writerow([t["german"], t["english"], accuracy_scores[i]])
|
30 |
|
31 |
print(f"CSV file created with {len(translations)} entries.")
|