Spaces:

ak5005
/

derrobot

Sleeping

App Files Files Community

ak5005 committed 15 days ago

Commit

9dda31e

1 Parent(s): 7f92e26

formality code added

Browse files

Files changed (6) hide show

app.py +11 -4
categories/accuracy.py +6 -3
categories/style.py +45 -0
modules/nav.py +5 -4
pages/scoring.py +4 -4
utilities/test_accuracy.py +7 -5

app.py CHANGED Viewed

@@ -1,16 +1,17 @@
-import time
 import json
 import streamlit as st
 from categories.accuracy import *
 from categories.fluency import *
-import random
 from modules.nav import Navbar
 Navbar()
 # Load translations from a JSON file to be used by the bot
 def load_translations():
     try:
@@ -41,8 +42,14 @@ def response_generator(prompt):
     acc = accuracy(source, prompt)
     ppl = pseudo_perplexity(prompt)
     gre = grammar_errors(prompt)
-    total_score = 0.5 * acc["score"] + 0.2 * gre["score"] + 0.3 * ppl["score"]
     if "scores" not in st.session_state:
         st.session_state.scores = []

 import json
+import random
+import time
 import streamlit as st
 from categories.accuracy import *
 from categories.fluency import *
+from categories.style import *
 from modules.nav import Navbar
 Navbar()
 # Load translations from a JSON file to be used by the bot
 def load_translations():
     try:
     acc = accuracy(source, prompt)
     ppl = pseudo_perplexity(prompt)
     gre = grammar_errors(prompt)
+    frm = formality(source, prompt)
+    total_score = (
+        0.5 * acc["score"]
+        + 0.2 * gre["score"]
+        + 0.3 * ppl["score"]
+        + 0.005 * frm["score"]
+    )
     if "scores" not in st.session_state:
         st.session_state.scores = []

categories/accuracy.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import string
-import torch
 import numpy as np
 from scipy.spatial.distance import cosine
 from simalign import SentenceAligner
 from transformers import AutoModel, AutoTokenizer
-from laser_encoders import LaserEncoderPipeline
 # setup global variables on import (bad practice, but whatever)
 # --------------------------------------------------------------
@@ -94,7 +94,10 @@ def __bertscore_to_percentage(similarity: float, debug: bool = False) -> float:
     if debug:
         scaled_score = similarity
     else:
-        scaled_score = max(100 / (1 + np.exp(-11 * (similarity - 0.60))), 100 / (1 + np.exp(-5 * (similarity - 0.60))))
     # scaled_score = similarity
     return round(scaled_score, 2)

 import string
 import numpy as np
+import torch
+from laser_encoders import LaserEncoderPipeline
 from scipy.spatial.distance import cosine
 from simalign import SentenceAligner
 from transformers import AutoModel, AutoTokenizer
 # setup global variables on import (bad practice, but whatever)
 # --------------------------------------------------------------
     if debug:
         scaled_score = similarity
     else:
+        scaled_score = max(
+            100 / (1 + np.exp(-11 * (similarity - 0.60))),
+            100 / (1 + np.exp(-5 * (similarity - 0.60))),
+        )
     # scaled_score = similarity
     return round(scaled_score, 2)

categories/style.py CHANGED Viewed

	@@ -0,0 +1,45 @@

+from transformers import pipeline
+# Text-classification pipeline used by formality() to label a sentence.
+# NOTE: this is a module-level statement, so the model is loaded on import.
+pipe = pipeline(
+    "text-classification", model="LenDigLearn/formality-classifier-mdeberta-v3-base"
+)
+# Score lookup indexed as formality_score_map[source_label][target_label].
+# NOTE(review): the origin of the numeric values is not documented here — confirm.
+formality_score_map = {
+    "formal": {"formal": 58, "informal": 0, "neutral": 22},
+    "informal": {"formal": 0, "informal": 86, "neutral": 9.7},
+    "neutral": {"formal": 20, "informal": 5.1, "neutral": 86},
+}
+def formality(src_sentence: str, trg_sentence: str) -> dict:
+    """
+    Evaluate how well the formality of source (German) sentence is
+    in translation (English).  Scores are normalized so that the best
+    possible match per source‐label is 100.
+    Returns:
+        {
+          "raw_score": float,        # the value from formality_score_map
+          "normalized": float,       # raw_score / max_row * 100
+          "src_label": str,
+          "trg_label": str
+        }
+    """
+    # classify source & target
+    src_label = pipe(src_sentence)[0]["label"].lower()
+    trg_label = pipe(trg_sentence)[0]["label"].lower()
+    # get raw score from the map
+    row = formality_score_map.get(src_label, {})
+    raw = row.get(trg_label, 0.0)
+    # normalize by that row's max
+    max_possible = max(row.values()) if row else 1.0
+    normalized = (raw / max_possible) * 100
+    return {
+        "raw_score": raw,
+        "normalized": round(normalized, 2),
+        "src_label": src_label,
+        "trg_label": trg_label,
+    }

modules/nav.py CHANGED Viewed

@@ -1,15 +1,16 @@
 import streamlit as st
 def Navbar():
     with st.sidebar:
         st.title("Der Roboterlehrer")
         st.markdown("### Translation Bots")
-        st.page_link('app.py', label='German to English', icon="🇩🇪")
-        st.page_link('pages/to_german.py', label='English to German', icon="🇬🇧")
         st.markdown("### Analysis")
-        st.page_link('pages/scoring.py', label='Score Analysis', icon="📊")
         st.divider()
         st.markdown("### About")
         st.markdown(
             "This app is a translation bot that helps you practice your language skills. It uses machine learning models to evaluate your translations and provide feedback."
-        )

 import streamlit as st
 def Navbar():
     with st.sidebar:
         st.title("Der Roboterlehrer")
         st.markdown("### Translation Bots")
+        st.page_link("app.py", label="German to English", icon="🇩🇪")
+        st.page_link("pages/to_german.py", label="English to German", icon="🇬🇧")
         st.markdown("### Analysis")
+        st.page_link("pages/scoring.py", label="Score Analysis", icon="📊")
         st.divider()
         st.markdown("### About")
         st.markdown(
             "This app is a translation bot that helps you practice your language skills. It uses machine learning models to evaluate your translations and provide feedback."
+        )

pages/scoring.py CHANGED Viewed

@@ -1,6 +1,6 @@
-import streamlit as st
 import numpy as np
 import pandas as pd
 from modules.nav import Navbar
@@ -10,7 +10,7 @@ Navbar()
 st.title("Score Analysis")
 # Initialize session state for scores if it doesn't exist
-if 'scores' not in st.session_state:
     st.session_state.scores = []
 # Display scores if they exist
@@ -20,9 +20,9 @@ if st.session_state.scores:
     if average_score > 90:
         st.balloons()
     # Display the average
     st.header("Score Results")
     st.metric(label="Average Score", value=f"{average_score:.2f}")
 else:
-    st.info("No scores have been entered yet. Please chat with the bot first!")

 import numpy as np
 import pandas as pd
+import streamlit as st
 from modules.nav import Navbar
 st.title("Score Analysis")
 # Initialize session state for scores if it doesn't exist
+if "scores" not in st.session_state:
     st.session_state.scores = []
 # Display scores if they exist
     if average_score > 90:
         st.balloons()
     # Display the average
     st.header("Score Results")
     st.metric(label="Average Score", value=f"{average_score:.2f}")
 else:
+    st.info("No scores have been entered yet. Please chat with the bot first!")

utilities/test_accuracy.py CHANGED Viewed

@@ -1,8 +1,10 @@
-from categories.accuracy import *
-import json
 import csv
 from tqdm import tqdm
 try:
     with open("../data/translations.json", "r") as f:
         translations = json.loads(f.read())
@@ -17,13 +19,13 @@ for t in tqdm(translations):
     accuracy_scores.append(acc_s["score"])
 # Create a CSV file
-with open('accuracy_scores.csv', 'w', newline='') as csvfile:
     writer = csv.writer(csvfile)
     # Write the header
-    writer.writerow(['German', 'English', 'Accuracy Score'])
     # Write the data
     print("\nWriting to CSV...")
     for i, t in tqdm(enumerate(translations)):
-        writer.writerow([t['german'], t['english'], accuracy_scores[i]])
 print(f"CSV file created with {len(translations)} entries.")

 import csv
+import json
 from tqdm import tqdm
+from categories.accuracy import *
 try:
     with open("../data/translations.json", "r") as f:
         translations = json.loads(f.read())
     accuracy_scores.append(acc_s["score"])
 # Create a CSV file
+with open("accuracy_scores.csv", "w", newline="") as csvfile:
     writer = csv.writer(csvfile)
     # Write the header
+    writer.writerow(["German", "English", "Accuracy Score"])
     # Write the data
     print("\nWriting to CSV...")
     for i, t in tqdm(enumerate(translations)):
+        writer.writerow([t["german"], t["english"], accuracy_scores[i]])
 print(f"CSV file created with {len(translations)} entries.")