ak5005 committed on
Commit
9dda31e
·
1 Parent(s): 7f92e26

formality code added

Browse files
app.py CHANGED
@@ -1,16 +1,17 @@
1
- import time
2
  import json
 
 
3
 
4
  import streamlit as st
5
 
6
  from categories.accuracy import *
7
  from categories.fluency import *
8
- import random
9
-
10
  from modules.nav import Navbar
11
 
12
  Navbar()
13
 
 
14
  # Load translations from a JSON file to be used by the bot
15
  def load_translations():
16
  try:
@@ -41,8 +42,14 @@ def response_generator(prompt):
41
  acc = accuracy(source, prompt)
42
  ppl = pseudo_perplexity(prompt)
43
  gre = grammar_errors(prompt)
 
44
 
45
- total_score = 0.5 * acc["score"] + 0.2 * gre["score"] + 0.3 * ppl["score"]
 
 
 
 
 
46
 
47
  if "scores" not in st.session_state:
48
  st.session_state.scores = []
 
 
1
  import json
2
+ import random
3
+ import time
4
 
5
  import streamlit as st
6
 
7
  from categories.accuracy import *
8
  from categories.fluency import *
9
+ from categories.style import *
 
10
  from modules.nav import Navbar
11
 
12
  Navbar()
13
 
14
+
15
  # Load translations from a JSON file to be used by the bot
16
  def load_translations():
17
  try:
 
42
  acc = accuracy(source, prompt)
43
  ppl = pseudo_perplexity(prompt)
44
  gre = grammar_errors(prompt)
45
+ frm = formality(source, prompt)
46
 
47
+ total_score = (
48
+ 0.5 * acc["score"]
49
+ + 0.2 * gre["score"]
50
+ + 0.3 * ppl["score"]
51
+ + 0.005 * frm["score"]
52
+ )
53
 
54
  if "scores" not in st.session_state:
55
  st.session_state.scores = []
categories/accuracy.py CHANGED
@@ -1,11 +1,11 @@
1
  import string
2
 
3
- import torch
4
  import numpy as np
 
 
5
  from scipy.spatial.distance import cosine
6
  from simalign import SentenceAligner
7
  from transformers import AutoModel, AutoTokenizer
8
- from laser_encoders import LaserEncoderPipeline
9
 
10
  # setup global variables on import (bad practice, but whatever)
11
  # --------------------------------------------------------------
@@ -94,7 +94,10 @@ def __bertscore_to_percentage(similarity: float, debug: bool = False) -> float:
94
  if debug:
95
  scaled_score = similarity
96
  else:
97
- scaled_score = max(100 / (1 + np.exp(-11 * (similarity - 0.60))), 100 / (1 + np.exp(-5 * (similarity - 0.60))))
 
 
 
98
 
99
  # scaled_score = similarity
100
  return round(scaled_score, 2)
 
1
  import string
2
 
 
3
  import numpy as np
4
+ import torch
5
+ from laser_encoders import LaserEncoderPipeline
6
  from scipy.spatial.distance import cosine
7
  from simalign import SentenceAligner
8
  from transformers import AutoModel, AutoTokenizer
 
9
 
10
  # setup global variables on import (bad practice, but whatever)
11
  # --------------------------------------------------------------
 
94
  if debug:
95
  scaled_score = similarity
96
  else:
97
+ scaled_score = max(
98
+ 100 / (1 + np.exp(-11 * (similarity - 0.60))),
99
+ 100 / (1 + np.exp(-5 * (similarity - 0.60))),
100
+ )
101
 
102
  # scaled_score = similarity
103
  return round(scaled_score, 2)
categories/style.py CHANGED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+
3
# Text-classification pipeline used to label the formality of a sentence.
# It is built once at import time and shared by every call to formality().
# NOTE(review): the score map below implies the model emits the labels
# "formal" / "informal" / "neutral" (case-insensitively) — confirm against
# the model card.
pipe = pipeline(
    "text-classification", model="LenDigLearn/formality-classifier-mdeberta-v3-base"
)

# Lookup table: formality_score_map[source_label][target_label] -> raw score.
# The rows are not on a common scale (their maxima are 58, 86 and 86), which
# is why formality() normalizes each raw score by the maximum of its row.
formality_score_map = {
    "formal": {"formal": 58, "informal": 0, "neutral": 22},
    "informal": {"formal": 0, "informal": 86, "neutral": 9.7},
    "neutral": {"formal": 20, "informal": 5.1, "neutral": 86},
}


def formality(src_sentence: str, trg_sentence: str) -> dict:
    """
    Evaluate how well the formality of the source (German) sentence is
    preserved in the translation (English).

    Both sentences are labelled with the module-level classifier ``pipe``.
    The (source label, target label) pair is looked up in
    ``formality_score_map`` and the result is normalized so that the best
    possible match for that source label is 100.

    Args:
        src_sentence: the source sentence.
        trg_sentence: the translation to evaluate.

    Returns:
        {
            "score": float,       # same value as "normalized"
            "raw_score": float,   # the value from formality_score_map
            "normalized": float,  # raw_score / max_row * 100, rounded to 2 dp
            "src_label": str,
            "trg_label": str
        }
    """
    # classify source & target; only the label of the first prediction is used
    src_label = pipe(src_sentence)[0]["label"].lower()
    trg_label = pipe(trg_sentence)[0]["label"].lower()

    # an unknown source or target label yields a raw score of 0 instead of raising
    row = formality_score_map.get(src_label, {})
    raw = row.get(trg_label, 0.0)

    # normalize by that row's max (1.0 when the source label has no row)
    max_possible = max(row.values()) if row else 1.0
    normalized = round((raw / max_possible) * 100, 2)

    return {
        # "score" mirrors the key the other category scorers are read through
        # (acc["score"], gre["score"], ppl["score"]), so this result can be
        # aggregated the same way without a KeyError.
        "score": normalized,
        "raw_score": raw,
        "normalized": normalized,
        "src_label": src_label,
        "trg_label": trg_label,
    }
modules/nav.py CHANGED
@@ -1,15 +1,16 @@
1
  import streamlit as st
2
 
 
3
  def Navbar():
4
  with st.sidebar:
5
  st.title("Der Roboterlehrer")
6
  st.markdown("### Translation Bots")
7
- st.page_link('app.py', label='German to English', icon="🇩🇪")
8
- st.page_link('pages/to_german.py', label='English to German', icon="🇬🇧")
9
  st.markdown("### Analysis")
10
- st.page_link('pages/scoring.py', label='Score Analysis', icon="📊")
11
  st.divider()
12
  st.markdown("### About")
13
  st.markdown(
14
  "This app is a translation bot that helps you practice your language skills. It uses machine learning models to evaluate your translations and provide feedback."
15
- )
 
1
  import streamlit as st
2
 
3
+
4
  def Navbar():
5
  with st.sidebar:
6
  st.title("Der Roboterlehrer")
7
  st.markdown("### Translation Bots")
8
+ st.page_link("app.py", label="German to English", icon="🇩🇪")
9
+ st.page_link("pages/to_german.py", label="English to German", icon="🇬🇧")
10
  st.markdown("### Analysis")
11
+ st.page_link("pages/scoring.py", label="Score Analysis", icon="📊")
12
  st.divider()
13
  st.markdown("### About")
14
  st.markdown(
15
  "This app is a translation bot that helps you practice your language skills. It uses machine learning models to evaluate your translations and provide feedback."
16
+ )
pages/scoring.py CHANGED
@@ -1,6 +1,6 @@
1
- import streamlit as st
2
  import numpy as np
3
  import pandas as pd
 
4
 
5
  from modules.nav import Navbar
6
 
@@ -10,7 +10,7 @@ Navbar()
10
  st.title("Score Analysis")
11
 
12
  # Initialize session state for scores if it doesn't exist
13
- if 'scores' not in st.session_state:
14
  st.session_state.scores = []
15
 
16
  # Display scores if they exist
@@ -20,9 +20,9 @@ if st.session_state.scores:
20
 
21
  if average_score > 90:
22
  st.balloons()
23
-
24
  # Display the average
25
  st.header("Score Results")
26
  st.metric(label="Average Score", value=f"{average_score:.2f}")
27
  else:
28
- st.info("No scores have been entered yet. Please chat with the bot first!")
 
 
1
  import numpy as np
2
  import pandas as pd
3
+ import streamlit as st
4
 
5
  from modules.nav import Navbar
6
 
 
10
  st.title("Score Analysis")
11
 
12
  # Initialize session state for scores if it doesn't exist
13
+ if "scores" not in st.session_state:
14
  st.session_state.scores = []
15
 
16
  # Display scores if they exist
 
20
 
21
  if average_score > 90:
22
  st.balloons()
23
+
24
  # Display the average
25
  st.header("Score Results")
26
  st.metric(label="Average Score", value=f"{average_score:.2f}")
27
  else:
28
+ st.info("No scores have been entered yet. Please chat with the bot first!")
utilities/test_accuracy.py CHANGED
@@ -1,8 +1,10 @@
1
- from categories.accuracy import *
2
- import json
3
  import csv
 
 
4
  from tqdm import tqdm
5
 
 
 
6
  try:
7
  with open("../data/translations.json", "r") as f:
8
  translations = json.loads(f.read())
@@ -17,13 +19,13 @@ for t in tqdm(translations):
17
  accuracy_scores.append(acc_s["score"])
18
 
19
  # Create a CSV file
20
- with open('accuracy_scores.csv', 'w', newline='') as csvfile:
21
  writer = csv.writer(csvfile)
22
  # Write the header
23
- writer.writerow(['German', 'English', 'Accuracy Score'])
24
  # Write the data
25
  print("\nWriting to CSV...")
26
  for i, t in tqdm(enumerate(translations)):
27
- writer.writerow([t['german'], t['english'], accuracy_scores[i]])
28
 
29
  print(f"CSV file created with {len(translations)} entries.")
 
 
 
1
  import csv
2
+ import json
3
+
4
  from tqdm import tqdm
5
 
6
+ from categories.accuracy import *
7
+
8
  try:
9
  with open("../data/translations.json", "r") as f:
10
  translations = json.loads(f.read())
 
19
  accuracy_scores.append(acc_s["score"])
20
 
21
  # Create a CSV file
22
+ with open("accuracy_scores.csv", "w", newline="") as csvfile:
23
  writer = csv.writer(csvfile)
24
  # Write the header
25
+ writer.writerow(["German", "English", "Accuracy Score"])
26
  # Write the data
27
  print("\nWriting to CSV...")
28
  for i, t in tqdm(enumerate(translations)):
29
+ writer.writerow([t["german"], t["english"], accuracy_scores[i]])
30
 
31
  print(f"CSV file created with {len(translations)} entries.")