Spaces:

passaglia
/

yomikata-demo

Build error

App Files Community

Sam Passaglia committed on Feb 20, 2023

Commit

d41e82b

•

1 Parent(s): 5be9747

minor

Browse files

Files changed (2) hide show

app.py +42 -21
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 """app.py
 streamlit demo of yomikata"""
 import pandas as pd
 import spacy
 import streamlit as st
@@ -8,9 +10,9 @@ from speach import ttlig
 from yomikata import utils
 from yomikata.dictionary import Dictionary
 from yomikata.utils import parse_furigana
-from pathlib import Path
-@st.cache_data
 def add_border(html: str):
     WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.5rem; padding: 1rem; margin-bottom: 1.0rem; display: inline-block">{}</div>"""
     html = html.replace("\n", " ")
@@ -23,46 +25,65 @@ def get_random_sentence():
     df = pd.read_csv(Path(TEST_DATA_DIR, "test_optimized_strict_heteronyms.csv"))
     return df.sample(1).iloc[0].sentence
-@st.cache_data
 def get_dbert_prediction_and_heteronym_list(text):
     from yomikata.dbert import dBert
     reader = dBert()
     return reader.furigana(text), reader.heteronyms
-@st.cache_data
 def get_stats():
     from config import config
     from yomikata.utils import load_dict
     stats = load_dict(Path(config.STORES_DIR, "dbert/training_performance.json"))
-    global_accuracy = stats['test']['accuracy']
-    stats = stats['test']['heteronym_performance']
     heteronyms = stats.keys()
-    accuracy = [stats[heteronym]['accuracy'] for heteronym in heteronyms]
-    readings = [ "、".join(["{reading} ({correct}/{n})".format(reading=reading, correct=stats[heteronym]['readings'][reading]['found'][reading], n=stats[heteronym]['readings'][reading]['n']) for reading in stats[heteronym]['readings'].keys() if (stats[heteronym]['readings'][reading]['found'][reading] !=0 or reading != '<OTHER>')]) for heteronym in heteronyms ]
-    #if reading != '<OTHER>'
-    df = pd.DataFrame({'heteronym': heteronyms, 'accuracy': accuracy, 'readings': readings} )
-    df = df[df['readings'].str.contains('、')]
-    df['readings'] =  df['readings'].str.replace('<OTHER>', 'Other')
-    df = df.rename(columns={'readings':'readings (test corr./total)'})
-    df= df.sort_values('accuracy', ascending=False, ignore_index=True)
-    df.index += 1
     return global_accuracy, df
-@st.cache_data
 def furigana_to_spacy(text_with_furigana):
     tokens = parse_furigana(text_with_furigana)
     ents = []
@@ -116,9 +137,7 @@ label_colors = {
     reading: colors[i % len(colors)]
     for i, reading in enumerate(set([item["label"] for item in spacy_dict["ents"]]))
 }
-html = spacy.displacy.render(
-    spacy_dict, style="ent", manual=True, options={"colors": label_colors}
-)
 if len(spacy_dict["ents"]) > 0:
     st.markdown("**Yomikata** found and disambiguated the following heteronyms:")
@@ -164,7 +183,9 @@ if st.button("🎲 Randomize the input sentence"):
 # Stats section
 global_accuracy, stats_df = get_stats()
-st.subheader(f"{len(stats_df)} heteronyms supported, with a global accuracy of {global_accuracy:.0%}")
 st.dataframe(stats_df)

 """app.py
 streamlit demo of yomikata"""
+from pathlib import Path
 import pandas as pd
 import spacy
 import streamlit as st
 from yomikata import utils
 from yomikata.dictionary import Dictionary
 from yomikata.utils import parse_furigana
+@st.cache
 def add_border(html: str):
     WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.5rem; padding: 1rem; margin-bottom: 1.0rem; display: inline-block">{}</div>"""
     html = html.replace("\n", " ")
     df = pd.read_csv(Path(TEST_DATA_DIR, "test_optimized_strict_heteronyms.csv"))
     return df.sample(1).iloc[0].sentence
+@st.cache
 def get_dbert_prediction_and_heteronym_list(text):
     from yomikata.dbert import dBert
     reader = dBert()
     return reader.furigana(text), reader.heteronyms
+@st.cache
 def get_stats():
     from config import config
     from yomikata.utils import load_dict
     stats = load_dict(Path(config.STORES_DIR, "dbert/training_performance.json"))
+    global_accuracy = stats["test"]["accuracy"]
+    stats = stats["test"]["heteronym_performance"]
     heteronyms = stats.keys()
+    accuracy = [stats[heteronym]["accuracy"] for heteronym in heteronyms]
+    readings = [
+        "、".join(
+            [
+                "{reading} ({correct}/{n})".format(
+                    reading=reading,
+                    correct=stats[heteronym]["readings"][reading]["found"][reading],
+                    n=stats[heteronym]["readings"][reading]["n"],
+                )
+                for reading in stats[heteronym]["readings"].keys()
+                if (
+                    stats[heteronym]["readings"][reading]["found"][reading] != 0
+                    or reading != "<OTHER>"
+                )
+            ]
+        )
+        for heteronym in heteronyms
+    ]
+    # if reading != '<OTHER>'
+    df = pd.DataFrame({"heteronym": heteronyms, "accuracy": accuracy, "readings": readings})
+    df = df[df["readings"].str.contains("、")]
+    df["readings"] = df["readings"].str.replace("<OTHER>", "Other")
+    df = df.rename(columns={"readings": "readings (test corr./total)"})
+    df = df.sort_values("accuracy", ascending=False, ignore_index=True)
+    df.index += 1
     return global_accuracy, df
+@st.cache
 def furigana_to_spacy(text_with_furigana):
     tokens = parse_furigana(text_with_furigana)
     ents = []
     reading: colors[i % len(colors)]
     for i, reading in enumerate(set([item["label"] for item in spacy_dict["ents"]]))
 }
+html = spacy.displacy.render(spacy_dict, style="ent", manual=True, options={"colors": label_colors})
 if len(spacy_dict["ents"]) > 0:
     st.markdown("**Yomikata** found and disambiguated the following heteronyms:")
 # Stats section
 global_accuracy, stats_df = get_stats()
+st.subheader(
+    f"{len(stats_df)} heteronyms supported, with a global accuracy of {global_accuracy:.0%}"
+)
 st.dataframe(stats_df)

requirements.txt CHANGED Viewed

@@ -15,5 +15,5 @@ transformers>=4.25.1
 datasets>=2.7.1
 pynvml==11.4.1
 sentencepiece>=0.1.97
-streamlit>=1.18.1
 rich

 datasets>=2.7.1
 pynvml==11.4.1
 sentencepiece>=0.1.97
+streamlit==1.17.0
 rich