Spaces:

GroNLP
/

divemt_explorer

Sleeping

gsarti committed on Jan 23, 2023

Commit

02e892d

•

1 Parent(s): 3bd07b2

Use urllib

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
-from datasets import load_dataset, DownloadManager
 import streamlit as st
 from inseq import FeatureAttributionOutput
 st.set_page_config(layout="wide")
@@ -9,8 +10,6 @@ attribution_path = "https://huggingface.co/datasets/inseq/divemt_attributions/re
 df = dataset["train"].to_pandas()
 unique_src = df[["item_id", "src_text"]].drop_duplicates(subset="item_id")
 langs = list(df["lang_id"].unique())
-dl = DownloadManager()
 st.title("DivEMT Explorer 🔍 🌍")
 st.markdown("""
 ##### The DivEMT Explorer is a tool to explore translations and edits in the DivEMT corpus.
@@ -91,8 +90,11 @@ for lang in langs:
                 st.text("Click on checkboxes to show/hide the respective attributions computed with mBART 1-to-50.")
                 for sentence_type in ["mt", "pe", "diff"]:
                     url = attribution_path.format(idx=item_id, setting=setting, sentence_type=sentence_type)
-                    file_path = dl.download(url)
-                    attr = FeatureAttributionOutput.load(file_path, decompress=True)
                     if st.checkbox(sentence_type.upper(), key=f"{lang}_{task_name}_{sentence_type}"):
                         st.markdown(f"{attr.show(return_html=True, display=False, do_aggregation=False)}", unsafe_allow_html=True)

+from datasets import load_dataset
 import streamlit as st
+import urllib
 from inseq import FeatureAttributionOutput
 st.set_page_config(layout="wide")
 df = dataset["train"].to_pandas()
 unique_src = df[["item_id", "src_text"]].drop_duplicates(subset="item_id")
 langs = list(df["lang_id"].unique())
 st.title("DivEMT Explorer 🔍 🌍")
 st.markdown("""
 ##### The DivEMT Explorer is a tool to explore translations and edits in the DivEMT corpus.
                 st.text("Click on checkboxes to show/hide the respective attributions computed with mBART 1-to-50.")
                 for sentence_type in ["mt", "pe", "diff"]:
                     url = attribution_path.format(idx=item_id, setting=setting, sentence_type=sentence_type)
+                    g = urllib.request.urlopen(url)
+                    fpath = f"attr_{sentence_type}.json.gz"
+                    with open(fpath, 'b+w') as f:
+                        f.write(g.read())
+                    attr = FeatureAttributionOutput.load(fpath, decompress=True)
                     if st.checkbox(sentence_type.upper(), key=f"{lang}_{task_name}_{sentence_type}"):
                         st.markdown(f"{attr.show(return_html=True, display=False, do_aggregation=False)}", unsafe_allow_html=True)