gsarti committed on
Commit
02e892d
•
1 Parent(s): 3bd07b2

Use urllib

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -1,5 +1,6 @@
1
- from datasets import load_dataset, DownloadManager
2
  import streamlit as st
 
3
  from inseq import FeatureAttributionOutput
4
 
5
  st.set_page_config(layout="wide")
@@ -9,8 +10,6 @@ attribution_path = "https://huggingface.co/datasets/inseq/divemt_attributions/re
9
  df = dataset["train"].to_pandas()
10
  unique_src = df[["item_id", "src_text"]].drop_duplicates(subset="item_id")
11
  langs = list(df["lang_id"].unique())
12
- dl = DownloadManager()
13
-
14
  st.title("DivEMT Explorer 🔍 🌍")
15
  st.markdown("""
16
  ##### The DivEMT Explorer is a tool to explore translations and edits in the DivEMT corpus.
@@ -91,8 +90,11 @@ for lang in langs:
91
  st.text("Click on checkboxes to show/hide the respective attributions computed with mBART 1-to-50.")
92
  for sentence_type in ["mt", "pe", "diff"]:
93
  url = attribution_path.format(idx=item_id, setting=setting, sentence_type=sentence_type)
94
- file_path = dl.download(url)
95
- attr = FeatureAttributionOutput.load(file_path, decompress=True)
 
 
 
96
  if st.checkbox(sentence_type.upper(), key=f"{lang}_{task_name}_{sentence_type}"):
97
  st.markdown(f"{attr.show(return_html=True, display=False, do_aggregation=False)}", unsafe_allow_html=True)
98
 
 
1
+ from datasets import load_dataset
2
  import streamlit as st
3
+ import urllib
4
  from inseq import FeatureAttributionOutput
5
 
6
  st.set_page_config(layout="wide")
 
10
  df = dataset["train"].to_pandas()
11
  unique_src = df[["item_id", "src_text"]].drop_duplicates(subset="item_id")
12
  langs = list(df["lang_id"].unique())
 
 
13
  st.title("DivEMT Explorer 🔍 🌍")
14
  st.markdown("""
15
  ##### The DivEMT Explorer is a tool to explore translations and edits in the DivEMT corpus.
 
90
  st.text("Click on checkboxes to show/hide the respective attributions computed with mBART 1-to-50.")
91
  for sentence_type in ["mt", "pe", "diff"]:
92
  url = attribution_path.format(idx=item_id, setting=setting, sentence_type=sentence_type)
93
+ g = urllib.request.urlopen(url)
94
+ fpath = f"attr_{sentence_type}.json.gz"
95
+ with open(fpath, 'b+w') as f:
96
+ f.write(g.read())
97
+ attr = FeatureAttributionOutput.load(fpath, decompress=True)
98
  if st.checkbox(sentence_type.upper(), key=f"{lang}_{task_name}_{sentence_type}"):
99
  st.markdown(f"{attr.show(return_html=True, display=False, do_aggregation=False)}", unsafe_allow_html=True)
100