g8a9 committed
Commit acdedb4
Parent(s): 43277af

add evaluation and prediction

Files changed (2):
  1. requirements.txt +1 -1
  2. single.py +79 -17
requirements.txt CHANGED
@@ -1,2 +1,2 @@
 transformers==4.20.1
-ferret-xai>=0.1.0
+ferret-xai>=0.2.0
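
The version bump tracks the new Benchmark calls introduced in single.py below (bench.score, bench.evaluate_explanations, bench.show_evaluation_table), which presumably require ferret-xai 0.2.0. As a sanity check, here is a minimal sketch of the same flow outside Streamlit, using only calls that appear in the diff that follows; the input text is a made-up example.

    from transformers import AutoModelForSequenceClassification, AutoTokenizer
    from ferret import Benchmark

    model_name = "distilbert-base-uncased-finetuned-sst-2-english"
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

    bench = Benchmark(model, tokenizer)

    text = "Great movie for a great evening!"  # made-up example input
    scores = bench.score(text)  # per-class scores, shown under "Prediction" in the app
    explanations = bench.explain(text, target=1)  # target: positional index of the class
    evaluations = bench.evaluate_explanations(explanations, target=1, apply_style=False)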
single.py CHANGED
@@ -1,6 +1,9 @@
 import streamlit as st
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
 from ferret import Benchmark
+from torch.nn.functional import softmax
+
+DEFAULT_MODEL = "distilbert-base-uncased-finetuned-sst-2-english"
 
 
 @st.cache()
@@ -8,40 +11,99 @@ def get_model(model_name):
     return AutoModelForSequenceClassification.from_pretrained(model_name)
 
 
+@st.cache()
+def get_config(model_name):
+    return AutoConfig.from_pretrained(model_name)
+
+
 def get_tokenizer(tokenizer_name):
     return AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True)
 
 
 def body():
 
-    st.title("Evaluate using *ferret* !")
-
     st.markdown(
         """
-
-        ### 👋 Hi!
+        # Welcome to the *ferret* showcase
+
+        You are now working in *single instance* mode -- i.e., you will work on and
+        inspect one textual query at a time.
+
+        ## Sentiment Analysis
+
+        Post-hoc explanation techniques disclose the rationale behind the prediction a model
+        makes when detecting the sentiment of a text. In a sense, they let you *poke* inside the model.
+
+        But **who watches the watchers**?
+
+        Let's find out!
 
-        Insert down below your text, choose a model and fire up ferret. We will use
-        *ferret* to:
-        1. produce explanations with all supported methods
-        2. evaluate explanations on state-of-the-art **faithfulness metrics**.
+        Choose your favourite sentiment classification model and let ferret do the rest.
+        We will:
+
+        1. download your model - if you're impatient, here is a [cute video](https://www.youtube.com/watch?v=0Xks8t-SWHU) 🦜 for you;
+        2. explain using *ferret*'s built-in methods ⚙️;
+        3. evaluate explanations with state-of-the-art **faithfulness metrics** 🚀
+
        """
     )
 
-    col1, col2 = st.columns([1, 1])
+    col1, col2 = st.columns([3, 1])
     with col1:
-        model_name = st.text_input("HF Model", "g8a9/bert-base-cased_ami18")
+        model_name = st.text_input("HF Model", DEFAULT_MODEL)
     with col2:
-        tokenizer_name = st.text_input("HF Tokenizer", "bert-base-cased")
+        target = st.selectbox(
+            "Target",
+            options=range(5),
+            index=1,
+            help="Positional index of your target class.",
+        )
 
     text = st.text_input("Text")
 
     compute = st.button("Compute")
 
-    if compute and model_name and tokenizer_name:
-        model = get_model(model_name)
-        tokenizer = get_tokenizer(tokenizer_name)
+    if compute and model_name:
+
+        with st.spinner("Preparing the magic. Hang in there..."):
+            model = get_model(model_name)
+            tokenizer = get_tokenizer(model_name)
+            config = get_config(model_name)
+            bench = Benchmark(model, tokenizer)
+
+        st.markdown("### Prediction")
+        scores = bench.score(text)
+        scores_str = ", ".join(
+            [f"{config.id2label[l]}: {s:.2f}" for l, s in enumerate(scores)]
+        )
+        st.text(scores_str)
+
+        with st.spinner("Computing Explanations..."):
+            explanations = bench.explain(text, target=target)
 
-        bench = Benchmark(model, tokenizer)
-        explanations = bench.explain(text)
+        st.markdown("### Explanations")
         st.dataframe(bench.show_table(explanations))
+
+        with st.spinner("Evaluating Explanations..."):
+            evaluations = bench.evaluate_explanations(
+                explanations, target=target, apply_style=False
+            )
+
+        st.markdown("### Faithfulness Metrics")
+        st.dataframe(bench.show_evaluation_table(evaluations))
+
+        st.markdown(
+            """
+            **Legend**
+
+            - **AOPC Comprehensiveness** (aopc_compr) measures *comprehensiveness*, i.e., whether the
              explanation captures the tokens the model actually relied on: we remove the top-rated
              tokens and track how much the predicted probability drops (higher is better);
+            - **AOPC Sufficiency** (aopc_suff) measures *sufficiency*, i.e., whether the top-rated
              tokens alone are enough to sustain the prediction (lower is better);
+            - **Leave-One-Out TAU Correlation** (taucorr_loo) measures the Kendall rank correlation
              between the explanation and leave-one-out token importances (closer to 1 is better).
+
+            See the paper for details.
+            """
+        )
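
For reference, the two AOPC metrics in the legend reduce to simple averages over perturbed predictions. A rough, illustrative sketch with made-up probabilities (not ferret's internal code):

    # Made-up probabilities for a single example; no model is run here.
    full_prob = 0.95                   # p(y|x) on the full text
    without_topk = [0.60, 0.40, 0.20]  # p(y|x) after removing the top-1..3 rated tokens
    only_topk = [0.90, 0.92, 0.94]     # p(y|x) when keeping only the top-1..3 rated tokens

    # Comprehensiveness: probability lost when important tokens are removed (higher = more faithful).
    aopc_compr = full_prob - sum(without_topk) / len(without_topk)  # 0.95 - 0.40 = 0.55
    # Sufficiency: probability lost when keeping only the important tokens (lower = more faithful).
    aopc_suff = full_prob - sum(only_topk) / len(only_topk)         # 0.95 - 0.92 = 0.03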