g8a9 committed
Commit: b0390ea
Parent: 6b4e3c7

add corpus page

Files changed (2):
  1. corpus.py +140 -1
  2. single.py +1 -2
corpus.py CHANGED
@@ -1,5 +1,144 @@
 import streamlit as st
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
+from ferret import Benchmark
+from torch.nn.functional import softmax
+
+DEFAULT_MODEL = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
+
+
+@st.cache()
+def get_model(model_name):
+    return AutoModelForSequenceClassification.from_pretrained(model_name)
+
+
+@st.cache()
+def get_config(model_name):
+    return AutoConfig.from_pretrained(model_name)
+
+
+def get_tokenizer(tokenizer_name):
+    return AutoTokenizer.from_pretrained(tokenizer_name, use_fast=True)
 
 
 def body():
-    st.text("TBD")
+    st.title("Evaluate explanations on dataset samples")
+
+    st.markdown(
+        """
+Let's test how our built-in explainers behave on state-of-the-art datasets for explainability.
+
+*ferret* exposes an extensible Dataset API. We currently implement [MovieReviews](https://huggingface.co/datasets/movie_rationales) and [HateXPlain](https://huggingface.co/datasets/hatexplain).
+
+This demo lets you experiment with HateXPlain.
+You just need to choose a prediction model and a set of samples to test.
+We will trigger *ferret* to:
+
+1. download the model;
+2. explain every sample you chose;
+3. average all faithfulness and plausibility metrics we support 📊
+        """
+    )
+
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        model_name = st.text_input("HF Model", DEFAULT_MODEL)
+        config = AutoConfig.from_pretrained(model_name)
+
+    with col2:
+        class_labels = list(config.id2label.values())
+        target = st.selectbox(
+            "Target",
+            options=class_labels,
+            index=1,
+            help="Class label you want to explain.",
+        )
+
+    samples_string = st.text_input(
+        "List of samples",
+        "11,6,42",
+        help="List of indices in the dataset, comma-separated.",
+    )
+    # Materialize to a list: a bare map() iterator would be consumed once and
+    # would not render meaningfully in the st.code template below.
+    samples = [int(idx) for idx in samples_string.split(",")]
+
+    compute = st.button("Run")
+
+    if compute and model_name:
+
+        with st.spinner("Preparing the magic. Hang in there..."):
+            model = get_model(model_name)
+            tokenizer = get_tokenizer(model_name)
+            bench = Benchmark(model, tokenizer)
+
+        with st.spinner("Explaining samples (this might take a while)..."):
+
+            @st.cache()
+            def compute_table(samples):
+                data = bench.load_dataset("hatexplain")
+                sample_evaluations = bench.evaluate_samples(data, samples)
+                table = bench.show_samples_evaluation_table(sample_evaluations)
+                return table
+
+            table = compute_table(samples)
+
+        st.markdown("### Averaged metrics")
+        st.dataframe(table)
+        st.caption("Darker colors mean better performance.")
+
+        # scores = bench.score(text)
+        # scores_str = ", ".join(
+        #     [f"{config.id2label[l]}: {s:.2f}" for l, s in enumerate(scores)]
+        # )
+        # st.text(scores_str)
+
+        # with st.spinner("Computing Explanations.."):
+        #     explanations = bench.explain(text, target=class_labels.index(target))
+
+        # st.markdown("### Explanations")
+        # st.dataframe(bench.show_table(explanations))
+        # st.caption("Darker red (blue) means higher (lower) contribution.")
+
+        # with st.spinner("Evaluating Explanations..."):
+        #     evaluations = bench.evaluate_explanations(
+        #         explanations, target=class_labels.index(target), apply_style=False
+        #     )
+
+        # st.markdown("### Faithfulness Metrics")
+        # st.dataframe(bench.show_evaluation_table(evaluations))
+        # st.caption("Darker colors mean better performance.")
+
+        st.markdown(
+            """
+**Legend**
+
+- **AOPC Comprehensiveness** (aopc_compr) measures *comprehensiveness*, i.e., whether the explanation captures all the tokens needed to make the prediction. Higher is better.
+
+- **AOPC Sufficiency** (aopc_suff) measures *sufficiency*, i.e., whether the relevant tokens in the explanation are sufficient on their own to make the prediction. Lower is better.
+
+- **Leave-One-Out TAU Correlation** (taucorr_loo) measures the Kendall rank correlation coefficient τ between the explanation and leave-one-out importances. Closer to 1 is better.
+
+See the paper for details.
+            """
+        )
+
+        st.markdown("**In code, it would be as simple as**")
+        st.code(
+            f"""
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from ferret import Benchmark
+
+model = AutoModelForSequenceClassification.from_pretrained("{model_name}")
+tokenizer = AutoTokenizer.from_pretrained("{model_name}")
+
+bench = Benchmark(model, tokenizer)
+data = bench.load_dataset("hatexplain")
+evaluations = bench.evaluate_samples(data, {samples})
+bench.show_samples_evaluation_table(evaluations)
+            """,
+            language="python",
+        )
single.py CHANGED
@@ -1,4 +1,3 @@
-from ctypes import DEFAULT_MODE
 import streamlit as st
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoConfig
 from ferret import Benchmark
@@ -63,7 +62,7 @@ def body():
 
     text = st.text_input("Text", "I love your style!")
 
-    compute = st.button("Compute")
+    compute = st.button("Run")
 
     if compute and model_name: