File size: 11,745 Bytes
90f4ec6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# --- Streamlit ---
import streamlit as st

# --- Data ---
import robustnessgym as rg
import pandas as pd

# --- Misc ---
from math import floor
from random import sample
from interactive_model_cards import utils as ut


def format_data(user_text, model):
    """Run the model on one user sentence and summarize its top prediction.

    Args:
        user_text: The sentence typed (or picked) by the user.
        model: The model object handed through to ``ut.update_pred``.

    Returns:
        A dict describing the example, with the keys ``"sentence"``,
        ``"model label"``, ``"model label binary"``, ``"probability"``,
        ``"confidence"``, ``"user label"`` and ``"user label binary"``.
        The two user-label entries are ``None``; the caller fills them in.
    """
    # Wrap the sentence in a one-row data panel. The label value 1 is
    # presumably just a placeholder -- it is not read again in this function.
    panel = rg.DataPanel({"sentence": [user_text], "label": [1]})

    # Only the prediction table is needed here; the updated panel is dropped.
    _, pred = ut.update_pred(panel, model)

    # Pick the row with the highest probability.
    top = pred["Probability"].argmax()
    top_label = pred["Label"][top]
    top_prob = pred["Probability"][top]

    if top_label == "Positive Sentiment":
        label_bin = 1
    else:
        label_bin = 0

    return {
        "sentence": user_text,
        "model label": top_label,
        "model label binary": label_bin,
        # Truncate (not round) the probability to three decimal places.
        "probability": floor(top_prob * 10 ** 3) / 10 ** 3,
        "confidence": ut.conf_level(top_prob),
        "user label": None,
        "user label binary": None,
    }


def slice_misc(table):
    """Build a new bench whose only added slice is the user's sentences.

    Previously the ``table`` argument was overwritten with
    ``st.session_state["user_data"]`` and therefore ignored; it is now
    honoured, with the session data used only as a fallback.

    Args:
        table: Frame with the columns ``"sentence"``,
            ``"model label binary"`` and ``"user label binary"``. When
            ``None``, ``st.session_state["user_data"]`` is used instead.

    Returns:
        The bench created by ``ut.new_bench()`` after the user-sentence
        data panel has been added to it as a slice.
    """
    if table is None:
        table = st.session_state["user_data"]

    # Map the user-data columns onto the names the data panel uses:
    # the model's binary label is the prediction, the user's is the label.
    dp = rg.DataPanel(
        {
            "sentence": table["sentence"].tolist(),
            "label": table["user label binary"].tolist(),
            "pred": table["model label binary"].tolist(),
        }
    )

    # give the slice a name
    dp._identifier = "Your Sentences"

    # create a fresh bench and register the slice on it
    rg_bench = ut.new_bench()
    rg_bench.add_slices(dp)

    return rg_bench


# ***** ADDING CUSTOM SENTENCES *******
def examples():
    """Deprecated UI: show metrics and a table for the user's custom sentences.

    Reads ``st.session_state["user_data"]`` and
    ``st.session_state["quant_ex"]``; writes only to the Streamlit page.
    """
    st.markdown("** Custom Example Sentences **")

    user_data = st.session_state["user_data"]
    if user_data.empty:
        st.write("No examples added yet")
        return

    # --- overall performance on the user's sentences ---
    st.markdown("*Model Performance*")
    slice_key = "Your Sentences"
    user_metrics = {
        slice_key: {
            "metrics": st.session_state["quant_ex"]["User Custom Sentence"][
                slice_key
            ],
            "source": slice_key,
        }
    }
    st.altair_chart(ut.visualize_metrics(user_metrics, col_val="#ff7f0e"))

    # --- the sentences themselves ---
    st.markdown("*Examples*")
    shown_columns = ["sentence", "model label", "user label", "probability"]
    st.dataframe(user_data[shown_columns])


def example_sentence(sentence_examples, model, doc2vec):
    """UI for writing a custom sentence, reviewing its prediction and saving it.

    Args:
        sentence_examples: Mapping/frame whose ``"sentences"`` entry holds the
            pool of suggested sentences.
        model: Model passed on to ``format_data`` for the prediction.
        doc2vec: Embedding model passed on to ``ut.prep_sentence_embedding``.

    Side effects (only when the form is submitted): updates the
    ``"user_data"``, ``"quant_ex"``, ``"selected_slice"`` and ``"embedding"``
    entries of ``st.session_state``. Pressing the suggestion button updates
    ``"example_sent"``.
    """
    # **** Entering Text ***
    placeholder = st.empty()
    user_text = placeholder.text_input(
        "Write your own example sentences, or click 'Get Suggest Examples'",
        st.session_state["example_sent"],
    )

    if st.button("Get Suggested Example", key="user_text"):
        # random.sample() no longer accepts a set (TypeError on Python 3.11+),
        # so de-duplicate first and then sample from a list.
        candidates = list(set(sentence_examples["sentences"]))
        st.session_state["example_sent"] = sample(candidates, 1)[0]

        # NOTE(review): this label differs from the first one ('Get Suggest
        # Examples'). It looks like a typo, but unifying the two labels may
        # change how Streamlit identifies the two text inputs -- confirm
        # before editing.
        user_text = placeholder.text_input(
            "Write your own example sentences, or click 'Get Suggested Example'",
            st.session_state["example_sent"],
        )

    # Nothing to predict on until the user has entered some text.
    if user_text == "":
        return

    new_example = format_data(user_text, model)

    # **** Prediction Summary ***
    with st.form(key="my_form"):
        st.markdown("**Model Prediction Summary**")
        st.markdown(
            f"*The sentiment model predicts that this sentence has an overall `{new_example['model label']}` with an `{new_example['confidence']}` (p={new_example['probability']})*"
        )

        # prediction agreement solicitation
        st.markdown("**Do you agree with the prediction?**")
        agreement = st.radio("Indicate your agreement below", ["Agree", "Disagree"])

        # The user label equals the model label on agreement and is the
        # opposite sentiment on disagreement.
        model_positive = new_example["model label"] == "Positive Sentiment"
        if agreement == "Agree":
            user_lab = new_example["model label"]
            user_lab_bin = 1 if model_positive else 0
        else:
            user_lab = (
                "Negative Sentiment" if model_positive else "Positive Sentiment"
            )
            user_lab_bin = 0 if model_positive else 1

        if st.form_submit_button("Add to exisiting sentences"):
            new_example["user label"] = user_lab
            new_example["user label binary"] = user_lab_bin

            # one-row frame to add to the session's user data
            new_row = pd.DataFrame(new_example, index=[0])

            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported equivalent.
            st.session_state["user_data"] = pd.concat(
                [st.session_state["user_data"], new_row], ignore_index=True
            )

            # rebuild the bench for the user's sentences and store its metrics
            user_bench = slice_misc(st.session_state["user_data"])
            st.session_state["quant_ex"]["User Custom Sentence"] = (
                user_bench.metrics["model"]
            )

            # make the user's sentences the selected slice
            st.session_state["selected_slice"] = {
                "name": "Your Sentences",
                "source": "User Custom Sentence",
            }

            # add the sentence to the embedding table under the next index
            embedding = st.session_state["embedding"]
            tmp = ut.prep_sentence_embedding(
                name="Your Sentences",
                source="User Custom Sentence",
                sentence=user_text,
                sentiment=user_lab,
                sort_order=100,  # always put it on top
                embed_model=doc2vec,
                idx=max(embedding.index) + 1,
            )

            # assumes the embedding table and `tmp` are both DataFrames --
            # TODO confirm against ut.prep_sentence_embedding
            st.session_state["embedding"] = pd.concat([embedding, tmp])

# ***** DEFINTING CUSTOM SUBGROUPS *******
def subpopulation_slice(sst_db, doc2vec):
    """UI form that builds a custom subpopulation (slice) from a term list.

    Args:
        sst_db: Bench-like object exposing ``.slices`` and callable with
            ``(slice_builder, slice, columns)`` to add a new slice.
        doc2vec: Embedding model passed on to ``ut.prep_sentence_embedding``.

    Returns:
        The user-supplied slice name when the form was submitted on this run,
        otherwise ``None``.

    Side effects (on submit): adds a slice to ``sst_db`` and updates the
    ``"selected_slice"``, ``"slice_terms"`` and ``"embedding"`` entries of
    ``st.session_state``.
    """
    with st.form(key="subpop_form"):
        st.markdown("Define you subpopulation")
        user_terms = st.text_input(
            "Enter a set of comma separated words", "comedy, hilarious, clown"
        )
        slice_choice = st.selectbox(
            "Choose Data Source", ["Training Data", "Evaluation Data"]
        )
        slice_name = st.text_input(
            "Give your subpopulation a name", "subpop_1", key="custom_slice_name"
        )

        if not st.form_submit_button("Create Subpopulation"):
            return None

        # build a new slice from the comma separated terms
        user_terms = [term.strip() for term in user_terms.split(",")]
        slice_builder = rg.HasAnyPhrase([user_terms], identifiers=[slice_name])

        # Pick the source split. The option text must match the selectbox
        # entry exactly; the earlier comparison string had two spaces
        # ("Training  Data") and so the training split was never chosen.
        slice_ids = ut.get_sliceid(list(sst_db.slices))
        if slice_choice == "Training Data":
            idx = ut.get_sliceidx(slice_ids, "xyz_train")
        else:
            idx = ut.get_sliceidx(slice_ids, "xyz_test")

        sst_db(slice_builder, list(sst_db.slices)[idx], ["sentence"])

        # Look up the newly created slice: first id that contains the name.
        # An IndexError here means no slice id contains `slice_name`.
        slice_ids = ut.get_sliceid(list(sst_db.slices))
        matches = [
            (i, elem) for i, elem in enumerate(slice_ids) if slice_name in str(elem)
        ]
        slice_idx, slice_rg_name = matches[0]
        slice_data = list(sst_db.slices)[slice_idx]

        # updating the selected slice
        st.session_state["selected_slice"] = {
            "name": slice_rg_name,
            "source": "Custom Slice",
        }

        # storing the slice terms
        st.session_state["slice_terms"][slice_rg_name] = user_terms

        # add every sentence of the slice to the embedding table
        embedding = st.session_state["embedding"]
        tmp = ut.prep_sentence_embedding(
            name=slice_name,
            source="Custom Slice",
            sentence=slice_data["sentence"],
            sentiment=[
                "Positive Sentiment" if int(round(x)) == 1 else "Negative Sentiment"
                for x in slice_data["label"]
            ],
            sort_order=5,
            embed_model=doc2vec,
            idx=max(embedding.index) + 1,
            type="multi",
        )

        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        # assumes the embedding table and `tmp` are both DataFrames --
        # TODO confirm against ut.prep_sentence_embedding
        st.session_state["embedding"] = pd.concat([embedding, tmp])

        return slice_name


def slice_vis(terms, sst_db, slice_name):
    """Deprecated: write a slice's terms, metrics chart and data to the page.

    Args:
        terms: Value written to the page as-is before anything else.
        sst_db: Bench-like object exposing ``.slices`` and ``.metrics``.
        slice_name: Substring used to find the slice among the slice ids.

    Raises:
        ValueError: If more than one slice id contains ``slice_name``.

    When no slice id contains ``slice_name`` the message "No slice found" is
    written. Previously that case raised ``IndexError`` before the message
    could be shown.
    """
    st.write(terms)
    # TO DO - FORMATTING AND ADD METRICS

    slices = list(sst_db.slices)
    # Nothing is drawn unless the bench holds more than two slices.
    if len(slices) <= 2:
        return

    # get selected slice data
    slice_ids = ut.get_sliceid(slices)
    matches = [i for i, elem in enumerate(slice_ids) if slice_name in str(elem)]

    if len(matches) > 1:
        raise ValueError("More than one slice with the same name")
    if not matches:
        st.write("No slice found")
        return

    slice_data = slices[matches[0]]
    slice_id = str(slice_data._identifier)

    # visualize performance
    all_metrics = ut.metrics_to_dict(sst_db.metrics["model"], slice_id)
    chart = ut.visualize_metrics(all_metrics)
    st.altair_chart(chart)

    # write slice data to UI
    st.dataframe(ut.slice_to_df(slice_data))


# ***** EXAMPLE PANEL UI *******
def example_panel(sentence_examples, model, sst_db, doc2vec):
    """Lay out the custom example panel as three expanders.

    Args:
        sentence_examples: Suggested-sentence pool, passed to
            ``example_sentence``.
        model: Model passed to ``example_sentence``.
        sst_db: Bench-like object passed to ``subpopulation_slice``.
        doc2vec: Embedding model passed to both sub-panels.
    """
    # Disabled introduction, kept for reference (it used to live here as an
    # unused string literal):
    #   st.markdown(
    #       "Here's an overview of the ways you can add customized the performance results. Using the drop down menu above, you can choose from one of three options"
    #   )
    #   st.markdown(
    #       "1. **Define a new subpopulation** : Create a new subset from the model's training or testing data"
    #   )
    #   st.markdown("1. **Add your own sentences** : Add your own sentences as examples")
    #   st.markdown(
    #       "3. **Add your own dataset** : Upload your own (small) dataset from a csv file"
    #   )
    st.markdown("Modify the quantitative analysis results by defining your own subpopulations in the data, including your own data by adding your own sentences or dataset.")

    with st.expander("Explore new subpopulations in model data"):
        # The return value is not needed here; the call is made for the form
        # it renders and the session-state updates it performs on submit.
        subpopulation_slice(sst_db, doc2vec)

    with st.expander("Explore with your own sentences"):
        # adding a column for user text input
        example_sentence(sentence_examples, model, doc2vec)

    with st.expander("Explore with your own dataset"):
        st.error("This feature is not enabled for the online deployment")
# Names exported by `from <this module> import *`.
__all__ = ["example_panel"]