nazneen's picture
interactive model card streamlit app
90f4ec6
raw history blame
No virus
11.7 kB
# --- Streamlit ---
import streamlit as st
# --- Data ---
import robustnessgym as rg
import pandas as pd
# --- Misc ---
from math import floor
from random import sample
from interactive_model_cards import utils as ut
def format_data(user_text, model):
    """Run the model on one user sentence and summarise the top prediction.

    Args:
        user_text: The sentence typed (or sampled) by the user.
        model: The model object forwarded to ``ut.update_pred``.

    Returns:
        A dict describing the sentence and its most probable prediction. The
        ``"user label"`` and ``"user label binary"`` entries are left as
        ``None`` so the caller can fill them in later.
    """
    # Wrap the sentence in a one-row data panel; the label is a placeholder.
    panel = rg.DataPanel({"sentence": [user_text], "label": [1]})
    panel, pred = ut.update_pred(panel, model)

    # Pick the entry with the highest probability.
    top = pred["Probability"].argmax()
    top_label = pred["Label"][top]
    top_prob = pred["Probability"][top]

    # Truncate (not round) the probability to three decimal places.
    scale = 10 ** 3
    return {
        "sentence": user_text,
        "model label": top_label,
        "model label binary": 1 if top_label == "Positive Sentiment" else 0,
        "probability": floor(top_prob * scale) / scale,
        "confidence": ut.conf_level(top_prob),
        "user label": None,
        "user label binary": None,
    }
def slice_misc(table):
    """Build a fresh bench containing a "Your Sentences" slice.

    Args:
        table: Data frame of user sentences with at least the columns
            ``"sentence"``, ``"model label binary"`` and
            ``"user label binary"``. When ``None``, the frame stored in
            ``st.session_state["user_data"]`` is used instead.

    Returns:
        The bench returned by ``ut.new_bench()`` with the user-sentence
        slice added to it.
    """
    # Previously the argument was overwritten with the session-state frame;
    # honour what the caller passed and only fall back when nothing is given.
    if table is None:
        table = st.session_state["user_data"]

    # Read the needed columns directly rather than renaming columns in place.
    dp = rg.DataPanel(
        {
            "sentence": table["sentence"].tolist(),
            "label": table["user label binary"].tolist(),
            "pred": table["model label binary"].tolist(),
        }
    )
    # give the slice a name
    dp._identifier = "Your Sentences"

    # add the slice to a new dev bench
    rg_bench = ut.new_bench()
    rg_bench.add_slices(dp)
    return rg_bench
# ***** ADDING CUSTOM SENTENCES *******
def examples():
    """DEPRECATED: UI for displaying the user's custom sentences.

    Renders a metrics chart and a table of the stored sentences, or a short
    notice when no sentences have been added yet.
    """
    st.markdown("** Custom Example Sentences **")

    # Nothing stored yet: show the notice and stop.
    if st.session_state["user_data"].empty:
        st.write("No examples added yet")
        return

    # --- overall performance on the user's sentences ---
    st.markdown("*Model Performance*")
    slice_key = "Your Sentences"
    stored_metrics = st.session_state["quant_ex"]["User Custom Sentence"][slice_key]
    metrics_by_slice = {slice_key: {"metrics": stored_metrics, "source": slice_key}}
    st.altair_chart(ut.visualize_metrics(metrics_by_slice, col_val="#ff7f0e"))

    # --- the sentences themselves ---
    st.markdown("*Examples*")
    shown_columns = ["sentence", "model label", "user label", "probability"]
    st.dataframe(st.session_state["user_data"][shown_columns])
def example_sentence(sentence_examples, model, doc2vec):
    """UI for writing a custom sentence, reviewing its prediction and saving it.

    Args:
        sentence_examples: Mapping/frame whose ``"sentences"`` entry holds the
            pool of suggested example sentences.
        model: Model forwarded to ``format_data`` to score the sentence.
        doc2vec: Embedding model forwarded to ``ut.prep_sentence_embedding``.

    Side effects:
        Reads ``st.session_state["example_sent"]``. On submit, updates the
        ``"user_data"``, ``"quant_ex"``, ``"selected_slice"`` and
        ``"embedding"`` entries of ``st.session_state``.
    """
    # **** Entering Text ***
    placeholder = st.empty()
    user_text = placeholder.text_input(
        "Write your own example sentences, or click 'Get Suggest Examples'",
        st.session_state["example_sent"],
    )

    if st.button("Get Suggested Example", key="user_text"):
        # random.sample() no longer accepts a set (TypeError on Python 3.11+),
        # so de-duplicate into a list; dict.fromkeys keeps first-seen order.
        candidates = list(dict.fromkeys(sentence_examples["sentences"]))
        st.session_state["example_sent"] = sample(candidates, 1)[0]
        # NOTE(review): this label intentionally stays different from the one
        # above. Neither text input has an explicit key, and Streamlit derives
        # widget identity from the label - confirm before unifying the wording.
        user_text = placeholder.text_input(
            "Write your own example sentences, or click 'Get Suggested Example'",
            st.session_state["example_sent"],
        )

    # Nothing to score until the user has provided some text.
    if user_text == "":
        return

    new_example = format_data(user_text, model)

    # **** Prediction Summary ***
    with st.form(key="my_form"):
        st.markdown("**Model Prediction Summary**")
        st.markdown(
            f"*The sentiment model predicts that this sentence has an overall `{new_example['model label']}` with an `{new_example['confidence']}` (p={new_example['probability']})*"
        )

        # prediction agreement solicitation
        st.markdown("**Do you agree with the prediction?**")
        agreement = st.radio("Indicate your agreement below", ["Agree", "Disagree"])

        # The user label equals the model label on agreement, its opposite
        # otherwise.
        model_is_positive = new_example["model label"] == "Positive Sentiment"
        if agreement == "Agree":
            user_lab = new_example["model label"]
            user_lab_bin = 1 if model_is_positive else 0
        else:
            user_lab = (
                "Negative Sentiment" if model_is_positive else "Positive Sentiment"
            )
            user_lab_bin = 0 if model_is_positive else 1

        if not st.form_submit_button("Add to exisiting sentences"):
            return

        # updating the user data frame
        new_example["user label"] = user_lab
        new_example["user label binary"] = user_lab_bin
        new_row = pd.DataFrame(new_example, index=[0])

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent and also works on older pandas.
        st.session_state["user_data"] = pd.concat(
            [st.session_state["user_data"], new_row], ignore_index=True
        )

        # update the user data dev bench and store its metrics
        user_bench = slice_misc(st.session_state["user_data"])
        st.session_state["quant_ex"]["User Custom Sentence"] = user_bench.metrics[
            "model"
        ]

        # update the selected data
        st.session_state["selected_slice"] = {
            "name": "Your Sentences",
            "source": "User Custom Sentence",
        }

        # update the embedding table with the new sentence
        embedding = st.session_state["embedding"]
        tmp = ut.prep_sentence_embedding(
            name="Your Sentences",
            source="User Custom Sentence",
            sentence=user_text,
            sentiment=user_lab,
            sort_order=100,  # always put it on top
            embed_model=doc2vec,
            idx=max(embedding.index) + 1,
        )
        # NOTE(review): assumes prep_sentence_embedding returns a DataFrame
        # indexed by `idx` - confirm against utils.
        st.session_state["embedding"] = pd.concat([embedding, tmp])
# ***** DEFINING CUSTOM SUBGROUPS *******
def subpopulation_slice(sst_db, doc2vec):
    """UI form for defining a term-based subpopulation (slice) of the data.

    Args:
        sst_db: Bench object; it is called with a slice builder to create the
            new slice and exposes the existing ones through ``.slices``.
        doc2vec: Embedding model forwarded to ``ut.prep_sentence_embedding``.

    Returns:
        The subpopulation name currently entered in the form.

    Side effects:
        On submit, adds a slice to ``sst_db`` and updates the
        ``"selected_slice"``, ``"slice_terms"`` and ``"embedding"`` entries of
        ``st.session_state``.
    """
    with st.form(key="subpop_form"):
        st.markdown("Define you subpopulation")
        user_terms = st.text_input(
            "Enter a set of comma separated words", "comedy, hilarious, clown"
        )
        slice_choice = st.selectbox(
            "Choose Data Source", ["Training Data", "Evaluation Data"]
        )
        slice_name = st.text_input(
            "Give your subpopulation a name", "subpop_1", key="custom_slice_name"
        )

        if st.form_submit_button("Create Subpopulation"):
            # build a new slice from the comma-separated terms
            user_terms = [term.strip() for term in user_terms.split(",")]
            slice_builder = rg.HasAnyPhrase([user_terms], identifiers=[slice_name])

            # locate the source slice the new subpopulation is drawn from
            slice_ids = ut.get_sliceid(list(sst_db.slices))
            source_id = "xyz_train" if slice_choice == "Training Data" else "xyz_test"
            idx = ut.get_sliceidx(slice_ids, source_id)
            sst_db(slice_builder, list(sst_db.slices)[idx], ["sentence"])

            # Find the slice that was just created. Matching is by substring
            # of the identifier; the first hit is used (IndexError if none).
            slice_ids = ut.get_sliceid(list(sst_db.slices))
            matches = [
                (i, elem)
                for i, elem in enumerate(slice_ids)
                if slice_name in str(elem)
            ]
            slice_idx, slice_rg_name = matches[0]
            slice_data = list(sst_db.slices)[slice_idx]

            # updating the selected slice
            st.session_state["selected_slice"] = {
                "name": slice_rg_name,
                "source": "Custom Slice",
            }

            # storing the slice terms
            st.session_state["slice_terms"][slice_rg_name] = user_terms

            # adding the slice's sentences to the embedding table
            embedding = st.session_state["embedding"]
            tmp = ut.prep_sentence_embedding(
                name=slice_name,
                source="Custom Slice",
                sentence=slice_data["sentence"],
                sentiment=[
                    "Positive Sentiment" if int(round(x)) == 1 else "Negative Sentiment"
                    for x in slice_data["label"]
                ],
                sort_order=5,
                embed_model=doc2vec,
                idx=max(embedding.index) + 1,
                type="multi",
            )
            # DataFrame.append was removed in pandas 2.0; use pd.concat.
            # NOTE(review): assumes prep_sentence_embedding returns a
            # DataFrame - confirm against utils.
            st.session_state["embedding"] = pd.concat([embedding, tmp])

    return slice_name
def slice_vis(terms, sst_db, slice_name):
    """DEPRECATED: visualise the metrics and rows of a named slice.

    Args:
        terms: Value written to the page before the slice (the slice terms).
        sst_db: Bench object exposing ``.slices`` and ``.metrics``.
        slice_name: Name to look for (substring match) among slice identifiers.

    Raises:
        ValueError: If more than one slice identifier contains ``slice_name``.
    """
    st.write(terms)

    # TO DO - FORMATTING AND ADD METRICS
    all_slices = list(sst_db.slices)
    if len(all_slices) <= 2:
        # Only visualise when the bench holds more than two slices.
        return

    # get selected slice data
    slice_ids = ut.get_sliceid(all_slices)
    matches = [i for i, elem in enumerate(slice_ids) if slice_name in str(elem)]

    if len(matches) > 1:
        raise ValueError("More than one slice with the same name")
    if not matches:
        # Previously an empty match list raised IndexError on `idx[0]`, which
        # made this message unreachable.
        st.write("No slice found")
        return

    slice_data = all_slices[matches[0]]
    slice_id = str(slice_data._identifier)

    # visualize performance
    all_metrics = ut.metrics_to_dict(sst_db.metrics["model"], slice_id)
    chart = ut.visualize_metrics(all_metrics)
    st.altair_chart(chart)

    # write slice data to UI
    st.dataframe(ut.slice_to_df(slice_data))
# ***** EXAMPLE PANEL UI *******
def example_panel(sentence_examples, model, sst_db, doc2vec):
    """Layout for the custom example panel.

    Renders three expanders: one for defining a subpopulation of the model
    data, one for adding custom sentences, and one (disabled) placeholder for
    uploading a dataset.

    Args:
        sentence_examples: Pool of suggested sentences, forwarded to
            ``example_sentence``.
        model: Model forwarded to ``example_sentence``.
        sst_db: Bench object forwarded to ``subpopulation_slice``.
        doc2vec: Embedding model forwarded to both sub-panels.
    """
    # A longer three-item overview (define a new subpopulation / add your own
    # sentences / add your own dataset) used to be rendered here; it is
    # disabled in favour of the single summary line below.
    st.markdown("Modify the quantitative analysis results by defining your own subpopulations in the data, including your own data by adding your own sentences or dataset.")

    with st.expander("Explore new subpopulations in model data"):
        # The form stores its results in session state; the returned name is
        # not needed here.
        subpopulation_slice(sst_db, doc2vec)

    with st.expander("Explore with your own sentences"):
        example_sentence(sentence_examples, model, doc2vec)

    with st.expander("Explore with your own dataset"):
        st.error("This feature is not enabled for the online deployment")
# Public API of this module: only the panel layout entry point is exported.
__all__=["example_panel"]