Spaces:

nazneen
/

interactive-model-cards

Build error

App Files Files Community

interactive-model-cards / interactive_model_cards /app_layout /example_panel.py

nazneen

interactive model card streamlit app

90f4ec6 over 2 years ago

raw history blame

No virus

11.7 kB

	# --- Streamlit ---
	import streamlit as st

	# --- Data ---
	import robustnessgym as rg
	import pandas as pd

	# --- Misc ---
	from math import floor
	from random import sample
	from interactive_model_cards import utils as ut


	def format_data(user_text, model):
	    """Helper: run the model on one user sentence and summarize the prediction.

	    Args:
	        user_text: The sentence entered (or sampled) in the UI.
	        model: Model object forwarded unchanged to ``ut.update_pred``.

	    Returns:
	        dict with the keys "sentence", "model label", "model label binary",
	        "probability", "confidence", "user label" and "user label binary".
	        Both "user label" entries are ``None``; this function does not
	        fill them in.
	    """
	    # Wrap the sentence in a one-row data panel; the label is hard-coded to 1.
	    dp = rg.DataPanel({"sentence": [user_text], "label": [1]})

	    # run prediction
	    # NOTE(review): `pred` is indexed like a table with "Probability" and
	    # "Label" columns -- confirm against ut.update_pred.
	    dp, pred = ut.update_pred(dp, model)

	    # Summarize using the row with the highest probability.
	    idx_max = pred["Probability"].argmax()
	    top_label = pred["Label"][idx_max]
	    top_prob = pred["Probability"][idx_max]

	    # Truncate (not round) the probability to three decimal places.
	    scale = 10 ** 3

	    return {
	        "sentence": user_text,
	        "model label": top_label,
	        "model label binary": int(top_label == "Positive Sentiment"),
	        "probability": floor(top_prob * scale) / scale,
	        "confidence": ut.conf_level(top_prob),
	        "user label": None,
	        "user label binary": None,
	    }


	def slice_misc(table):
	    """Helper: build a new bench holding the user's sentences as one slice.

	    Args:
	        table: DataFrame of user examples providing the columns "sentence",
	            "model label binary" and "user label binary". When ``None``,
	            ``st.session_state["user_data"]`` is used instead.

	    Returns:
	        The bench created by ``ut.new_bench()`` after the "Your Sentences"
	        slice has been added to it.
	    """
	    # Honour the argument; previously it was silently replaced by the
	    # session copy. Fall back to the session only when nothing is passed.
	    if table is None:
	        table = st.session_state["user_data"]

	    # Map the UI column names onto the sentence/label/pred fields of the
	    # data panel without mutating the caller's frame.
	    dp = rg.DataPanel(
	        {
	            "sentence": table["sentence"].tolist(),
	            "label": table["user label binary"].tolist(),
	            "pred": table["model label binary"].tolist(),
	        }
	    )

	    # give the slice a name
	    dp._identifier = "Your Sentences"

	    # create a fresh dev bench and register the slice on it
	    rg_bench = ut.new_bench()
	    rg_bench.add_slices(dp)

	    return rg_bench


	# *** ADDING CUSTOM SENTENCES *****
	def examples():
	    """DEPRECATED: render the user's custom sentences and their metrics.

	    Reads ``st.session_state["user_data"]`` and, when it is non-empty,
	    draws the metrics chart for the "Your Sentences" slice followed by a
	    table of the stored examples. Otherwise writes a short notice.
	    """
	    st.markdown(" Custom Example Sentences ")

	    user_data = st.session_state["user_data"]
	    if user_data.empty:
	        st.write("No examples added yet")
	        return

	    # Overall performance on the user's sentences.
	    st.markdown("Model Performance")
	    slice_key = "Your Sentences"
	    slice_metrics = st.session_state["quant_ex"]["User Custom Sentence"][slice_key]
	    chart = ut.visualize_metrics(
	        {slice_key: {"metrics": slice_metrics, "source": slice_key}},
	        col_val="#ff7f0e",
	    )
	    st.altair_chart(chart)

	    # The stored examples themselves.
	    st.markdown("Examples")
	    shown_columns = ["sentence", "model label", "user label", "probability"]
	    st.dataframe(user_data[shown_columns])


	def example_sentence(sentence_examples, model, doc2vec):
	    """UI for writing a custom sentence, reviewing its prediction and saving it.

	    Args:
	        sentence_examples: Container whose "sentences" entry is the pool
	            that the "Get Suggested Example" button draws from.
	        model: Forwarded to ``format_data`` to score the sentence.
	        doc2vec: Embedding model forwarded to ``ut.prep_sentence_embedding``.

	    Side effects (on form submission): updates the ``st.session_state``
	    entries "user_data", "quant_ex", "selected_slice" and "embedding".
	    """
	    # ** Entering Text **
	    placeholder = st.empty()
	    user_text = placeholder.text_input(
	        "Write your own example sentences, or click 'Get Suggest Examples'",
	        st.session_state["example_sent"],
	    )

	    if st.button("Get Suggested Example", key="user_text"):
	        # random.sample() no longer accepts a set (TypeError on Python 3.11+),
	        # so de-duplicate first and sample from a list.
	        candidates = list(set(sentence_examples["sentences"]))
	        st.session_state["example_sent"] = sample(candidates, 1)[0]

	        # Redraw the input in the same placeholder with the suggestion.
	        user_text = placeholder.text_input(
	            "Write your own example sentences, or click 'Get Suggested Example'",
	            st.session_state["example_sent"],
	        )

	    if user_text == "":
	        return

	    new_example = format_data(user_text, model)

	    # ** Prediction Summary **
	    with st.form(key="my_form"):
	        st.markdown("Model Prediction Summary")
	        st.markdown(
	            f"The sentiment model predicts that this sentence has an overall `{new_example['model label']}` with an `{new_example['confidence']}` (p={new_example['probability']})"
	        )

	        # prediction agreement solicitation
	        st.markdown("Do you agree with the prediction?")
	        agreement = st.radio("Indicate your agreement below", ["Agree", "Disagree"])

	        # The user label equals the model label on agreement and is the
	        # opposite sentiment on disagreement.
	        model_lab = new_example["model label"]
	        model_positive = model_lab == "Positive Sentiment"
	        if agreement == "Agree":
	            user_lab = model_lab
	            user_lab_bin = int(model_positive)
	        else:
	            user_lab = "Negative Sentiment" if model_positive else "Positive Sentiment"
	            user_lab_bin = int(not model_positive)

	        # update robustness gym with user_example prediction
	        if st.form_submit_button("Add to exisiting sentences"):
	            new_example["user label"] = user_lab
	            new_example["user label binary"] = user_lab_bin

	            # one-row data frame to add to the session info
	            new_row = pd.DataFrame(new_example, index=[0])

	            # DataFrame.append was removed in pandas 2.0; pd.concat is the
	            # supported equivalent.
	            st.session_state["user_data"] = pd.concat(
	                [st.session_state["user_data"], new_row], ignore_index=True
	            )

	            # update the user data dev bench
	            user_bench = slice_misc(st.session_state["user_data"])

	            # add bench
	            st.session_state["quant_ex"]["User Custom Sentence"] = user_bench.metrics[
	                "model"
	            ]

	            # update the selected data
	            st.session_state["selected_slice"] = {
	                "name": "Your Sentences",
	                "source": "User Custom Sentence",
	            }

	            # update the sentence with an embedding
	            embedding = st.session_state["embedding"]
	            tmp = ut.prep_sentence_embedding(
	                name="Your Sentences",
	                source="User Custom Sentence",
	                sentence=user_text,
	                sentiment=user_lab,
	                sort_order=100,  # always put it on top
	                embed_model=doc2vec,
	                idx=max(embedding.index) + 1,
	            )

	            # NOTE(review): assumes prep_sentence_embedding returns a
	            # DataFrame (or Series) that can be concatenated -- confirm.
	            st.session_state["embedding"] = pd.concat([embedding, tmp])

	# *** DEFINING CUSTOM SUBGROUPS *****
	def subpopulation_slice(sst_db, doc2vec):
	    """UI form for defining a keyword-based subpopulation of the model data.

	    Args:
	        sst_db: Bench object exposing ``slices`` and callable as
	            ``sst_db(slice_builder, base_slice, columns)`` to add a slice.
	        doc2vec: Embedding model forwarded to ``ut.prep_sentence_embedding``.

	    Returns:
	        The subpopulation name currently entered in the form.

	    Raises:
	        IndexError: If, after building, no slice identifier contains the
	            chosen name.

	    Side effects (on form submission): adds a slice to ``sst_db`` and
	    updates the ``st.session_state`` entries "selected_slice",
	    "slice_terms" and "embedding".
	    """
	    with st.form(key="subpop_form"):
	        st.markdown("Define you subpopulation")
	        user_terms = st.text_input(
	            "Enter a set of comma separated words", "comedy, hilarious, clown"
	        )
	        slice_choice = st.selectbox(
	            "Choose Data Source", ["Training Data", "Evaluation Data"]
	        )
	        slice_name = st.text_input(
	            "Give your subpopulation a name", "subpop_1", key="custom_slice_name"
	        )

	        if st.form_submit_button("Create Subpopulation"):
	            # build a new slice
	            user_terms = [x.strip() for x in user_terms.split(",")]
	            slice_builder = rg.HasAnyPhrase([user_terms], identifiers=[slice_name])

	            # pick the base slice (training or evaluation data) to filter
	            slice_ids = ut.get_sliceid(list(sst_db.slices))
	            base_id = "xyz_train" if slice_choice == "Training Data" else "xyz_test"
	            idx = ut.get_sliceidx(slice_ids, base_id)

	            sst_db(slice_builder, list(sst_db.slices)[idx], ["sentence"])

	            # Find the stored slice in a single pass: its position and the
	            # identifier under which it was registered.
	            slice_ids = ut.get_sliceid(list(sst_db.slices))
	            matches = [
	                (i, elem)
	                for i, elem in enumerate(slice_ids)
	                if slice_name in str(elem)
	            ]
	            slice_idx, slice_rg_name = matches[0]

	            slice_data = list(sst_db.slices)[slice_idx]

	            # updating the selected slice
	            st.session_state["selected_slice"] = {
	                "name": slice_rg_name,
	                "source": "Custom Slice",
	            }

	            # storing the slice terms
	            st.session_state["slice_terms"][slice_rg_name] = user_terms

	            # adding the slice's sentences to the embedding
	            embedding = st.session_state["embedding"]
	            tmp = ut.prep_sentence_embedding(
	                name=slice_name,
	                source="Custom Slice",
	                sentence=slice_data["sentence"],
	                sentiment=[
	                    "Positive Sentiment" if int(round(x)) == 1 else "Negative Sentiment"
	                    for x in slice_data["label"]
	                ],
	                sort_order=5,
	                embed_model=doc2vec,
	                idx=max(embedding.index) + 1,
	                type="multi",
	            )

	            # DataFrame.append was removed in pandas 2.0; pd.concat is the
	            # supported equivalent.
	            # NOTE(review): assumes prep_sentence_embedding returns a
	            # DataFrame -- confirm.
	            st.session_state["embedding"] = pd.concat([embedding, tmp])

	    return slice_name


	def slice_vis(terms, sst_db, slice_name):
	    """DEPRECATED: show the terms, metrics chart and rows of a named slice.

	    Args:
	        terms: Value written to the UI as-is via ``st.write``.
	        sst_db: Bench object exposing ``slices`` and ``metrics["model"]``.
	        slice_name: Substring looked up in the slice identifiers.

	    Raises:
	        ValueError: If more than one slice identifier contains ``slice_name``.
	    """
	    st.write(terms)
	    # TO DO - FORMATTING AND ADD METRICS
	    slices = list(sst_db.slices)

	    # NOTE(review): presumably the first two slices are the built-in
	    # train/test data, so custom slices exist only beyond that -- confirm.
	    if len(slices) <= 2:
	        return

	    # get selected slice data
	    slice_ids = ut.get_sliceid(slices)
	    matches = [i for i, elem in enumerate(slice_ids) if slice_name in str(elem)]

	    if len(matches) > 1:
	        raise ValueError("More than one slice with the same name")

	    # An empty match list used to crash with IndexError on matches[0], which
	    # left the "No slice found" message unreachable.
	    if not matches:
	        st.write("No slice found")
	        return

	    slice_data = slices[matches[0]]
	    slice_id = str(slice_data._identifier)

	    # visualize performance
	    all_metrics = ut.metrics_to_dict(sst_db.metrics["model"], slice_id)
	    chart = ut.visualize_metrics(all_metrics)
	    st.altair_chart(chart)

	    # write slice data to UI
	    st.dataframe(ut.slice_to_df(slice_data))


	# *** EXAMPLE PANEL UI *****
	def example_panel(sentence_examples, model, sst_db, doc2vec):
	    """Layout for the custom example panel.

	    Renders an introduction followed by three expanders: defining a new
	    subpopulation, adding custom sentences, and a dataset upload that is
	    disabled for the online deployment.

	    Args:
	        sentence_examples: Forwarded to ``example_sentence``.
	        model: Forwarded to ``example_sentence``.
	        sst_db: Forwarded to ``subpopulation_slice``.
	        doc2vec: Forwarded to both ``subpopulation_slice`` and
	            ``example_sentence``.
	    """
	    st.markdown("Modify the quantitative analysis results by defining your own subpopulations in the data, including your own data by adding your own sentences or dataset.")

	    with st.expander("Explore new subpopulations in model data"):
	        # The form stores its results in st.session_state, so the return
	        # value is not needed here.
	        subpopulation_slice(sst_db, doc2vec)

	    with st.expander("Explore with your own sentences"):
	        # adding a column for user text input
	        example_sentence(sentence_examples, model, doc2vec)

	    with st.expander("Explore with your own dataset"):
	        st.error("This feature is not enabled for the online deployment")
	# Public API: `from <this module> import *` exports only `example_panel`.
	__all__=["example_panel"]