Spaces:

JVice
/

try-before-you-bias

Sleeping

App Files Files Community

try-before-you-bias / model_comparison.py

JVice

Update model_comparison.py

c62b123 verified 8 days ago

raw history blame contribute delete

No virus

8.05 kB

	import pandas as pd
	import streamlit as st
	import numpy as np
	import plotly.express as px
	from yaml import safe_load
	import user_evaluation_variables
	from pathlib import Path
	from huggingface_hub import hf_hub_download, CommitScheduler, login
	from profanity_check import predict

	# Evaluation table currently shown on the comparison page. It is assigned by
	# initialise_page() when one of the "TBYB Community Evaluations" buttons is
	# pressed and stays None until then.
	databaseDF = None

	# NOTE(review): local data directory, disabled together with the
	# CommitScheduler block below.
	# EVAL_DATABASE_DIR = Path("data")
	# EVAL_DATABASE_DIR.mkdir(parents=True, exist_ok=True)

	# File names of the two evaluation databases. They are passed as `filename`
	# to hf_hub_download() by add_user_evalID_columns_to_df().
	GEN_EVAL_DATABASE_PATH = 'data/general_eval_database.yaml'
	TASK_EVAL_DATABASE_PATH = 'data/task_oriented_eval_database.yaml'
	# GEN_EVAL_DATABASE_PATH = EVAL_DATABASE_DIR / f"general_eval_database.yaml"
	# TASK_EVAL_DATABASE_PATH = EVAL_DATABASE_DIR / f"task_oriented_eval_database.yaml"

	# NOTE(review): scheduled upload of the local data folder is disabled; the
	# CommitScheduler import is therefore unused by the code visible here.
	# EVAL_DATABASE_UPDATE_SCHEDULER = CommitScheduler(
	# repo_id="try-before-you-bias-data",
	# repo_type="dataset",
	# folder_path=EVAL_DATABASE_DIR,
	# path_in_repo="data",
	# every=2,
	# )

	# NOTE(review): disabled helper that derived the next "G_"/"T_" evaluation ID
	# from the highest existing one. It calls add_user_evalID_columns_to_df()
	# with three arguments, which no longer matches that function's current
	# two-argument signature - update the call before re-enabling.
	# def get_evaluation_id(evalType, debugging):
	# global GEN_EVAL_DATABASE_PATH
	# global TASK_EVAL_DATABASE_PATH

	# if evalType == 'general':
	# DFPath = GEN_EVAL_DATABASE_PATH
	# else:
	# DFPath = TASK_EVAL_DATABASE_PATH

	# df = add_user_evalID_columns_to_df(None, DFPath, False)
	# evalColumn = [int(x.split('_')[1]) for x in list(df['Eval. ID'])]

	# newEvalID = max(evalColumn) + 1
	# if evalType == 'general':
	# newEvalID = 'G_'+str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))
	# else:
	# newEvalID = 'T_' + str(newEvalID).zfill(len(list(df['Eval. ID'])[0].split('_')[1]))

	# if debugging:
	# st.write(df['Eval. ID'])
	# st.write(evalColumn)
	# st.write("current last EVAL ID:", df['Eval. ID'].iloc[-1])
	# st.write("NEW EVAL ID:", newEvalID)
	# return newEvalID

	def check_profanity(df):
	    """Replace profane entries of the 'Target' column with the string 'NSFW'.

	    The frame is modified in place and the very same object is returned, so
	    callers that need the original text must pass a copy. Frames that have
	    no 'Target' column, or no rows, are returned untouched.

	    Args:
	        df: pandas DataFrame; the 'Target' column, when present, is expected
	            to hold text that `predict` can classify.

	    Returns:
	        The DataFrame object that was passed in.
	    """
	    # Whether the column exists cannot change from row to row, so decide once.
	    # Bailing out on an empty frame also keeps a zero-length batch away from
	    # the classifier.
	    if 'Target' not in df or df.empty:
	        return df

	    # One batched call instead of one model invocation per row.
	    flagged = np.asarray(predict(df['Target'].tolist())) != 0.0

	    # A positional boolean mask stays correct even when index labels repeat.
	    df.loc[flagged, 'Target'] = 'NSFW'
	    return df
	def dataframe_with_selections(df):
	    """Show `df` in an editable table and return the rows left ticked.

	    A copy of `df` is passed through check_profanity(), given a leading
	    "Select" checkbox column (ticked by default) and rendered with
	    st.data_editor. Every column of `df` is read-only in the editor.

	    Args:
	        df: pandas DataFrame to display.

	    Returns:
	        DataFrame holding the rows whose checkbox is still ticked, with the
	        helper "Select" column removed again.
	    """
	    # Copy first: check_profanity() edits the frame it is given.
	    editable = check_profanity(df.copy())
	    editable.insert(0, "Select", True)

	    checkbox = st.column_config.CheckboxColumn(required=True)
	    edited = st.data_editor(
	        editable,
	        hide_index=True,
	        column_config={"Select": checkbox},
	        disabled=df.columns,
	    )

	    # Keep the ticked rows, then discard the temporary checkbox column.
	    ticked = edited[edited["Select"]]
	    return ticked.drop(columns='Select')
	def add_user_evalID_columns_to_df(df, evalDataPath):
	    """Load the community evaluations and append them to `df`.

	    The YAML file `evalDataPath` is fetched from the
	    "JVice/try-before-you-bias-data" dataset repository. Under
	    ['evaluations']['username'] it maps each user to a mapping of
	    evaluation ID -> evaluation record. Every record becomes one row and its
	    ID is stored in a leading "Eval. ID" column.

	    Args:
	        df: existing DataFrame to extend, or None to start from scratch.
	        evalDataPath: path of the YAML file inside the dataset repository.

	    Returns:
	        DataFrame with one row per evaluation, or `df` unchanged when the
	        file lists no users. As soon as frames are concatenated the index is
	        renumbered; a result built from a single user with `df` None keeps
	        the evaluation IDs as its index.
	    """
	    localPath = hf_hub_download(repo_id="JVice/try-before-you-bias-data", filename=evalDataPath,
	                                repo_type="dataset")
	    with open(localPath, 'r', encoding='utf-8') as f:
	        yamlData = safe_load(f)

	    for userEvaluations in yamlData['evaluations']['username'].values():
	        userDF = pd.DataFrame(userEvaluations).T
	        # Attach the IDs before concatenating. This removes the need to look
	        # for missing IDs afterwards with an identity test against np.nan and
	        # to rescan the whole accumulated frame for every user.
	        userDF.insert(0, "Eval. ID", list(userEvaluations.keys()), allow_duplicates=True)
	        if df is None:
	            df = userDF
	        else:
	            df = pd.concat([df, userDF], ignore_index=True)
	    return df

	def initialise_page(tab):
	    """Render the model-comparison page inside `tab`.

	    Two side-by-side forms each offer a "TBYB Community Evaluations" button:
	    the left one loads the general-bias database, the right one the
	    task-oriented database. The loaded table is kept in the module-level
	    `databaseDF`. Once a table is available it is shown with row checkboxes,
	    together with a normalisation toggle and a button that plots the
	    selected rows.

	    Args:
	        tab: Streamlit container used as a context manager for the page.
	    """
	    global databaseDF

	    generalColumns = ["Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
	                      "Objects", "Actions", "Occupations", "Dist. Bias", "Hallucination", "Gen. Miss Rate",
	                      "Run Time", "Date", "Time"]
	    taskColumns = ["Eval. ID", "Model", "Model Type", "Resolution", "No. Samples", "Inference Steps",
	                   "Target", "Dist. Bias", "Hallucination", "Gen. Miss Rate", "Run Time", "Date", "Time"]

	    with tab:
	        generalPanel, taskPanel = st.columns(2)

	        with generalPanel:
	            st.subheader("\U0001F30E General Bias")
	            with st.form("gen_bias_database_loading_form", clear_on_submit=False):
	                if st.form_submit_button("TBYB Community Evaluations"):
	                    # Forget the previous table before loading, so a failed
	                    # load does not leave stale data behind.
	                    databaseDF = None
	                    generalEvaluations = add_user_evalID_columns_to_df(None, GEN_EVAL_DATABASE_PATH)
	                    databaseDF = generalEvaluations[generalColumns]

	        with taskPanel:
	            st.subheader("\U0001F3AF Task-Oriented Bias")
	            with st.form("task_oriented_database_loading_form", clear_on_submit=False):
	                if st.form_submit_button("TBYB Community Evaluations"):
	                    databaseDF = None
	                    taskEvaluations = add_user_evalID_columns_to_df(None, TASK_EVAL_DATABASE_PATH)
	                    databaseDF = taskEvaluations[taskColumns]

	        # Nothing to compare until one of the databases has been loaded.
	        if databaseDF is None:
	            return

	        chosenRows = dataframe_with_selections(databaseDF)
	        wantNormalised = st.toggle('Normalize Data (better for direct comparisons)')
	        if st.button("Compare Selected Models"):
	            plot_comparison_graphs(tab, chosenRows, wantNormalised)

	def normalise_data(rawValues, metric):
	    """Min-max scale `rawValues` into the range [0, 1].

	    For the metrics 'HJ' and 'MG' the smallest value maps to 0 and the
	    largest to 1. For any other metric (callers in this file pass 'BD') the
	    scale is inverted, so the smallest value maps to 1.

	    Args:
	        rawValues: iterable of numbers, or of strings that float() accepts.
	        metric: metric code that selects the direction of the scale.

	    Returns:
	        List with one scaled value per input value. When all inputs are
	        equal every entry is 1; an empty input gives an empty list.
	    """
	    values = [float(v) for v in rawValues]
	    if not values:
	        return []

	    # The extremes are the same for every element, so compute them once
	    # instead of once per loop iteration.
	    lowest = min(values)
	    spread = max(values) - lowest
	    if spread == 0:
	        # No variation: avoid dividing by zero and rate every entry as 1.
	        return [1] * len(values)

	    scaled = [(v - lowest) / spread for v in values]
	    if metric in ('HJ', 'MG'):
	        return scaled
	    return [1 - s for s in scaled]
	def plot_comparison_graphs(tab, data, normalise):
	    """Plot the bias metrics of the selected evaluations inside `tab`.

	    The selected rows are written out first, followed by one bar chart per
	    metric ('Dist. Bias', 'Hallucination', 'Gen. Miss Rate') with the
	    evaluation ID on the x axis. With `normalise` set, each metric is first
	    rescaled with normalise_data() and a 3D scatter plot of the three
	    metrics is added after the bar charts.

	    Args:
	        tab: Streamlit container used as a context manager for the output.
	        data: DataFrame with the columns 'Eval. ID', 'Dist. Bias',
	            'Hallucination' and 'Gen. Miss Rate'.
	        normalise: whether to rescale the metrics before plotting.
	    """
	    # (column, metric code for normalise_data, bar colour, axis label)
	    metricCharts = (
	        ('Dist. Bias', 'BD', '#59DC23', 'Distribution Bias'),
	        ('Hallucination', 'HJ', '#2359DC', 'Jaccard Hallucination'),
	        ('Gen. Miss Rate', 'MG', '#DC2359', 'Generative Miss Rate'),
	    )

	    if normalise:
	        # Rescale a copy so the caller's frame keeps its raw scores.
	        data = data.copy()
	        for column, metric, _, _ in metricCharts:
	            data[column] = normalise_data(data[column], metric)

	    with tab:
	        st.write("Selected evaluations for comparison:")
	        st.write(data)

	        for column, _, colour, label in metricCharts:
	            barFig = px.bar(x=data['Eval. ID'], y=data[column],
	                            color_discrete_sequence=[colour] * len(data)).update_layout(
	                xaxis_title='Evaluation ID', yaxis_title=label, title=f'{label} Comparison')
	            st.plotly_chart(barFig, theme="streamlit", use_container_width=True)

	        # The combined view is only drawn for normalised data.
	        if normalise:
	            full3DFig = px.scatter_3d(data, x='Dist. Bias', y='Hallucination', z='Gen. Miss Rate',
	                                      width=800, height=800, color='Eval. ID',
	                                      title='3D Text-to-Image Model Bias Comparison')
	            st.plotly_chart(full3DFig, theme="streamlit", use_container_width=True)