Spaces:
Running
Running
File size: 5,291 Bytes
0b5c5aa 839ca71 5defafa 117a821 245d4fa 35b059b 117a821 39654c5 a7883dd 88009b8 39654c5 245d4fa 39654c5 245d4fa 39654c5 245d4fa 39654c5 245d4fa 39654c5 245d4fa cc7e22e 245d4fa cbbb1a3 cc7e22e 4bf4df2 a7883dd 4bf4df2 cc7e22e 245d4fa cc7e22e bedb44d cc7e22e 5defafa cc7e22e d7128fd 5defafa cc7e22e 29635b7 cc7e22e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import streamlit as st
import pandas as pd
from io import StringIO
from generation import process_scores
from model import AzureAgent, GPTAgent
from analysis import statistical_tests, result_evaluation
# Set up the Streamlit interface
st.title('JobFair: A Benchmark for Fairness in LLM Employment Decision')
st.sidebar.title('Model Settings')
# Define a function to manage state initialization
def initialize_state():
keys = ["model_submitted", "api_key", "endpoint_url", "deployment_name", "temperature", "max_tokens",
"data_processed", "group_name","occupation", "privilege_label", "protect_label", "num_run", "uploaded_file"]
defaults = [False, "", "https://safeguard-monitor.openai.azure.com/", "gpt35-1106", 0.5, 150, False,"Gender", "Programmer", "Male", "Female", 1, None]
for key, default in zip(keys, defaults):
if key not in st.session_state:
st.session_state[key] = default
initialize_state()
# Model selection and configuration
model_type = st.sidebar.radio("Select the type of agent", ('GPTAgent', 'AzureAgent'))
st.session_state.api_key = st.sidebar.text_input("API Key", type="password", value=st.session_state.api_key)
st.session_state.endpoint_url = st.sidebar.text_input("Endpoint URL", value=st.session_state.endpoint_url)
st.session_state.deployment_name = st.sidebar.text_input("Model Name", value=st.session_state.deployment_name)
api_version = '2024-02-15-preview' if model_type == 'GPTAgent' else ''
st.session_state.temperature = st.sidebar.slider("Temperature", 0.0, 1.0, st.session_state.temperature, 0.01)
st.session_state.max_tokens = st.sidebar.number_input("Max Tokens", 1, 1000, st.session_state.max_tokens)
if st.sidebar.button("Reset Model Info"):
initialize_state() # Reset all state to defaults
st.experimental_rerun()
if st.sidebar.button("Submit Model Info"):
st.session_state.model_submitted = True
# Ensure experiment settings are only shown if model info is submitted
if st.session_state.model_submitted:
df = None
file_options = st.radio("Choose file source:", ["Upload", "Example"])
if file_options == "Example":
df = pd.read_csv("prompt_test.csv")
else:
st.session_state.uploaded_file = st.file_uploader("Choose a file")
if st.session_state.uploaded_file is not None:
data = StringIO(st.session_state.uploaded_file.getvalue().decode("utf-8"))
df = pd.read_csv(data)
if df is not None:
st.write('Data:', df)
st.session_state.occupation = st.text_input("Occupation", value=st.session_state.occupation)
st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
st.session_state.protect_label = st.text_input("Protect Label", value=st.session_state.protect_label)
st.session_state.num_run = st.number_input("Number of Runs", 1, 10, st.session_state.num_run)
if st.button('Process Data') and not st.session_state.data_processed:
# Initialize the correct agent based on model type
if model_type == 'AzureAgent':
agent = AzureAgent(st.session_state.api_key, st.session_state.endpoint_url, st.session_state.deployment_name)
else:
agent = GPTAgent(st.session_state.api_key, st.session_state.endpoint_url, st.session_state.deployment_name, api_version)
# Process data and display results
with st.spinner('Processing data...'):
parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
df = process_scores(df, st.session_state.num_run, parameters, st.session_state.privilege_label, st.session_state.protect_label, agent, st.session_state.group_name, st.session_state.occupation)
st.session_state.data_processed = True # Mark as processed
st.write('Processed Data:', df)
# use the data to generate a plot
st.write("Plotting the data")
# Add ranks for each score within each row
df['Privilege_Rank'] = \
df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1)[
'Privilege_Avg_Score']
df['Protect_Rank'] = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1)[
'Protect_Avg_Score']
df['Neutral_Rank'] = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1)[
'Neutral_Avg_Score']
test_results = statistical_tests(df)
evaluation_results = result_evaluation(test_results)
for key, value in evaluation_results.items():
st.write(f"{key}: {value}")
if st.button("Reset Experiment Settings"):
st.session_state.occupation = "Programmer"
st.session_state.group_name = "Gender"
st.session_state.privilege_label = "Male"
st.session_state.protect_label = "Female"
st.session_state.num_run = 1
st.session_state.data_processed = False
st.session_state.uploaded_file = None |