Zekun Wu committed on
Commit
86607a2
‒
1 Parent(s): 225f4f1
pages/{2_Evaluation_Demo.py → 1_Generation.py} RENAMED
@@ -36,9 +36,6 @@ if st.sidebar.button("Reset Model Info"):
36
  if st.sidebar.button("Submit Model Info"):
37
  st.session_state.model_submitted = True
38
 
39
-
40
-
41
-
42
  # Ensure experiment settings are only shown if model info is submitted
43
  if st.session_state.model_submitted:
44
  df = None
@@ -54,6 +51,8 @@ if st.session_state.model_submitted:
54
 
55
  st.write('Data:', df)
56
 
 
 
57
  st.session_state.occupation = st.text_input("Occupation", value=st.session_state.occupation)
58
  st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
59
  st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
@@ -73,25 +72,20 @@ if st.session_state.model_submitted:
73
  df = process_scores(df, st.session_state.num_run, parameters, st.session_state.privilege_label, st.session_state.protect_label, agent, st.session_state.group_name, st.session_state.occupation)
74
  st.session_state.data_processed = True # Mark as processed
75
 
76
- # Add ranks for each score within each row
77
- ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1,ascending=False)
78
-
79
- df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
80
- df['Protect_Rank'] = ranks['Protect_Avg_Score']
81
- df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
82
 
83
  st.write('Processed Data:', df)
84
 
85
- # use the data to generate a plot
86
- st.write("Plotting the data")
 
 
 
 
 
 
 
87
 
88
- test_results = statistical_tests(df)
89
- print(test_results)
90
- evaluation_results = result_evaluation(test_results)
91
- print(evaluation_results)
92
 
93
- for key, value in evaluation_results.items():
94
- st.write(f"{key}: {value}")
95
 
96
 
97
  if st.button("Reset Experiment Settings"):
 
36
  if st.sidebar.button("Submit Model Info"):
37
  st.session_state.model_submitted = True
38
 
 
 
 
39
  # Ensure experiment settings are only shown if model info is submitted
40
  if st.session_state.model_submitted:
41
  df = None
 
51
 
52
  st.write('Data:', df)
53
 
54
+ # Button to add a new row
55
+
56
  st.session_state.occupation = st.text_input("Occupation", value=st.session_state.occupation)
57
  st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
58
  st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
 
72
  df = process_scores(df, st.session_state.num_run, parameters, st.session_state.privilege_label, st.session_state.protect_label, agent, st.session_state.group_name, st.session_state.occupation)
73
  st.session_state.data_processed = True # Mark as processed
74
 
 
 
 
 
 
 
75
 
76
  st.write('Processed Data:', df)
77
 
78
+ # Allow downloading of the evaluation results
79
+ st.download_button(
80
+ label="Download Evaluation Results",
81
+ data=df.to_csv().encode('utf-8'),
82
+ file_name='generation_results.csv',
83
+ mime='text/csv',
84
+ )
85
+
86
+
87
 
 
 
 
 
88
 
 
 
89
 
90
 
91
  if st.button("Reset Experiment Settings"):
pages/1_Generation_Demo.py DELETED
@@ -1,126 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- from io import StringIO
4
- from util.generation import process_scores
5
- from util.model import AzureAgent, GPTAgent
6
- from util.analysis import statistical_tests, result_evaluation
7
-
8
- # Set up the Streamlit interface
9
- st.title('JobFair: A Benchmark for Fairness in LLM Employment Decision')
10
- st.sidebar.title('Model Settings')
11
-
12
- # Define a function to manage state initialization
13
- def initialize_state():
14
- keys = ["model_submitted", "api_key", "endpoint_url", "deployment_name", "temperature", "max_tokens",
15
- "data_processed", "group_name","occupation", "privilege_label", "protect_label", "num_run", "uploaded_file"]
16
- defaults = [False, "", "https://safeguard-monitor.openai.azure.com/", "gpt35-1106", 0.5, 150, False,"Gender", "Programmer", "Male", "Female", 1, None]
17
- for key, default in zip(keys, defaults):
18
- if key not in st.session_state:
19
- st.session_state[key] = default
20
-
21
- initialize_state()
22
-
23
- # Model selection and configuration
24
- model_type = st.sidebar.radio("Select the type of agent", ('GPTAgent', 'AzureAgent'))
25
- st.session_state.api_key = st.sidebar.text_input("API Key", type="password", value=st.session_state.api_key)
26
- st.session_state.endpoint_url = st.sidebar.text_input("Endpoint URL", value=st.session_state.endpoint_url)
27
- st.session_state.deployment_name = st.sidebar.text_input("Model Name", value=st.session_state.deployment_name)
28
- api_version = '2024-02-15-preview' if model_type == 'GPTAgent' else ''
29
- st.session_state.temperature = st.sidebar.slider("Temperature", 0.0, 1.0, st.session_state.temperature, 0.01)
30
- st.session_state.max_tokens = st.sidebar.number_input("Max Tokens", 1, 1000, st.session_state.max_tokens)
31
-
32
- if st.sidebar.button("Reset Model Info"):
33
- initialize_state() # Reset all state to defaults
34
- st.experimental_rerun()
35
-
36
- if st.sidebar.button("Submit Model Info"):
37
- st.session_state.model_submitted = True
38
-
39
-
40
- def add_row(df):
41
- # Add a new row with default or empty values at the end of the DataFrame
42
- new_row = pd.DataFrame([{col: "" for col in df.columns}])
43
- return pd.concat([df, new_row], ignore_index=True)
44
-
45
- def remove_row(df, index):
46
- # Remove a row based on the index provided
47
- return df.drop(index, errors='ignore').reset_index(drop=True)
48
-
49
-
50
- # Ensure experiment settings are only shown if model info is submitted
51
- if st.session_state.model_submitted:
52
- df = None
53
- file_options = st.radio("Choose file source:", ["Upload", "Example"])
54
- if file_options == "Example":
55
- df = pd.read_csv("prompt_test.csv")
56
- else:
57
- st.session_state.uploaded_file = st.file_uploader("Choose a file")
58
- if st.session_state.uploaded_file is not None:
59
- data = StringIO(st.session_state.uploaded_file.getvalue().decode("utf-8"))
60
- df = pd.read_csv(data)
61
- if df is not None:
62
-
63
- st.write('Data:', df)
64
-
65
- # Button to add a new row
66
- if st.button('Add Row'):
67
- df = add_row(df)
68
- st.session_state.uploaded_file = StringIO(
69
- df.to_csv(index=False)) # Update the session file after modification
70
-
71
- # Input for row index to remove
72
- row_to_remove = st.number_input('Enter row index to remove', min_value=0, max_value=len(df) - 1, step=1,
73
- format='%d')
74
- if st.button('Remove Row'):
75
- df = remove_row(df, row_to_remove)
76
- st.session_state.uploaded_file = StringIO(
77
- df.to_csv(index=False)) # Update the session file after modification
78
-
79
- st.session_state.occupation = st.text_input("Occupation", value=st.session_state.occupation)
80
- st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
81
- st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
82
- st.session_state.protect_label = st.text_input("Protect Label", value=st.session_state.protect_label)
83
- st.session_state.num_run = st.number_input("Number of Runs", 1, 10, st.session_state.num_run)
84
-
85
- if st.button('Process Data') and not st.session_state.data_processed:
86
- # Initialize the correct agent based on model type
87
- if model_type == 'AzureAgent':
88
- agent = AzureAgent(st.session_state.api_key, st.session_state.endpoint_url, st.session_state.deployment_name)
89
- else:
90
- agent = GPTAgent(st.session_state.api_key, st.session_state.endpoint_url, st.session_state.deployment_name, api_version)
91
-
92
- # Process data and display results
93
- with st.spinner('Processing data...'):
94
- parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
95
- df = process_scores(df, st.session_state.num_run, parameters, st.session_state.privilege_label, st.session_state.protect_label, agent, st.session_state.group_name, st.session_state.occupation)
96
- st.session_state.data_processed = True # Mark as processed
97
-
98
- # Add ranks for each score within each row
99
- ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1,ascending=False)
100
-
101
- df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
102
- df['Protect_Rank'] = ranks['Protect_Avg_Score']
103
- df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
104
-
105
- st.write('Processed Data:', df)
106
-
107
- # use the data to generate a plot
108
- st.write("Plotting the data")
109
-
110
- test_results = statistical_tests(df)
111
- print(test_results)
112
- evaluation_results = result_evaluation(test_results)
113
- print(evaluation_results)
114
-
115
- for key, value in evaluation_results.items():
116
- st.write(f"{key}: {value}")
117
-
118
-
119
- if st.button("Reset Experiment Settings"):
120
- st.session_state.occupation = "Programmer"
121
- st.session_state.group_name = "Gender"
122
- st.session_state.privilege_label = "Male"
123
- st.session_state.protect_label = "Female"
124
- st.session_state.num_run = 1
125
- st.session_state.data_processed = False
126
- st.session_state.uploaded_file = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pages/2_Evaluation.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from io import StringIO
4
+ from util.analysis import statistical_tests, result_evaluation
5
+
6
+ def app():
7
+ st.title('Data Evaluation for Fairness in LLM Employment Decision')
8
+
9
+ # Allow users to upload a CSV file with processed results
10
+ uploaded_file = st.file_uploader("Upload your processed CSV file", type="csv")
11
+ if uploaded_file is not None:
12
+ data = StringIO(uploaded_file.getvalue().decode('utf-8'))
13
+ df = pd.read_csv(data)
14
+
15
+ # Add ranks for each score within each row
16
+ ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=False)
17
+
18
+ df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
19
+ df['Protect_Rank'] = ranks['Protect_Avg_Score']
20
+ df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
21
+
22
+ st.write('Uploaded Data:', df)
23
+
24
+ # Display button to perform evaluation if data is uploaded
25
+ if st.button('Evaluate Data'):
26
+ with st.spinner('Evaluating data...'):
27
+ test_results = statistical_tests(df)
28
+ evaluation_results = result_evaluation(test_results)
29
+ st.write('Evaluation Results:', evaluation_results)
30
+
31
+ # Allow downloading of the evaluation results
32
+ results_df = pd.DataFrame.from_dict(evaluation_results, orient='index', columns=['Value'])
33
+ st.download_button(
34
+ label="Download Evaluation Results",
35
+ data=results_df.to_csv().encode('utf-8'),
36
+ file_name='evaluation_results.csv',
37
+ mime='text/csv',
38
+ )
39
+
40
+ if __name__ == "__main__":
41
+ app()