Zekun Wu committed on
Commit
b25bb07
1 Parent(s): 6807929
Files changed (3) hide show
  1. experiment.ipynb +201 -0
  2. util/evaluation.py +28 -7
  3. util/injection.py +8 -7
experiment.ipynb ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "initial_id",
7
+ "metadata": {
8
+ "collapsed": true,
9
+ "is_executing": true,
10
+ "ExecuteTime": {
11
+ "start_time": "2024-05-31T11:06:03.089830Z"
12
+ }
13
+ },
14
+ "outputs": [
15
+ {
16
+ "name": "stdout",
17
+ "output_type": "stream",
18
+ "text": [
19
+ "Processing 100 entries with 1 runs each.\n"
20
+ ]
21
+ },
22
+ {
23
+ "name": "stderr",
24
+ "output_type": "stream",
25
+ "text": [
26
+ "Processing runs: 0%| | 0/1 [00:00<?, ?run/s]\n",
27
+ "Processing entries: 0%| | 0/100 [00:00<?, ?entry/s]\u001B[A\n",
28
+ "Processing entries: 1%| | 1/100 [00:47<1:17:58, 47.26s/entry]\u001B[A\n",
29
+ "Processing entries: 2%|▏ | 2/100 [01:15<58:51, 36.04s/entry] \u001B[A\n",
30
+ "Processing entries: 3%|▎ | 3/100 [01:49<56:30, 34.95s/entry]\u001B[A\n",
31
+ "Processing entries: 4%|▍ | 4/100 [02:21<54:34, 34.11s/entry]\u001B[A\n",
32
+ "Processing entries: 5%|▌ | 5/100 [02:59<56:11, 35.49s/entry]\u001B[A\n",
33
+ "Processing entries: 6%|▌ | 6/100 [03:35<55:33, 35.46s/entry]\u001B[A\n",
34
+ "Processing entries: 7%|▋ | 7/100 [04:12<55:48, 36.00s/entry]\u001B[A\n",
35
+ "Processing entries: 8%|▊ | 8/100 [04:52<57:20, 37.40s/entry]\u001B[A\n",
36
+ "Processing entries: 9%|▉ | 9/100 [05:19<51:31, 33.97s/entry]\u001B[A\n",
37
+ "Processing entries: 10%|█ | 10/100 [15:46<5:25:34, 217.06s/entry]\u001B[A\n",
38
+ "Processing entries: 11%|█ | 11/100 [16:11<3:55:07, 158.51s/entry]\u001B[A\n",
39
+ "Processing entries: 12%|█▏ | 12/100 [17:15<3:09:53, 129.48s/entry]\u001B[A\n",
40
+ "Processing entries: 13%|█▎ | 13/100 [17:54<2:28:14, 102.23s/entry]\u001B[A\n",
41
+ "Processing entries: 14%|█▍ | 14/100 [18:32<1:58:40, 82.79s/entry] \u001B[A\n",
42
+ "Processing entries: 15%|█▌ | 15/100 [19:00<1:34:01, 66.37s/entry]\u001B[A\n",
43
+ "Processing entries: 16%|█▌ | 16/100 [19:45<1:23:45, 59.83s/entry]\u001B[A\n",
44
+ "Processing entries: 17%|█▋ | 17/100 [20:27<1:15:23, 54.50s/entry]\u001B[A\n",
45
+ "Processing entries: 18%|█▊ | 18/100 [20:55<1:03:40, 46.59s/entry]\u001B[A\n",
46
+ "Processing entries: 19%|█▉ | 19/100 [21:27<56:41, 42.00s/entry] \u001B[A\n",
47
+ "Processing entries: 20%|██ | 20/100 [22:12<57:27, 43.09s/entry]\u001B[A\n",
48
+ "Processing entries: 21%|██ | 21/100 [22:41<51:05, 38.81s/entry]\u001B[A\n",
49
+ "Processing entries: 22%|██▏ | 22/100 [23:06<45:15, 34.81s/entry]\u001B[A\n",
50
+ "Processing entries: 23%|██▎ | 23/100 [23:50<48:06, 37.49s/entry]\u001B[A\n",
51
+ "Processing entries: 24%|██▍ | 24/100 [24:21<44:54, 35.46s/entry]\u001B[A\n",
52
+ "Processing entries: 25%|██▌ | 25/100 [25:00<45:50, 36.68s/entry]\u001B[A\n",
53
+ "Processing entries: 26%|██▌ | 26/100 [25:35<44:32, 36.12s/entry]\u001B[A\n",
54
+ "Processing entries: 27%|██▋ | 27/100 [26:04<41:14, 33.89s/entry]\u001B[A\n",
55
+ "Processing entries: 28%|██▊ | 28/100 [26:29<37:26, 31.19s/entry]\u001B[A\n",
56
+ "Processing entries: 29%|██▉ | 29/100 [27:04<38:24, 32.46s/entry]\u001B[A\n",
57
+ "Processing entries: 30%|███ | 30/100 [27:38<38:15, 32.79s/entry]\u001B[A\n",
58
+ "Processing entries: 31%|███ | 31/100 [28:20<40:55, 35.59s/entry]\u001B[A\n",
59
+ "Processing entries: 32%|███▏ | 32/100 [29:08<44:23, 39.18s/entry]\u001B[A\n",
60
+ "Processing entries: 33%|███▎ | 33/100 [29:37<40:20, 36.13s/entry]\u001B[A\n",
61
+ "Processing entries: 34%|███▍ | 34/100 [30:23<43:02, 39.13s/entry]\u001B[A\n",
62
+ "Processing entries: 35%|███▌ | 35/100 [31:19<47:58, 44.28s/entry]\u001B[A\n",
63
+ "Processing entries: 36%|███▌ | 36/100 [32:01<46:39, 43.75s/entry]\u001B[A\n",
64
+ "Processing entries: 37%|███▋ | 37/100 [32:27<40:11, 38.28s/entry]\u001B[A\n",
65
+ "Processing entries: 38%|███▊ | 38/100 [32:53<35:51, 34.71s/entry]\u001B[A\n",
66
+ "Processing entries: 39%|███▉ | 39/100 [33:31<36:05, 35.50s/entry]\u001B[A\n",
67
+ "Processing entries: 40%|████ | 40/100 [34:11<37:01, 37.02s/entry]\u001B[A\n",
68
+ "Processing entries: 41%|████ | 41/100 [34:39<33:41, 34.27s/entry]\u001B[A\n",
69
+ "Processing entries: 42%|████▏ | 42/100 [35:23<35:54, 37.15s/entry]\u001B[A\n",
70
+ "Processing entries: 43%|████▎ | 43/100 [35:50<32:32, 34.26s/entry]\u001B[A\n",
71
+ "Processing entries: 44%|████▍ | 44/100 [36:29<33:08, 35.50s/entry]\u001B[A\n",
72
+ "Processing entries: 45%|████▌ | 45/100 [37:01<31:34, 34.45s/entry]\u001B[A\n",
73
+ "Processing entries: 46%|████▌ | 46/100 [37:32<30:13, 33.58s/entry]\u001B[A\n",
74
+ "Processing entries: 47%|████▋ | 47/100 [38:16<32:12, 36.47s/entry]\u001B[A\n",
75
+ "Processing entries: 48%|████▊ | 48/100 [39:01<33:47, 38.99s/entry]\u001B[A\n",
76
+ "Processing entries: 49%|████▉ | 49/100 [39:24<29:07, 34.27s/entry]\u001B[A\n",
77
+ "Processing entries: 50%|█████ | 50/100 [40:13<32:16, 38.72s/entry]\u001B[A\n",
78
+ "Processing entries: 51%|█████ | 51/100 [40:48<30:50, 37.78s/entry]\u001B[A\n",
79
+ "Processing entries: 52%|█████▏ | 52/100 [41:28<30:41, 38.37s/entry]\u001B[A\n",
80
+ "Processing entries: 53%|█████▎ | 53/100 [42:06<29:48, 38.06s/entry]\u001B[A\n",
81
+ "Processing entries: 54%|█████▍ | 54/100 [42:29<25:54, 33.80s/entry]\u001B[A\n",
82
+ "Processing entries: 55%|█████▌ | 55/100 [43:06<26:00, 34.68s/entry]\u001B[A\n",
83
+ "Processing entries: 56%|█████▌ | 56/100 [43:33<23:48, 32.46s/entry]\u001B[A\n",
84
+ "Processing entries: 57%|█████▋ | 57/100 [44:28<27:57, 39.02s/entry]\u001B[A\n",
85
+ "Processing entries: 58%|█████▊ | 58/100 [45:05<26:58, 38.53s/entry]\u001B[A\n",
86
+ "Processing entries: 59%|█████▉ | 59/100 [45:46<26:48, 39.22s/entry]\u001B[A\n",
87
+ "Processing entries: 60%|██████ | 60/100 [46:26<26:18, 39.46s/entry]\u001B[A\n",
88
+ "Processing entries: 61%|██████ | 61/100 [46:57<23:54, 36.77s/entry]\u001B[A\n",
89
+ "Processing entries: 62%|██████▏ | 62/100 [47:19<20:29, 32.36s/entry]\u001B[A\n",
90
+ "Processing entries: 63%|██████▎ | 63/100 [48:23<25:58, 42.12s/entry]\u001B[A\n",
91
+ "Processing entries: 64%|██████▍ | 64/100 [48:56<23:34, 39.29s/entry]\u001B[A\n",
92
+ "Processing entries: 65%|██████▌ | 65/100 [49:25<21:03, 36.11s/entry]\u001B[A\n",
93
+ "Processing entries: 66%|██████▌ | 66/100 [49:57<19:47, 34.92s/entry]\u001B[A\n",
94
+ "Processing entries: 67%|██████▋ | 67/100 [50:20<17:17, 31.44s/entry]\u001B[A\n",
95
+ "Processing entries: 68%|██████▊ | 68/100 [51:01<18:10, 34.07s/entry]\u001B[A\n",
96
+ "Processing entries: 69%|██████▉ | 69/100 [52:08<22:46, 44.07s/entry]\u001B[A\n",
97
+ "Processing entries: 70%|███████ | 70/100 [52:39<20:04, 40.14s/entry]\u001B[A\n",
98
+ "Processing entries: 71%|███████ | 71/100 [53:26<20:24, 42.22s/entry]\u001B[A\n",
99
+ "Processing entries: 72%|███████▏ | 72/100 [53:53<17:38, 37.80s/entry]\u001B[A\n",
100
+ "Processing entries: 73%|███████▎ | 73/100 [54:21<15:36, 34.67s/entry]\u001B[A\n",
101
+ "Processing entries: 74%|███████▍ | 74/100 [55:17<17:47, 41.06s/entry]\u001B[A\n",
102
+ "Processing entries: 75%|███████▌ | 75/100 [56:20<19:56, 47.84s/entry]\u001B[A\n",
103
+ "Processing entries: 76%|███████▌ | 76/100 [58:05<25:57, 64.89s/entry]\u001B[A\n",
104
+ "Processing entries: 77%|███████▋ | 77/100 [59:07<24:30, 63.95s/entry]\u001B[A\n",
105
+ "Processing entries: 78%|███████▊ | 78/100 [59:53<21:26, 58.48s/entry]\u001B[A\n",
106
+ "Processing entries: 79%|███████▉ | 79/100 [1:00:32<18:30, 52.89s/entry]\u001B[A\n",
107
+ "Processing entries: 80%|████████ | 80/100 [1:01:09<16:01, 48.10s/entry]\u001B[A\n",
108
+ "Processing entries: 81%|████████ | 81/100 [1:01:38<13:24, 42.37s/entry]\u001B[A\n",
109
+ "Processing entries: 82%|████████▏ | 82/100 [1:02:13<12:00, 40.02s/entry]\u001B[A"
110
+ ]
111
+ }
112
+ ],
113
+ "source": [
114
+ "import pandas as pd\n",
115
+ "from util.injection import process_scores_multiple\n",
116
+ "from util.model import AzureAgent, GPTAgent, Claude3Agent\n",
117
+ "from util.prompt import PROMPT_TEMPLATE\n",
118
+ "\n",
119
+ "def run_experiment(api_key, model_type, deployment_name, temperature, max_tokens, occupation,\n",
120
+ " sample_size, group_name, privilege_label, protect_label, num_run, prompt_template, endpoint_url=None):\n",
121
+ " # Load data\n",
122
+ " df = pd.read_csv(\"resume_subsampled.csv\")\n",
123
+ " \n",
124
+ " # Filter data by occupation\n",
125
+ " df = df[df[\"Occupation\"] == occupation]\n",
126
+ " df = df.sample(n=sample_size, random_state=42)\n",
127
+ " \n",
128
+ " # Initialize the agent\n",
129
+ " if model_type == 'AzureAgent':\n",
130
+ " agent = AzureAgent(api_key, endpoint_url, deployment_name)\n",
131
+ " elif model_type == 'GPTAgent':\n",
132
+ " api_version = '2024-02-15-preview'\n",
133
+ " agent = GPTAgent(api_key, endpoint_url, deployment_name, api_version)\n",
134
+ " else:\n",
135
+ " agent = Claude3Agent(api_key, deployment_name)\n",
136
+ " \n",
137
+ " # Process data\n",
138
+ " parameters = {\"temperature\": temperature, \"max_tokens\": max_tokens}\n",
139
+ " preprocessed_df = process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation, prompt_template)\n",
140
+ " \n",
141
+ " return preprocessed_df\n",
142
+ "\n",
143
+ "# Set experiment parameters\n",
144
+ "api_key = \"6c75a8235f204c9e8cf6228e485982f7\"\n",
145
+ "model_type = \"GPTAgent\" # or \"AzureAgent\" or \"Claude3Agent\"\n",
146
+ "deployment_name = \"gpt4-1106\"\n",
147
+ "temperature = 0.0\n",
148
+ "max_tokens = 300\n",
149
+ "file_path = \"resume_subsampled.csv\" # or path to your file\n",
150
+ "occupation = \"FINANCE\"\n",
151
+ "sample_size = 100\n",
152
+ "group_name = \"Gender\"\n",
153
+ "privilege_label = \"Male\"\n",
154
+ "protect_label = \"Female\"\n",
155
+ "num_run = 1\n",
156
+ "prompt_template = PROMPT_TEMPLATE\n",
157
+ "endpoint_url = \"https://safeguard-monitor.openai.azure.com/\"\n",
158
+ "\n",
159
+ "# Run experiment\n",
160
+ "results = run_experiment(api_key, model_type, deployment_name, temperature, max_tokens, occupation,\n",
161
+ " sample_size, group_name, privilege_label, protect_label, num_run, prompt_template, endpoint_url)\n",
162
+ "\n",
163
+ "# Display results\n",
164
+ "results.head()\n",
165
+ "\n",
166
+ "# Optionally save results to a CSV file\n",
167
+ "results.to_csv(f'result/{occupation}_results.csv', index=False)\n"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "outputs": [],
173
+ "source": [],
174
+ "metadata": {
175
+ "collapsed": false
176
+ },
177
+ "id": "43711da68c012a83"
178
+ }
179
+ ],
180
+ "metadata": {
181
+ "kernelspec": {
182
+ "display_name": "Python 3",
183
+ "language": "python",
184
+ "name": "python3"
185
+ },
186
+ "language_info": {
187
+ "codemirror_mode": {
188
+ "name": "ipython",
189
+ "version": 2
190
+ },
191
+ "file_extension": ".py",
192
+ "mimetype": "text/x-python",
193
+ "name": "python",
194
+ "nbconvert_exporter": "python",
195
+ "pygments_lexer": "ipython2",
196
+ "version": "2.7.6"
197
+ }
198
+ },
199
+ "nbformat": 4,
200
+ "nbformat_minor": 5
201
+ }
util/evaluation.py CHANGED
@@ -84,14 +84,26 @@ def statistical_tests(data):
84
  'Protect_Rank vs Neutral_Rank': variances['Protect_Rank'] > variances['Neutral_Rank']
85
  }
86
 
87
- selection_rates = {
 
 
 
 
 
 
 
 
 
 
 
88
  'Privilege': data['Privilege_Rank'].mean(),
89
  'Protect': data['Protect_Rank'].mean(),
90
  'Neutral': data['Neutral_Rank'].mean()
91
  }
92
- impact_ratios = calculate_impact_ratio(selection_rates)
93
- spd_result = statistical_parity_difference(selection_rates)
94
- adverse_impact = calculate_four_fifths_rule(impact_ratios)
 
95
 
96
  # Friedman test
97
  friedman_stat, friedman_p = friedmanchisquare(*rank_data)
@@ -114,9 +126,18 @@ def statistical_tests(data):
114
  **pairwise_results,
115
  "Levene's Test for Equality of Variances": levene_results,
116
  "Pairwise Comparisons of Variances": pairwise_variances,
117
- "Statistical Parity Difference": spd_result,
118
- "Disparate Impact Ratios": impact_ratios,
119
- "Four-Fifths Rule": adverse_impact,
 
 
 
 
 
 
 
 
 
120
  }
121
 
122
  return results
 
84
  'Protect_Rank vs Neutral_Rank': variances['Protect_Rank'] > variances['Neutral_Rank']
85
  }
86
 
87
+ selection_rates_Avg_Score = {
88
+ 'Privilege': data['Privilege_Avg_Score'].mean(),
89
+ 'Protect': data['Protect_Avg_Score'].mean(),
90
+ 'Neutral': data['Neutral_Avg_Score'].mean()
91
+ }
92
+ impact_ratios_Avg_Score = calculate_impact_ratio(selection_rates_Avg_Score)
93
+ spd_result_Avg_Score = statistical_parity_difference(selection_rates_Avg_Score)
94
+ adverse_impact_Avg_Score = calculate_four_fifths_rule(impact_ratios_Avg_Score)
95
+
96
+
97
+ # rank version of bias metrics
98
+ selection_rates_rank = {
99
  'Privilege': data['Privilege_Rank'].mean(),
100
  'Protect': data['Protect_Rank'].mean(),
101
  'Neutral': data['Neutral_Rank'].mean()
102
  }
103
+ impact_ratios_rank = calculate_impact_ratio(selection_rates_rank)
104
+ spd_result_rank = statistical_parity_difference(selection_rates_rank)
105
+ adverse_impact_rank = calculate_four_fifths_rule(impact_ratios_rank)
106
+
107
 
108
  # Friedman test
109
  friedman_stat, friedman_p = friedmanchisquare(*rank_data)
 
126
  **pairwise_results,
127
  "Levene's Test for Equality of Variances": levene_results,
128
  "Pairwise Comparisons of Variances": pairwise_variances,
129
+ "Statistical Parity Difference": {
130
+ "Avg_Score": spd_result_Avg_Score,
131
+ "Rank": spd_result_rank
132
+ },
133
+ "Disparate Impact Ratios": {
134
+ "Avg_Score": impact_ratios_Avg_Score,
135
+ "Rank": impact_ratios_rank
136
+ },
137
+ "Four-Fifths Rule": {
138
+ "Avg_Score": adverse_impact_Avg_Score,
139
+ "Rank": adverse_impact_rank
140
+ }
141
  }
142
 
143
  return results
util/injection.py CHANGED
@@ -33,9 +33,9 @@ def invoke_retry(prompt, agent, parameters, string_input=False):
33
  while attempts < max_attempts:
34
  try:
35
  score_text = agent.invoke(prompt, **parameters)
36
- print(f"Prompt: {prompt}")
37
- print(f"Score text: {score_text}")
38
- print("=============================================================")
39
  if string_input:
40
  return score_text
41
  try:
@@ -48,7 +48,7 @@ def invoke_retry(prompt, agent, parameters, string_input=False):
48
  raise Exception("Failed to decode JSON response even after repair attempt.")
49
  # score = re.search(r'\d+', score_text)
50
  # return int(score.group()) if score else -1
51
- print(f"Score JSON: {score_json}")
52
  return int(score_json['Score'])
53
 
54
  except Exception as e:
@@ -82,12 +82,12 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
82
  for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
83
  prompt_normal = create_summary(group_name, label, occupation, row, template)
84
 
85
- print(f"Run {run + 1} - Entry {index + 1} - {key}")
86
- print("=============================================================")
87
  result_normal = invoke_retry(prompt_normal, agent, parameters)
88
  scores[key][index].append(result_normal)
89
 
90
- print(f"Scores: {scores}")
91
 
92
  # Ensure all scores are lists and calculate average scores
93
  for category in ['Privilege', 'Protect', 'Neutral']:
@@ -107,3 +107,4 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
107
  df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
108
 
109
  return df
 
 
33
  while attempts < max_attempts:
34
  try:
35
  score_text = agent.invoke(prompt, **parameters)
36
+ #print(f"Prompt: {prompt}")
37
+ # print(f"Score text: {score_text}")
38
+ # print("=============================================================")
39
  if string_input:
40
  return score_text
41
  try:
 
48
  raise Exception("Failed to decode JSON response even after repair attempt.")
49
  # score = re.search(r'\d+', score_text)
50
  # return int(score.group()) if score else -1
51
+ #print(f"Score JSON: {score_json}")
52
  return int(score_json['Score'])
53
 
54
  except Exception as e:
 
82
  for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
83
  prompt_normal = create_summary(group_name, label, occupation, row, template)
84
 
85
+ # print(f"Run {run + 1} - Entry {index + 1} - {key}")
86
+ # print("=============================================================")
87
  result_normal = invoke_retry(prompt_normal, agent, parameters)
88
  scores[key][index].append(result_normal)
89
 
90
+ #print(f"Scores: {scores}")
91
 
92
  # Ensure all scores are lists and calculate average scores
93
  for category in ['Privilege', 'Protect', 'Neutral']:
 
107
  df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
108
 
109
  return df
110
+