pages/1_Injection.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
 import pandas as pd
 from io import StringIO
 from util.injection import process_scores_multiple
-from util.model import AzureAgent, GPTAgent,Claude3Agent
+from util.model import AzureAgent, GPTAgent
 from util.prompt import PROMPT_TEMPLATE
 import os
 
@@ -10,7 +10,6 @@ st.title('Result Generation')
 
 def check_password():
     def password_entered():
-        # if password_input == os.getenv('PASSWORD'):
        if password_input == os.getenv('PASSWORD'):
            st.session_state['password_correct'] = True
        else:
@@ -29,18 +28,12 @@ def initialize_state():
            "data_processed", "group_name", "occupation", "privilege_label", "protect_label", "num_run",
            "uploaded_file", "occupation_submitted","sample_size","charateristics","proportion","prompt_template"]
    defaults = [False, "", "https://safeguard-monitor.openai.azure.com/", "gpt35-1106", 0.0, 300, False, "Gender",
-                "Programmer", "Male", "Female", 1, None, False,2,"This candidate's performance during the internship at our institution was evaluated to be at the 50th percentile among current employees.", 1.0 ,PROMPT_TEMPLATE]
+                "Programmer", "Male", "Female", 1, None, False,2,"This candidate's performance during the internship at our institution was evaluated to be at the 50th percentile among current employees.",1,PROMPT_TEMPLATE]
    for key, default in zip(keys, defaults):
        if key not in st.session_state:
            st.session_state[key] = default
 
 
-def change_column_value(df_old, df_change, here_column, switch_to_column, common_column='Resume'):
-    merged_df = df_old.merge(df_change, on=common_column, how='left')
-    df_old[here_column] = merged_df[switch_to_column]
-    return df_old
-
-
 if not st.session_state.get('password_correct', False):
    check_password()
 else:
@@ -49,21 +42,15 @@ else:
    st.sidebar.title('Model Settings')
    initialize_state()
 
-
-
    # Model selection and configuration
-    model_type = st.sidebar.radio("Select the type of agent", ('GPTAgent', 'AzureAgent','Claude3Agent'))
+    model_type = st.sidebar.radio("Select the type of agent", ('GPTAgent', 'AzureAgent'))
    st.session_state.api_key = st.sidebar.text_input("API Key", type="password", value=st.session_state.api_key)
+    st.session_state.endpoint_url = st.sidebar.text_input("Endpoint URL", value=st.session_state.endpoint_url)
    st.session_state.deployment_name = st.sidebar.text_input("Model Name", value=st.session_state.deployment_name)
-
+    api_version = '2024-02-15-preview' if model_type == 'GPTAgent' else ''
    st.session_state.temperature = st.sidebar.slider("Temperature", 0.0, 1.0, st.session_state.temperature, 0.01)
    st.session_state.max_tokens = st.sidebar.number_input("Max Tokens", 1, 1000, st.session_state.max_tokens)
 
-    if model_type == 'GPTAgent' or model_type == 'AzureAgent':
-        st.session_state.endpoint_url = st.sidebar.text_input("Endpoint URL", value=st.session_state.endpoint_url)
-        api_version = '2024-02-15-preview' if model_type == 'GPTAgent' else ''
-
-
    if st.sidebar.button("Reset Model Info"):
        initialize_state()  # Reset all state to defaults
        st.experimental_rerun()
@@ -93,23 +80,17 @@ else:
    st.session_state.prompt_template = st.text_area("Prompt Template", value=st.session_state.prompt_template)
 
    st.session_state.sample_size = st.number_input("Sample Size", 2, len(df), st.session_state.sample_size)
-
+    st.session_state.proportion = st.number_input("Proportion", 0.0, 1.0, float(st.session_state.proportion), 0.01)
    st.session_state.group_name = st.text_input("Group Name", value=st.session_state.group_name)
    st.session_state.privilege_label = st.text_input("Privilege Label", value=st.session_state.privilege_label)
    st.session_state.protect_label = st.text_input("Protect Label", value=st.session_state.protect_label)
-    st.session_state.num_run = st.number_input("Number of Runs", 1, 10, st.session_state.num_run)
 
    #st.session_state.charateristics = st.text_area("Characteristics", value=st.session_state.charateristics)
 
-    df = df[df["Occupation"] == st.session_state.occupation]
-
-    # if file_options == "Example":
-    #     st.session_state.proportion = st.slider("Proportion", 0.2, 1.0, float(st.session_state.proportion), 0.2)
-    #     df_chunked = pd.read_csv("resume_chunked.csv")
-    #     column_switch_to = f'{st.session_state.proportion}_diluted'
-    #     df = change_column_value(df, df_chunked, 'Cleaned_Resume', column_switch_to)
+    st.session_state.num_run = st.number_input("Number of Runs", 1, 10, st.session_state.num_run)
 
-    df = df.sample(n=st.session_state.sample_size, random_state=42)
+    df = df[df["Occupation"] == st.session_state.occupation]
+    df = df.sample(n=st.session_state.sample_size,random_state=42)
    st.write('Data:', df)
 
    if st.button('Process Data') and not st.session_state.data_processed:
@@ -117,16 +98,13 @@ else:
        if model_type == 'AzureAgent':
            agent = AzureAgent(st.session_state.api_key, st.session_state.endpoint_url,
                               st.session_state.deployment_name)
-        elif model_type == 'GPTAgent':
+        else:
            agent = GPTAgent(st.session_state.api_key, st.session_state.endpoint_url,
                             st.session_state.deployment_name, api_version)
-        else:
-            agent = Claude3Agent(st.session_state.api_key,st.session_state.deployment_name)
-
 
        with st.spinner('Processing data...'):
            parameters = {"temperature": st.session_state.temperature, "max_tokens": st.session_state.max_tokens}
-            preprocessed_df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation,st.session_state.prompt_template)
+            preprocessed_df = process_scores_multiple(df, st.session_state.num_run, parameters, st.session_state.privilege_label,st.session_state.protect_label, agent, st.session_state.group_name,st.session_state.occupation,st.session_state.proportion,st.session_state.prompt_template)
            st.session_state.data_processed = True  # Mark as processed
 
        st.write('Processed Data:', preprocessed_df)
@@ -150,4 +128,3 @@ else:
        st.session_state.num_run = 1
        st.session_state.data_processed = False
        st.session_state.uploaded_file = None
-        st.session_state.proportion = 1.0

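Note on the data-selection change above: the page now filters the uploaded resumes to the chosen occupation and then draws a fixed-seed sample of sample_size rows before scoring. A minimal, self-contained sketch of that step (toy DataFrame; the variable names mirror the session-state keys used in the page, everything else here is illustrative only):

    import pandas as pd

    # Toy stand-in for the uploaded resume CSV; the real file has more columns.
    df = pd.DataFrame({
        "Occupation": ["Programmer", "Programmer", "Teacher", "Programmer"],
        "Cleaned_Resume": ["resume a", "resume b", "resume c", "resume d"],
    })

    occupation = "Programmer"   # st.session_state.occupation
    sample_size = 2             # st.session_state.sample_size

    # Filter first, then sample with a fixed seed so repeated runs score the same rows.
    subset = df[df["Occupation"] == occupation]
    subset = subset.sample(n=sample_size, random_state=42)
    print(subset)
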
pages/2_Evaluation.py CHANGED
@@ -4,7 +4,7 @@ import numpy as np
 import streamlit as st
 import pandas as pd
 from io import StringIO
-from util.evaluation import statistical_tests
+from util.evaluation import statistical_tests,calculate_correlations,calculate_divergences
 from util.plot import create_score_plot,create_rank_plots,create_correlation_heatmaps,create_3d_plot,calculate_distances
 import plotly.express as px
 
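The only change here is the import line: the page now pulls calculate_correlations and calculate_divergences from util.evaluation (both are defined in the util/evaluation.py hunks further down). A minimal sketch of how they would be called, using a made-up processed DataFrame with the rank and score columns they expect; the values are invented for illustration:

    import pandas as pd
    from util.evaluation import calculate_correlations, calculate_divergences

    # Made-up processed results; the real frame comes from process_scores_multiple.
    df = pd.DataFrame({
        "Privilege_Rank": [1, 2, 1, 3], "Protect_Rank": [2, 1, 3, 1], "Neutral_Rank": [3, 3, 2, 2],
        "Privilege_Avg_Score": [85, 90, 88, 92], "Protect_Avg_Score": [80, 91, 77, 90],
        "Neutral_Avg_Score": [83, 89, 84, 91],
    })

    correlations = calculate_correlations(df)  # Spearman / Pearson / Kendall Tau per rank pair
    divergences = calculate_divergences(df)    # KL / Jensen-Shannon / Hellinger per score pair
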
requirements.txt CHANGED
@@ -5,5 +5,4 @@ scipy
 statsmodels
 scikit-posthocs
 json-repair
-plotly
-boto3
+plotly

resume_chunked.csv DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:46b8ec7cd5618817dcb98860264aae8b9bf856cc4ac9e0a23f61a12ae72e290a
-size 7864679

resume_subsampled.csv CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ead8d4a52de48139bc0c98ab8e5b61210dd93e10856f024adf6f26570ea1353c
-size 3845012
+oid sha256:9ae325b538c1e601fe44bb2d0377800c0a633a8a19bb6ecb5834386d24aa6bf2
+size 3845010
util/__pycache__/__init__.cpython-311.pyc DELETED
Binary file (176 Bytes)
 
util/__pycache__/evaluation.cpython-311.pyc DELETED
Binary file (11 kB)
 
util/__pycache__/injection.cpython-311.pyc DELETED
Binary file (7.19 kB)
 
util/__pycache__/model.cpython-311.pyc DELETED
Binary file (3.55 kB)
 
util/__pycache__/prompt.cpython-311.pyc DELETED
Binary file (1.41 kB)
 
util/evaluation.py CHANGED
@@ -10,64 +10,12 @@ from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
 from statsmodels.stats.multicomp import pairwise_tukeyhsd
 from scipy.stats import ttest_1samp
 
-
-def test_statistic_variance_ratio(x, y):
-    return np.var(x, ddof=1) / np.var(y, ddof=1)
-
-
-def test_statistic_mean_difference(x, y):
-    return np.mean(x) - np.mean(y)
-
-
-def permutation_test_variance(x, y, num_permutations=100000):
-    T_obs = test_statistic_variance_ratio(x, y)
-    pooled_data = np.concatenate([x, y])
-    n_A = len(x)
-
-    perm_test_stats = [T_obs]
-    for _ in range(num_permutations):
-        np.random.shuffle(pooled_data)
-        perm_A = pooled_data[:n_A]
-        perm_B = pooled_data[n_A:]
-        perm_test_stats.append(test_statistic_variance_ratio(perm_A, perm_B))
-
-    perm_test_stats = np.array(perm_test_stats)
-    p_value = np.mean(np.abs(perm_test_stats) >= np.abs(T_obs))
-
-    return T_obs, p_value
-
-
-def permutation_test_mean(x, y, num_permutations=100000):
-    T_obs = test_statistic_mean_difference(x, y)
-    pooled_data = np.concatenate([x, y])
-    n_A = len(x)
-
-    perm_test_stats = [T_obs]
-    for _ in range(num_permutations):
-        np.random.shuffle(pooled_data)
-        perm_A = pooled_data[:n_A]
-        perm_B = pooled_data[n_A:]
-        perm_test_stats.append(test_statistic_mean_difference(perm_A, perm_B))
-
-    perm_test_stats = np.array(perm_test_stats)
-    p_value = np.mean(np.abs(perm_test_stats) >= np.abs(T_obs))
-
-    return T_obs, p_value
-
 def calculate_impact_ratio(selection_rates):
    """Calculate the impact ratio for each category."""
    most_selected_rate = max(selection_rates.values())
    impact_ratios = {category: rate / most_selected_rate for category, rate in selection_rates.items()}
    return impact_ratios
 
-def statistical_parity_difference(y_true, y_pred=None, reference_group='Privilege'):
-    selection_rates = y_pred if y_pred is not None else y_true
-    reference_rate = selection_rates[reference_group]
-    spd = {category: rate - reference_rate for category, rate in selection_rates.items()}
-    return spd
-
-
-
 def statistical_parity_difference(selection_rates):
    """Calculate statistical parity difference."""
    most_selected_rate = max(selection_rates.values())
@@ -80,43 +28,53 @@ def calculate_four_fifths_rule(impact_ratios):
    return adverse_impact
 
 def statistical_tests(data):
-    # Add ranks for each score within each row
-    # ranks = data[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=True)
-    #
-    # data['Privilege_Rank'] = ranks['Privilege_Avg_Score']
-    # data['Protect_Rank'] = ranks['Protect_Avg_Score']
-    # data['Neutral_Rank'] = ranks['Neutral_Avg_Score']
-
    """Perform various statistical tests to evaluate potential biases."""
    variables = ['Privilege', 'Protect', 'Neutral']
    rank_suffix = '_Rank'
    score_suffix = '_Avg_Score'
 
-    # Calculate average ranks and scores
+    # Calculate average ranks
    rank_columns = [v + rank_suffix for v in variables]
    average_ranks = data[rank_columns].mean()
    average_scores = data[[v + score_suffix for v in variables]].mean()
 
-    # Statistical tests setup
+    # Statistical tests
    rank_data = [data[col] for col in rank_columns]
-    pairs = [('Privilege', 'Protect'), ('Protect', 'Neutral'), ('Privilege', 'Neutral')]
 
-    pairwise_results = {'Wilcoxon Test': {}}
+    # Pairwise tests
+    pairs = [
+        ('Privilege', 'Protect'),
+        ('Protect', 'Neutral'),
+        ('Privilege', 'Neutral')
+    ]
+
+    pairwise_results = {
+        'Wilcoxon Test': {}
+    }
 
-    # Pairwise Wilcoxon Signed-Rank Test
-    for var1, var2 in pairs:
+    for (var1, var2) in pairs:
+        pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
        pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'
-        pair_score_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
 
+        # Wilcoxon Signed-Rank Test
        if len(data) > 20:
-            wilcoxon_stat_rank, wilcoxon_p_rank = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
-            wilcoxon_stat_score, wilcoxon_p_score = wilcoxon(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
+            wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
        else:
-            wilcoxon_stat_rank, wilcoxon_p_rank = np.nan, "Sample size too small for Wilcoxon test."
-            wilcoxon_stat_score, wilcoxon_p_score = np.nan, "Sample size too small for Wilcoxon test."
-        pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat_rank, "p-value": wilcoxon_p_rank}
-        pairwise_results['Wilcoxon Test'][pair_score_score] = {"Statistic": wilcoxon_stat_score, "p-value": wilcoxon_p_score}
+            wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."
+        pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat, "p-value": wilcoxon_p}
+
+    # Levene's Test for Equality of Variances
+    levene_results = {}
+    levene_privilege_protect = levene(data['Privilege_Rank'], data['Protect_Rank'])
+    levene_privilege_neutral = levene(data['Privilege_Rank'], data['Neutral_Rank'])
+    levene_protect_neutral = levene(data['Protect_Rank'], data['Neutral_Rank'])
+
+    levene_results['Privilege vs Protect'] = {"Statistic": levene_privilege_protect.statistic,
+                                              "p-value": levene_privilege_protect.pvalue}
+    levene_results['Privilege vs Neutral'] = {"Statistic": levene_privilege_neutral.statistic,
+                                              "p-value": levene_privilege_neutral.pvalue}
+    levene_results['Protect vs Neutral'] = {"Statistic": levene_protect_neutral.statistic,
                                            "p-value": levene_protect_neutral.pvalue}
 
    # Calculate variances for ranks
    variances = {col: data[col].var() for col in rank_columns}
@@ -126,65 +84,24 @@ def statistical_tests(data):
        'Protect_Rank vs Neutral_Rank': variances['Protect_Rank'] > variances['Neutral_Rank']
    }
 
-    # Bias metrics calculations
-    selection_rates_Avg_Score = {v: data[f'{v}{score_suffix}'].mean() for v in variables}
-    selection_rates_rank = {v: data[f'{v}{rank_suffix}'].mean() for v in variables}
-
-    impact_ratios_Avg_Score = calculate_impact_ratio(selection_rates_Avg_Score)
-    spd_result_Avg_Score = statistical_parity_difference(selection_rates_Avg_Score)
-    adverse_impact_Avg_Score = calculate_four_fifths_rule(impact_ratios_Avg_Score)
-
-    impact_ratios_rank = calculate_impact_ratio(selection_rates_rank)
-    spd_result_rank = statistical_parity_difference(selection_rates_rank)
-    adverse_impact_rank = calculate_four_fifths_rule(impact_ratios_rank)
+    selection_rates = {
+        'Privilege': data['Privilege_Rank'].mean(),
+        'Protect': data['Protect_Rank'].mean(),
+        'Neutral': data['Neutral_Rank'].mean()
+    }
+    impact_ratios = calculate_impact_ratio(selection_rates)
+    spd_result = statistical_parity_difference(selection_rates)
+    adverse_impact = calculate_four_fifths_rule(impact_ratios)
 
    # Friedman test
    friedman_stat, friedman_p = friedmanchisquare(*rank_data)
-    rank_matrix_transposed = np.transpose(data[rank_columns].values)
-    posthoc_results = posthoc_nemenyi(rank_matrix_transposed)
 
+    rank_matrix = data[rank_columns].values
+    rank_matrix_transposed = np.transpose(rank_matrix)
+    posthoc_results = posthoc_nemenyi(rank_matrix_transposed)
+    #posthoc_results = posthoc_friedman(data, variables, rank_suffix)
+
-    # Perform permutation tests for variances
-    T_priv_prot_var_rank, p_priv_prot_var_rank = permutation_test_variance(data['Privilege_Rank'], data['Protect_Rank'])
-    T_neut_prot_var_rank, p_neut_prot_var_rank = permutation_test_variance(data['Neutral_Rank'], data['Protect_Rank'])
-    T_neut_priv_var_rank, p_neut_priv_var_rank = permutation_test_variance(data['Neutral_Rank'], data['Privilege_Rank'])
-
-    # Perform permutation tests for variances by using rank data
-    T_priv_prot_var_score, p_priv_prot_var_score = permutation_test_variance(data['Privilege_Avg_Score'], data['Protect_Avg_Score'])
-    T_neut_prot_var_score, p_neut_prot_var_score = permutation_test_variance(data['Neutral_Avg_Score'], data['Protect_Avg_Score'])
-    T_neut_priv_var_score, p_neut_priv_var_score = permutation_test_variance(data['Neutral_Avg_Score'], data['Privilege_Avg_Score'])
-
-    # Perform permutation tests for means
-    T_priv_prot_mean_rank, p_priv_prot_mean_rank = permutation_test_mean(data['Privilege_Rank'], data['Protect_Rank'])
-    T_neut_prot_mean_rank, p_neut_prot_mean_rank = permutation_test_mean(data['Neutral_Rank'], data['Protect_Rank'])
-    T_neut_priv_mean_rank, p_neut_priv_mean_rank = permutation_test_mean(data['Neutral_Rank'], data['Privilege_Rank'])
-
-    # Perform permutation tests for means by using rank data
-    T_priv_prot_mean_score, p_priv_prot_mean_score = permutation_test_mean(data['Privilege_Avg_Score'], data['Protect_Avg_Score'])
-    T_neut_prot_mean_score, p_neut_prot_mean_score = permutation_test_mean(data['Neutral_Avg_Score'], data['Protect_Avg_Score'])
-    T_neut_priv_mean_score, p_neut_priv_mean_score = permutation_test_mean(data['Neutral_Avg_Score'], data['Privilege_Avg_Score'])
-
-    permutation_results = {
-        "Permutation Tests for Variances (score)": {
-            "Privilege vs. Protect": {"Statistic": T_priv_prot_var_score, "p-value": p_priv_prot_var_score},
-            "Neutral vs. Protect": {"Statistic": T_neut_prot_var_score, "p-value": p_neut_prot_var_score},
-            "Neutral vs. Privilege": {"Statistic": T_neut_priv_var_score, "p-value": p_neut_priv_var_score}
-        },
-        "Permutation Tests for Means (score)": {
-            "Privilege vs. Protect": {"Statistic": T_priv_prot_mean_score, "p-value": p_priv_prot_mean_score},
-            "Neutral vs. Protect": {"Statistic": T_neut_prot_mean_score, "p-value": p_neut_prot_mean_score},
-            "Neutral vs. Privilege": {"Statistic": T_neut_priv_mean_score, "p-value": p_neut_priv_mean_score}
-        },
-        "Permutation Tests for Variances (rank)": {
-            "Privilege vs. Protect": {"Statistic": T_priv_prot_var_rank, "p-value": p_priv_prot_var_rank},
-            "Neutral vs. Protect": {"Statistic": T_neut_prot_var_rank, "p-value": p_neut_prot_var_rank},
-            "Neutral vs. Privilege": {"Statistic": T_neut_priv_var_rank, "p-value": p_neut_priv_var_rank}
-        },
-        "Permutation Tests for Means (rank)": {
-            "Privilege vs. Protect": {"Statistic": T_priv_prot_mean_rank, "p-value": p_priv_prot_mean_rank},
-            "Neutral vs. Protect": {"Statistic": T_neut_prot_mean_rank, "p-value": p_neut_prot_mean_rank},
-            "Neutral vs. Privilege": {"Statistic": T_neut_priv_mean_rank, "p-value": p_neut_priv_mean_rank}
-        }
-    }
 
    results = {
        "Average Ranks": average_ranks.to_dict(),
@@ -195,189 +112,62 @@ def statistical_tests(data):
            "Post-hoc": posthoc_results
        },
        **pairwise_results,
-        #"Levene's Test for Equality of Variances": levene_results,
+        "Levene's Test for Equality of Variances": levene_results,
        "Pairwise Comparisons of Variances": pairwise_variances,
-        "Statistical Parity Difference": {
-            "Avg_Score": spd_result_Avg_Score,
-            "Rank": spd_result_rank
-        },
-        "Disparate Impact Ratios": {
-            "Avg_Score": impact_ratios_Avg_Score,
-            "Rank": impact_ratios_rank
-        },
-        "Four-Fifths Rule": {
-            "Avg_Score": adverse_impact_Avg_Score,
-            "Rank": adverse_impact_rank
-        },
-        **permutation_results
+        "Statistical Parity Difference": spd_result,
+        "Disparate Impact Ratios": impact_ratios,
+        "Four-Fifths Rule": adverse_impact,
    }
 
    return results
 
 
-#
-# def statistical_tests(data):
-#     """Perform various statistical tests to evaluate potential biases."""
-#     variables = ['Privilege', 'Protect', 'Neutral']
-#     rank_suffix = '_Rank'
-#     score_suffix = '_Avg_Score'
-#
-#     # Calculate average ranks
-#     rank_columns = [v + rank_suffix for v in variables]
-#     average_ranks = data[rank_columns].mean()
-#     average_scores = data[[v + score_suffix for v in variables]].mean()
-#
-#     # Statistical tests
-#     rank_data = [data[col] for col in rank_columns]
-#
-#     # Pairwise tests
-#     pairs = [
-#         ('Privilege', 'Protect'),
-#         ('Protect', 'Neutral'),
-#         ('Privilege', 'Neutral')
-#     ]
-#
-#     pairwise_results = {
-#         'Wilcoxon Test': {}
-#     }
-#
-#     for (var1, var2) in pairs:
-#         pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
-#         pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'
-#
-#         # Wilcoxon Signed-Rank Test
-#         if len(data) > 20:
-#             wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
-#         else:
-#             wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."
-#         pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat, "p-value": wilcoxon_p}
-#
-#     # Levene's Test for Equality of Variances
-#     levene_results = {}
-#     levene_privilege_protect = levene(data['Privilege_Rank'], data['Protect_Rank'])
-#     levene_privilege_neutral = levene(data['Privilege_Rank'], data['Neutral_Rank'])
-#     levene_protect_neutral = levene(data['Protect_Rank'], data['Neutral_Rank'])
-#
-#     levene_results['Privilege vs Protect'] = {"Statistic": levene_privilege_protect.statistic,
-#                                               "p-value": levene_privilege_protect.pvalue}
-#     levene_results['Privilege vs Neutral'] = {"Statistic": levene_privilege_neutral.statistic,
-#                                               "p-value": levene_privilege_neutral.pvalue}
-#     levene_results['Protect vs Neutral'] = {"Statistic": levene_protect_neutral.statistic,
-#                                             "p-value": levene_protect_neutral.pvalue}
-#
-#     # Calculate variances for ranks
-#     variances = {col: data[col].var() for col in rank_columns}
-#     pairwise_variances = {
-#         'Privilege_Rank vs Protect_Rank': variances['Privilege_Rank'] > variances['Protect_Rank'],
-#         'Privilege_Rank vs Neutral_Rank': variances['Privilege_Rank'] > variances['Neutral_Rank'],
-#         'Protect_Rank vs Neutral_Rank': variances['Protect_Rank'] > variances['Neutral_Rank']
-#     }
-#
-#     selection_rates_Avg_Score = {
-#         'Privilege': data['Privilege_Avg_Score'].mean(),
-#         'Protect': data['Protect_Avg_Score'].mean(),
-#         'Neutral': data['Neutral_Avg_Score'].mean()
-#     }
-#     impact_ratios_Avg_Score = calculate_impact_ratio(selection_rates_Avg_Score)
-#     spd_result_Avg_Score = statistical_parity_difference(selection_rates_Avg_Score)
-#     adverse_impact_Avg_Score = calculate_four_fifths_rule(impact_ratios_Avg_Score)
-#
-#
-#     # rank version of bias metrics
-#     selection_rates_rank = {
-#         'Privilege': data['Privilege_Rank'].mean(),
-#         'Protect': data['Protect_Rank'].mean(),
-#         'Neutral': data['Neutral_Rank'].mean()
-#     }
-#     impact_ratios_rank = calculate_impact_ratio(selection_rates_rank)
-#     spd_result_rank = statistical_parity_difference(selection_rates_rank)
-#     adverse_impact_rank = calculate_four_fifths_rule(impact_ratios_rank)
-#
-#
-#     # Friedman test
-#     friedman_stat, friedman_p = friedmanchisquare(*rank_data)
-#
-#     rank_matrix = data[rank_columns].values
-#     rank_matrix_transposed = np.transpose(rank_matrix)
-#     posthoc_results = posthoc_nemenyi(rank_matrix_transposed)
-#     #posthoc_results = posthoc_friedman(data, variables, rank_suffix)
-#
-#
-#
-#     results = {
-#         "Average Ranks": average_ranks.to_dict(),
-#         "Average Scores": average_scores.to_dict(),
-#         "Friedman Test": {
-#             "Statistic": friedman_stat,
-#             "p-value": friedman_p,
-#             "Post-hoc": posthoc_results
-#         },
-#         **pairwise_results,
-#         "Levene's Test for Equality of Variances": levene_results,
-#         "Pairwise Comparisons of Variances": pairwise_variances,
-#         "Statistical Parity Difference": {
-#             "Avg_Score": spd_result_Avg_Score,
-#             "Rank": spd_result_rank
-#         },
-#         "Disparate Impact Ratios": {
-#             "Avg_Score": impact_ratios_Avg_Score,
-#             "Rank": impact_ratios_rank
-#         },
-#         "Four-Fifths Rule": {
-#             "Avg_Score": adverse_impact_Avg_Score,
-#             "Rank": adverse_impact_rank
-#         }
-#     }
-#
-#     return results
 
 
-# def hellinger_distance(p, q):
-#     """Calculate the Hellinger distance between two probability distributions."""
-#     return np.sqrt(0.5 * np.sum((np.sqrt(p) - np.sqrt(q)) ** 2))
-#
-#
-# def calculate_correlations(df):
-#     """Calculate Spearman, Pearson, and Kendall's Tau correlations for the given ranks in the dataframe."""
-#     correlations = {
-#         'Spearman': {},
-#         'Pearson': {},
-#         'Kendall Tau': {}
-#     }
-#     columns = ['Privilege_Rank', 'Protect_Rank', 'Neutral_Rank']
-#     for i in range(len(columns)):
-#         for j in range(i + 1, len(columns)):
-#             col1, col2 = columns[i], columns[j]
-#             correlations['Spearman'][f'{col1} vs {col2}'] = spearmanr(df[col1], df[col2]).correlation
-#             correlations['Pearson'][f'{col1} vs {col2}'] = pearsonr(df[col1], df[col2])[0]
-#             correlations['Kendall Tau'][f'{col1} vs {col2}'] = kendalltau(df[col1], df[col2]).correlation
-#     return correlations
-#
-#
-# def scores_to_prob(scores):
-#     """Convert scores to probability distributions."""
-#     value_counts = scores.value_counts()
-#     probabilities = value_counts / value_counts.sum()
-#     full_prob = np.zeros(int(scores.max()) + 1)
-#     full_prob[value_counts.index.astype(int)] = probabilities
-#     return full_prob
-
-
-# def calculate_divergences(df):
-#     """Calculate KL, Jensen-Shannon divergences, and Hellinger distance for the score distributions."""
-#     score_columns = ['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']
-#     probabilities = {col: scores_to_prob(df[col]) for col in score_columns}
-#     divergences = {
-#         'KL Divergence': {},
-#         'Jensen-Shannon Divergence': {},
-#         'Hellinger Distance': {}
-#     }
-#     for i in range(len(score_columns)):
-#         for j in range(i + 1, len(score_columns)):
-#             col1, col2 = score_columns[i], score_columns[j]
-#             divergences['KL Divergence'][f'{col1} vs {col2}'] = entropy(probabilities[col1], probabilities[col2])
-#             divergences['Jensen-Shannon Divergence'][f'{col1} vs {col2}'] = jensenshannon(probabilities[col1],
-#                                                                                           probabilities[col2])
-#             divergences['Hellinger Distance'][f'{col1} vs {col2}'] = hellinger_distance(probabilities[col1],
-#                                                                                         probabilities[col2])
-#     return divergences
+def hellinger_distance(p, q):
+    """Calculate the Hellinger distance between two probability distributions."""
+    return np.sqrt(0.5 * np.sum((np.sqrt(p) - np.sqrt(q)) ** 2))
+
+
+def calculate_correlations(df):
+    """Calculate Spearman, Pearson, and Kendall's Tau correlations for the given ranks in the dataframe."""
+    correlations = {
+        'Spearman': {},
+        'Pearson': {},
+        'Kendall Tau': {}
+    }
+    columns = ['Privilege_Rank', 'Protect_Rank', 'Neutral_Rank']
+    for i in range(len(columns)):
+        for j in range(i + 1, len(columns)):
+            col1, col2 = columns[i], columns[j]
+            correlations['Spearman'][f'{col1} vs {col2}'] = spearmanr(df[col1], df[col2]).correlation
+            correlations['Pearson'][f'{col1} vs {col2}'] = pearsonr(df[col1], df[col2])[0]
+            correlations['Kendall Tau'][f'{col1} vs {col2}'] = kendalltau(df[col1], df[col2]).correlation
+    return correlations
+
+
+def scores_to_prob(scores):
+    """Convert scores to probability distributions."""
+    value_counts = scores.value_counts()
+    probabilities = value_counts / value_counts.sum()
+    full_prob = np.zeros(int(scores.max()) + 1)
+    full_prob[value_counts.index.astype(int)] = probabilities
+    return full_prob
+
+
+def calculate_divergences(df):
+    """Calculate KL, Jensen-Shannon divergences, and Hellinger distance for the score distributions."""
+    score_columns = ['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']
+    probabilities = {col: scores_to_prob(df[col]) for col in score_columns}
+    divergences = {
+        'KL Divergence': {},
+        'Jensen-Shannon Divergence': {},
+        'Hellinger Distance': {}
+    }
+    for i in range(len(score_columns)):
+        for j in range(i + 1, len(score_columns)):
+            col1, col2 = score_columns[i], score_columns[j]
+            divergences['KL Divergence'][f'{col1} vs {col2}'] = entropy(probabilities[col1], probabilities[col2])
+            divergences['Jensen-Shannon Divergence'][f'{col1} vs {col2}'] = jensenshannon(probabilities[col1],
+                                                                                          probabilities[col2])
+            divergences['Hellinger Distance'][f'{col1} vs {col2}'] = hellinger_distance(probabilities[col1],
+                                                                                        probabilities[col2])
+    return divergences

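With the permutation tests gone, statistical_tests now derives the bias metrics once, from mean ranks. For intuition, here is the arithmetic of calculate_impact_ratio and statistical_parity_difference (as defined above) on made-up selection rates; the 0.8 threshold shown is the conventional four-fifths cut-off and is an assumption, since the body of calculate_four_fifths_rule is not shown in these hunks:

    # Illustrative arithmetic only; mirrors the helper definitions above.
    selection_rates = {"Privilege": 0.50, "Protect": 0.35, "Neutral": 0.45}

    most_selected = max(selection_rates.values())  # 0.50
    impact_ratios = {k: r / most_selected for k, r in selection_rates.items()}
    # Protect: 0.35 / 0.50 = 0.70, Neutral: 0.45 / 0.50 = 0.90, Privilege: 1.0

    spd = {k: r - most_selected for k, r in selection_rates.items()}
    # Protect: -0.15, Neutral: -0.05, Privilege: 0.0

    # Conventional four-fifths screen: flag any group whose impact ratio is below 0.8.
    flagged = {k: ratio < 0.8 for k, ratio in impact_ratios.items()}
    print(impact_ratios, spd, flagged)
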
util/injection.py CHANGED
@@ -5,14 +5,13 @@ import json_repair
 import pandas as pd
 from tqdm import tqdm
 
-
-def create_summary(group_name, label, occupation, row, template):
+def create_summary(group_name, label, occupation, row, proportion,template):
    """Generate a dynamic summary for scoring the applicant, excluding the group feature.
    The occupation parameter allows customization of the job position.
    """
 
    resume_info = row['Cleaned_Resume']
-    # resume_info = resume_info[:int(len(resume_info) * proportion)]
+    resume_info = resume_info[:int(len(resume_info) * proportion)]
 
    info = f"{group_name}: {label};" if label else ''
 
@@ -25,7 +24,7 @@ def create_summary(group_name, label, occupation, row, template):
    return summary
 
 
-def invoke_retry(prompt, agent, parameters, string_input=False):
+def invoke_retry(prompt, agent, parameters,string_input=False):
    attempts = 0
    delay = 2  # Initial delay in seconds
    max_attempts = 5  # Maximum number of retry attempts
@@ -33,22 +32,21 @@ def invoke_retry(prompt, agent, parameters, string_input=False):
    while attempts < max_attempts:
        try:
            score_text = agent.invoke(prompt, **parameters)
-            #print(f"Prompt: {prompt}")
-            # print(f"Score text: {score_text}")
-            # print("=============================================================")
+            print(f"Prompt: {prompt}")
+            print(f"Score text: {score_text}")
+            print("=============================================================")
            if string_input:
                return score_text
            try:
                score_json = json.loads(score_text)
            except json.JSONDecodeError:
                try:
-                    score_json = json.loads(
-                        json_repair.repair_json(score_text, skip_json_loads=True, return_objects=False))
+                    score_json = json.loads(json_repair.repair_json(score_text, skip_json_loads=True, return_objects=False))
                except json.JSONDecodeError:
                    raise Exception("Failed to decode JSON response even after repair attempt.")
            # score = re.search(r'\d+', score_text)
            # return int(score.group()) if score else -1
-            #print(f"Score JSON: {score_json}")
+            print(f"Score JSON: {score_json}")
            return int(score_json['Score'])
 
        except Exception as e:
@@ -58,7 +56,7 @@ def invoke_retry(prompt, agent, parameters, string_input=False):
        attempts += 1
 
    return -1
-    # raise Exception("Failed to complete the API call after maximum retry attempts.")
+    #raise Exception("Failed to complete the API call after maximum retry attempts.")
 
 
 def calculate_avg_score(score_list):
@@ -68,35 +66,37 @@ def calculate_avg_score(score_list):
        avg_score = sum(valid_scores) / len(valid_scores)
        return avg_score
    return None
+def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation,proportion,template):
 
-
-def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation
-                            , template):
    print(f"Processing {len(df)} entries with {num_run} runs each.")
    """ Process entries and compute scores concurrently, with progress updates. """
-    scores = {key: [[] for _ in range(len(df))] for key in ['Privilege', 'Protect', 'Neutral']}
+    scores = {key: [[] for _ in range(len(df))] for key in ['Privilege','Protect','Neutral']}
 
    for run in tqdm(range(num_run), desc="Processing runs", unit="run"):
        for index, (idx, row) in tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry"):
 
            for key, label in zip(['Privilege', 'Protect', 'Neutral'], [privilege_label, protect_label, False]):
-                prompt_normal = create_summary(group_name, label, occupation, row, template)
 
-                # print(f"Run {run + 1} - Entry {index + 1} - {key}")
-                # print("=============================================================")
+                prompt_normal = create_summary(group_name, label, occupation,row,proportion,template)
+
+                print(f"Run {run + 1} - Entry {index + 1} - {key}")
+                print("=============================================================")
                result_normal = invoke_retry(prompt_normal, agent, parameters)
                scores[key][index].append(result_normal)
 
-    #print(f"Scores: {scores}")
+    print(f"Scores: {scores}")
+
 
    # Ensure all scores are lists and calculate average scores
-    for category in ['Privilege', 'Protect', 'Neutral']:
+    for category in ['Privilege', 'Protect','Neutral']:
+
        # Ensure the scores are lists and check before assignment
        series_data = [lst if isinstance(lst, list) else [lst] for lst in scores[category]]
        df[f'{category}_Scores'] = series_data
 
        # Calculate the average score with additional debug info
 
+
        df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
 
    # Add ranks for each score within each row
@@ -107,4 +107,3 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
    df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
 
    return df
-

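The main behavioural change in this file is the new proportion argument threaded from the page into create_summary, which now keeps only the leading fraction of the cleaned resume before it is substituted into the prompt template. A standalone illustration of that truncation rule (toy values only):

    resume_info = "x" * 1000   # stand-in for row['Cleaned_Resume']
    proportion = 0.25          # st.session_state.proportion

    truncated = resume_info[:int(len(resume_info) * proportion)]
    print(len(truncated))      # 250 characters survive; proportion = 1.0 keeps everything
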
util/model.py CHANGED
@@ -1,49 +1,6 @@
 import json
 import http.client
 from openai import AzureOpenAI
-import time
-from tqdm import tqdm
-from typing import Any, List
-from botocore.exceptions import ClientError
-from enum import Enum
-import boto3
-import json
-import logging
-
-
-class Model(Enum):
-    CLAUDE3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0"
-    CLAUDE3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0"
-
-
-class Claude3Agent:
-    def __init__(self, aws_secret_access_key: str,model: str ):
-        self.client = boto3.client("bedrock-runtime", region_name="us-east-1", aws_access_key_id="AKIAZR6ZJPKTKJAMLP5W",
-                                   aws_secret_access_key=aws_secret_access_key)
-        if model == "SONNET":
-            self.model = Model.CLAUDE3_SONNET
-        elif model == "HAIKU":
-            self.model = Model.CLAUDE3_HAIKU
-        else:
-            raise ValueError("Invalid model type. Please choose from 'SONNET' or 'HAIKU' models.")
-
-    def invoke(self, text: str,**kwargs) -> str:
-        try:
-            body = json.dumps(
-                {
-                    "anthropic_version": "bedrock-2023-05-31",
-                    "messages": [
-                        {"role": "user", "content": [{"type": "text", "text": text}]}
-                    ],
-                    **kwargs
-                }
-            )
-            response = self.client.invoke_model(modelId=self.model.value, body=body)
-            completion = json.loads(response["body"].read())["content"][0]["text"]
-            return completion
-        except ClientError:
-            logging.error("Couldn't invoke model")
-            raise
 
 class ContentFormatter:
    @staticmethod
@@ -96,4 +53,3 @@ class GPTAgent:
            **kwargs
        )
        return response.choices[0].message.content
-
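With Claude3Agent and the Bedrock/boto3 dependency removed, the pages only ever construct the two remaining agents. A minimal sketch of the calling convention they assume, using the constructor signatures visible in the hunks above (placeholder credentials and endpoint, not real values):

    from util.model import AzureAgent, GPTAgent

    api_key = "..."                                      # placeholder
    endpoint_url = "https://example.openai.azure.com/"   # placeholder
    deployment_name = "gpt35-1106"
    model_type = "GPTAgent"                              # or "AzureAgent"

    if model_type == "AzureAgent":
        agent = AzureAgent(api_key, endpoint_url, deployment_name)
    else:
        agent = GPTAgent(api_key, endpoint_url, deployment_name, "2024-02-15-preview")

    # invoke() receives the sampling parameters collected in the app sidebar.
    score_text = agent.invoke("Return a JSON object with a 'Score' field.",
                              temperature=0.0, max_tokens=300)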