Spaces:
Running
Running
Zekun Wu
committed on
Commit
•
6830d47
1
Parent(s):
775d0e1
add
Browse files- util/evaluation.py +56 -49
- util/injection.py +1 -1
util/evaluation.py
CHANGED
@@ -80,6 +80,13 @@ def calculate_four_fifths_rule(impact_ratios):
|
|
80 |
return adverse_impact
|
81 |
|
82 |
def statistical_tests(data):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
"""Perform various statistical tests to evaluate potential biases."""
|
84 |
variables = ['Privilege', 'Protect', 'Neutral']
|
85 |
rank_suffix = '_Rank'
|
@@ -308,52 +315,52 @@ def statistical_tests(data):
|
|
308 |
# return results
|
309 |
|
310 |
|
311 |
-
def hellinger_distance(p, q):
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
def calculate_correlations(df):
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
def scores_to_prob(scores):
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
def calculate_divergences(df):
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
|
|
80 |
return adverse_impact
|
81 |
|
82 |
def statistical_tests(data):
|
83 |
+
# Add ranks for each score within each row
|
84 |
+
ranks = data[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=True)
|
85 |
+
|
86 |
+
data['Privilege_Rank'] = ranks['Privilege_Avg_Score']
|
87 |
+
data['Protect_Rank'] = ranks['Protect_Avg_Score']
|
88 |
+
data['Neutral_Rank'] = ranks['Neutral_Avg_Score']
|
89 |
+
|
90 |
"""Perform various statistical tests to evaluate potential biases."""
|
91 |
variables = ['Privilege', 'Protect', 'Neutral']
|
92 |
rank_suffix = '_Rank'
|
|
|
315 |
# return results
|
316 |
|
317 |
|
318 |
+
# def hellinger_distance(p, q):
|
319 |
+
# """Calculate the Hellinger distance between two probability distributions."""
|
320 |
+
# return np.sqrt(0.5 * np.sum((np.sqrt(p) - np.sqrt(q)) ** 2))
|
321 |
+
#
|
322 |
+
#
|
323 |
+
# def calculate_correlations(df):
|
324 |
+
# """Calculate Spearman, Pearson, and Kendall's Tau correlations for the given ranks in the dataframe."""
|
325 |
+
# correlations = {
|
326 |
+
# 'Spearman': {},
|
327 |
+
# 'Pearson': {},
|
328 |
+
# 'Kendall Tau': {}
|
329 |
+
# }
|
330 |
+
# columns = ['Privilege_Rank', 'Protect_Rank', 'Neutral_Rank']
|
331 |
+
# for i in range(len(columns)):
|
332 |
+
# for j in range(i + 1, len(columns)):
|
333 |
+
# col1, col2 = columns[i], columns[j]
|
334 |
+
# correlations['Spearman'][f'{col1} vs {col2}'] = spearmanr(df[col1], df[col2]).correlation
|
335 |
+
# correlations['Pearson'][f'{col1} vs {col2}'] = pearsonr(df[col1], df[col2])[0]
|
336 |
+
# correlations['Kendall Tau'][f'{col1} vs {col2}'] = kendalltau(df[col1], df[col2]).correlation
|
337 |
+
# return correlations
|
338 |
+
#
|
339 |
+
#
|
340 |
+
# def scores_to_prob(scores):
|
341 |
+
# """Convert scores to probability distributions."""
|
342 |
+
# value_counts = scores.value_counts()
|
343 |
+
# probabilities = value_counts / value_counts.sum()
|
344 |
+
# full_prob = np.zeros(int(scores.max()) + 1)
|
345 |
+
# full_prob[value_counts.index.astype(int)] = probabilities
|
346 |
+
# return full_prob
|
347 |
+
|
348 |
+
|
349 |
+
# def calculate_divergences(df):
|
350 |
+
# """Calculate KL, Jensen-Shannon divergences, and Hellinger distance for the score distributions."""
|
351 |
+
# score_columns = ['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']
|
352 |
+
# probabilities = {col: scores_to_prob(df[col]) for col in score_columns}
|
353 |
+
# divergences = {
|
354 |
+
# 'KL Divergence': {},
|
355 |
+
# 'Jensen-Shannon Divergence': {},
|
356 |
+
# 'Hellinger Distance': {}
|
357 |
+
# }
|
358 |
+
# for i in range(len(score_columns)):
|
359 |
+
# for j in range(i + 1, len(score_columns)):
|
360 |
+
# col1, col2 = score_columns[i], score_columns[j]
|
361 |
+
# divergences['KL Divergence'][f'{col1} vs {col2}'] = entropy(probabilities[col1], probabilities[col2])
|
362 |
+
# divergences['Jensen-Shannon Divergence'][f'{col1} vs {col2}'] = jensenshannon(probabilities[col1],
|
363 |
+
# probabilities[col2])
|
364 |
+
# divergences['Hellinger Distance'][f'{col1} vs {col2}'] = hellinger_distance(probabilities[col1],
|
365 |
+
# probabilities[col2])
|
366 |
+
# return divergences
|
util/injection.py
CHANGED
@@ -100,7 +100,7 @@ def process_scores_multiple(df, num_run, parameters, privilege_label, protect_la
|
|
100 |
df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
|
101 |
|
102 |
# Add ranks for each score within each row
|
103 |
-
ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=
|
104 |
|
105 |
df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
|
106 |
df['Protect_Rank'] = ranks['Protect_Avg_Score']
|
|
|
100 |
df[f'{category}_Avg_Score'] = df[f'{category}_Scores'].apply(calculate_avg_score)
|
101 |
|
102 |
# Add ranks for each score within each row
|
103 |
+
ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=True)
|
104 |
|
105 |
df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
|
106 |
df['Protect_Rank'] = ranks['Protect_Avg_Score']
|