Zekun Wu committed on
Commit
634ac1c
1 Parent(s): ba8c4f4
Files changed (1) hide show
  1. util/evaluation.py +43 -2
util/evaluation.py CHANGED
@@ -59,13 +59,54 @@ def calculate_divergences(df):
59
  probabilities[col2])
60
  return divergences
61
 
62
-
63
  def statistical_tests(data):
64
  """Perform various statistical tests to evaluate potential biases."""
65
  variables = ['Privilege', 'Protect', 'Neutral']
66
  rank_suffix = '_Rank'
67
  score_suffix = '_Avg_Score'
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  # # Calculate average ranks
70
  rank_columns = [v + rank_suffix for v in variables]
71
  # average_ranks = data[rank_columns].mean()
@@ -131,7 +172,7 @@ def statistical_tests(data):
131
  "p-value": friedmanchisquare(*rank_data).pvalue
132
  },
133
  # "Kruskal-Wallis Test": {"Statistic": kw_stat, "p-value": kw_p},
134
- # **pairwise_results,
135
  # "ANOVA Test": {"Statistic": anova_stat, "p-value": anova_p},
136
  #"Tukey HSD Test": tukey_result_summary
137
  }
 
59
  probabilities[col2])
60
  return divergences
61
 
 
62
  def statistical_tests(data):
63
  """Perform various statistical tests to evaluate potential biases."""
64
  variables = ['Privilege', 'Protect', 'Neutral']
65
  rank_suffix = '_Rank'
66
  score_suffix = '_Avg_Score'
67
 
68
+ # # Calculate average ranks
69
+ rank_columns = [v + rank_suffix for v in variables]
70
+ average_ranks = data[rank_columns].mean()
71
+
72
+ # Statistical tests
73
+ rank_data = [data[col] for col in rank_columns]
74
+
75
+ # Pairwise tests
76
+ pairs = [
77
+ ('Privilege', 'Protect'),
78
+ ('Protect', 'Neutral'),
79
+ ('Privilege', 'Neutral')
80
+ ]
81
+
82
+ pairwise_results = {
83
+ 'T-Test': {}
84
+ }
85
+
86
+ for (var1, var2) in pairs:
87
+ pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
88
+
89
+ # T-test for independent samples
90
+ t_stat, t_p = ttest_ind(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
91
+ pairwise_results['T-Test'][pair_name_score] = {"Statistic": t_stat, "p-value": t_p}
92
+
93
+ results = {
94
+ "Average Ranks": average_ranks.to_dict(),
95
+ "Friedman Test": {
96
+ "Statistic": friedmanchisquare(*rank_data).statistic,
97
+ "p-value": friedmanchisquare(*rank_data).pvalue
98
+ },
99
+ **pairwise_results,
100
+ }
101
+
102
+ return results
103
+
104
+ def disabled_statistical_tests(data):
105
+ """Perform various statistical tests to evaluate potential biases."""
106
+ variables = ['Privilege', 'Protect', 'Neutral']
107
+ rank_suffix = '_Rank'
108
+ score_suffix = '_Avg_Score'
109
+
110
  # # Calculate average ranks
111
  rank_columns = [v + rank_suffix for v in variables]
112
  # average_ranks = data[rank_columns].mean()
 
172
  "p-value": friedmanchisquare(*rank_data).pvalue
173
  },
174
  # "Kruskal-Wallis Test": {"Statistic": kw_stat, "p-value": kw_p},
175
+ **pairwise_results,
176
  # "ANOVA Test": {"Statistic": anova_stat, "p-value": anova_p},
177
  #"Tukey HSD Test": tukey_result_summary
178
  }