Zekun Wu committed on
Commit
83caa5f
1 Parent(s): 36ca842
Files changed (1) hide show
  1. util/evaluation.py +43 -26
util/evaluation.py CHANGED
@@ -105,21 +105,18 @@ def statistical_tests(data):
105
 
106
  # Pairwise Wilcoxon Signed-Rank Test
107
  for var1, var2 in pairs:
 
108
  pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'
 
 
109
  if len(data) > 20:
110
- wilcoxon_stat, wilcoxon_p = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
 
111
  else:
112
- wilcoxon_stat, wilcoxon_p = np.nan, "Sample size too small for Wilcoxon test."
113
- pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat, "p-value": wilcoxon_p}
114
-
115
- # # Levene's Test for Equality of Variances
116
- # levene_results = {
117
- # 'Privilege vs Protect': levene(data['Privilege_Rank'], data['Protect_Rank']),
118
- # 'Privilege vs Neutral': levene(data['Privilege_Rank'], data['Neutral_Rank']),
119
- # 'Protect vs Neutral': levene(data['Protect_Rank'], data['Neutral_Rank'])
120
- # }
121
- #
122
- # levene_results = {key: {"Statistic": res.statistic, "p-value": res.pvalue} for key, res in levene_results.items()}
123
 
124
  # Calculate variances for ranks
125
  variances = {col: data[col].var() for col in rank_columns}
@@ -147,25 +144,45 @@ def statistical_tests(data):
147
  posthoc_results = posthoc_nemenyi(rank_matrix_transposed)
148
 
149
  # Perform permutation tests for variances
150
- T_priv_prot_var, p_priv_prot_var = permutation_test_variance(data['Privilege_Rank'], data['Protect_Rank'])
151
- T_neut_prot_var, p_neut_prot_var = permutation_test_variance(data['Neutral_Rank'], data['Protect_Rank'])
152
- T_neut_priv_var, p_neut_priv_var = permutation_test_variance(data['Neutral_Rank'], data['Privilege_Rank'])
 
 
 
 
 
153
 
154
  # Perform permutation tests for means
155
- T_priv_prot_mean, p_priv_prot_mean = permutation_test_mean(data['Privilege_Rank'], data['Protect_Rank'])
156
- T_neut_prot_mean, p_neut_prot_mean = permutation_test_mean(data['Neutral_Rank'], data['Protect_Rank'])
157
- T_neut_priv_mean, p_neut_priv_mean = permutation_test_mean(data['Neutral_Rank'], data['Privilege_Rank'])
 
 
 
 
 
158
 
159
  permutation_results = {
160
- "Permutation Tests for Variances": {
161
- "Privilege vs. Protect": {"Statistic": T_priv_prot_var, "p-value": p_priv_prot_var},
162
- "Neutral vs. Protect": {"Statistic": T_neut_prot_var, "p-value": p_neut_prot_var},
163
- "Neutral vs. Privilege": {"Statistic": T_neut_priv_var, "p-value": p_neut_priv_var}
 
 
 
 
 
 
 
 
 
 
164
  },
165
- "Permutation Tests for Means": {
166
- "Privilege vs. Protect": {"Statistic": T_priv_prot_mean, "p-value": p_priv_prot_mean},
167
- "Neutral vs. Protect": {"Statistic": T_neut_prot_mean, "p-value": p_neut_prot_mean},
168
- "Neutral vs. Privilege": {"Statistic": T_neut_priv_mean, "p-value": p_neut_priv_mean}
169
  }
170
  }
171
 
 
105
 
106
  # Pairwise Wilcoxon Signed-Rank Test
107
  for var1, var2 in pairs:
108
+
109
  pair_rank_score = f'{var1}{rank_suffix} vs {var2}{rank_suffix}'
110
+ pair_score_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
111
+
112
  if len(data) > 20:
113
+ wilcoxon_stat_rank, wilcoxon_p_rank = wilcoxon(data[f'{var1}{rank_suffix}'], data[f'{var2}{rank_suffix}'])
114
+ wilcoxon_stat_score, wilcoxon_p_score = wilcoxon(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
115
  else:
116
+ wilcoxon_stat_rank, wilcoxon_p_rank = np.nan, "Sample size too small for Wilcoxon test."
117
+ wilcoxon_stat_score, wilcoxon_p_score = np.nan, "Sample size too small for Wilcoxon test."
118
+ pairwise_results['Wilcoxon Test'][pair_rank_score] = {"Statistic": wilcoxon_stat_rank, "p-value": wilcoxon_p_rank}
119
+ pairwise_results['Wilcoxon Test'][pair_score_score] = {"Statistic": wilcoxon_stat_score, "p-value": wilcoxon_p_score}
 
 
 
 
 
 
 
120
 
121
  # Calculate variances for ranks
122
  variances = {col: data[col].var() for col in rank_columns}
 
144
  posthoc_results = posthoc_nemenyi(rank_matrix_transposed)
145
 
146
  # Perform permutation tests for variances
147
+ T_priv_prot_var_rank, p_priv_prot_var_rank = permutation_test_variance(data['Privilege_Rank'], data['Protect_Rank'])
148
+ T_neut_prot_var_rank, p_neut_prot_var_rank = permutation_test_variance(data['Neutral_Rank'], data['Protect_Rank'])
149
+ T_neut_priv_var_rank, p_neut_priv_var_rank = permutation_test_variance(data['Neutral_Rank'], data['Privilege_Rank'])
150
+
151
+ # Perform permutation tests for variances by using score data
152
+ T_priv_prot_var_score, p_priv_prot_var_score = permutation_test_variance(data['Privilege_Avg_Score'], data['Protect_Avg_Score'])
153
+ T_neut_prot_var_score, p_neut_prot_var_score = permutation_test_variance(data['Neutral_Avg_Score'], data['Protect_Avg_Score'])
154
+ T_neut_priv_var_score, p_neut_priv_var_score = permutation_test_variance(data['Neutral_Avg_Score'], data['Privilege_Avg_Score'])
155
 
156
  # Perform permutation tests for means
157
+ T_priv_prot_mean_rank, p_priv_prot_mean_rank = permutation_test_mean(data['Privilege_Rank'], data['Protect_Rank'])
158
+ T_neut_prot_mean_rank, p_neut_prot_mean_rank = permutation_test_mean(data['Neutral_Rank'], data['Protect_Rank'])
159
+ T_neut_priv_mean_rank, p_neut_priv_mean_rank = permutation_test_mean(data['Neutral_Rank'], data['Privilege_Rank'])
160
+
161
+ # Perform permutation tests for means by using score data
162
+ T_priv_prot_mean_score, p_priv_prot_mean_score = permutation_test_mean(data['Privilege_Avg_Score'], data['Protect_Avg_Score'])
163
+ T_neut_prot_mean_score, p_neut_prot_mean_score = permutation_test_mean(data['Neutral_Avg_Score'], data['Protect_Avg_Score'])
164
+ T_neut_priv_mean_score, p_neut_priv_mean_score = permutation_test_mean(data['Neutral_Avg_Score'], data['Privilege_Avg_Score'])
165
 
166
  permutation_results = {
167
+ "Permutation Tests for Variances (score)": {
168
+ "Privilege vs. Protect": {"Statistic": T_priv_prot_var_score, "p-value": p_priv_prot_var_score},
169
+ "Neutral vs. Protect": {"Statistic": T_neut_prot_var_score, "p-value": p_neut_prot_var_score},
170
+ "Neutral vs. Privilege": {"Statistic": T_neut_priv_var_score, "p-value": p_neut_priv_var_score}
171
+ },
172
+ "Permutation Tests for Means (score)": {
173
+ "Privilege vs. Protect": {"Statistic": T_priv_prot_mean_score, "p-value": p_priv_prot_mean_score},
174
+ "Neutral vs. Protect": {"Statistic": T_neut_prot_mean_score, "p-value": p_neut_prot_mean_score},
175
+ "Neutral vs. Privilege": {"Statistic": T_neut_priv_mean_score, "p-value": p_neut_priv_mean_score}
176
+ },
177
+ "Permutation Tests for Variances (rank)": {
178
+ "Privilege vs. Protect": {"Statistic": T_priv_prot_var_rank, "p-value": p_priv_prot_var_rank},
179
+ "Neutral vs. Protect": {"Statistic": T_neut_prot_var_rank, "p-value": p_neut_prot_var_rank},
180
+ "Neutral vs. Privilege": {"Statistic": T_neut_priv_var_rank, "p-value": p_neut_priv_var_rank}
181
  },
182
+ "Permutation Tests for Means (rank)": {
183
+ "Privilege vs. Protect": {"Statistic": T_priv_prot_mean_rank, "p-value": p_priv_prot_mean_rank},
184
+ "Neutral vs. Protect": {"Statistic": T_neut_prot_mean_rank, "p-value": p_neut_prot_mean_rank},
185
+ "Neutral vs. Privilege": {"Statistic": T_neut_priv_mean_rank, "p-value": p_neut_priv_mean_rank}
186
  }
187
  }
188