Zekun Wu committed · Commit 634ac1c · 1 Parent(s): ba8c4f4

update

Browse files - util/evaluation.py +43 -2
util/evaluation.py
CHANGED
@@ -59,13 +59,54 @@ def calculate_divergences(df):
                                                     probabilities[col2])
     return divergences
 
-
 def statistical_tests(data):
     """Perform various statistical tests to evaluate potential biases."""
     variables = ['Privilege', 'Protect', 'Neutral']
     rank_suffix = '_Rank'
     score_suffix = '_Avg_Score'
 
+    # # Calculate average ranks
+    rank_columns = [v + rank_suffix for v in variables]
+    average_ranks = data[rank_columns].mean()
+
+    # Statistical tests
+    rank_data = [data[col] for col in rank_columns]
+
+    # Pairwise tests
+    pairs = [
+        ('Privilege', 'Protect'),
+        ('Protect', 'Neutral'),
+        ('Privilege', 'Neutral')
+    ]
+
+    pairwise_results = {
+        'T-Test': {}
+    }
+
+    for (var1, var2) in pairs:
+        pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
+
+        # T-test for independent samples
+        t_stat, t_p = ttest_ind(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
+        pairwise_results['T-Test'][pair_name_score] = {"Statistic": t_stat, "p-value": t_p}
+
+    results = {
+        "Average Ranks": average_ranks.to_dict(),
+        "Friedman Test": {
+            "Statistic": friedmanchisquare(*rank_data).statistic,
+            "p-value": friedmanchisquare(*rank_data).pvalue
+        },
+        **pairwise_results,
+    }
+
+    return results
+
+def disabled_statistical_tests(data):
+    """Perform various statistical tests to evaluate potential biases."""
+    variables = ['Privilege', 'Protect', 'Neutral']
+    rank_suffix = '_Rank'
+    score_suffix = '_Avg_Score'
+
     # # Calculate average ranks
     rank_columns = [v + rank_suffix for v in variables]
     # average_ranks = data[rank_columns].mean()
@@ -131,7 +172,7 @@ def statistical_tests(data):
             "p-value": friedmanchisquare(*rank_data).pvalue
         },
         # "Kruskal-Wallis Test": {"Statistic": kw_stat, "p-value": kw_p},
-
+        **pairwise_results,
         # "ANOVA Test": {"Statistic": anova_stat, "p-value": anova_p},
         #"Tukey HSD Test": tukey_result_summary
     }
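The **pairwise_results unpacking (added in both hunks) spreads the pairwise t-test block into the top-level results dictionary. For the new statistical_tests this means the returned mapping has roughly the shape below; this is a sketch only, with ... standing in for the statistics computed at runtime:

# Approximate shape of the dict returned by the new statistical_tests().
# The "..." placeholders mark values computed at runtime, not real output.
{
    "Average Ranks": {"Privilege_Rank": ..., "Protect_Rank": ..., "Neutral_Rank": ...},
    "Friedman Test": {"Statistic": ..., "p-value": ...},
    "T-Test": {
        "Privilege_Avg_Score vs Protect_Avg_Score": {"Statistic": ..., "p-value": ...},
        "Protect_Avg_Score vs Neutral_Avg_Score": {"Statistic": ..., "p-value": ...},
        "Privilege_Avg_Score vs Neutral_Avg_Score": {"Statistic": ..., "p-value": ...},
    },
}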
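For reference, a minimal driver sketch for the updated statistical_tests. The _Rank and _Avg_Score column names follow the suffixes used in the diff; the synthetic data, the row count, and the assumption that the Space's repository root (and therefore util.evaluation) is importable are illustrative only:

# Hypothetical driver for the new statistical_tests(); assumes the package
# "util" is importable from the working directory, as in the Space's layout.
import numpy as np
import pandas as pd

from util.evaluation import statistical_tests

rng = np.random.default_rng(0)
n = 30  # number of evaluation rows; arbitrary for this sketch

# Synthetic stand-in for the real evaluation output: one rank and one
# average score per group ('Privilege', 'Protect', 'Neutral') per row.
data = pd.DataFrame({
    "Privilege_Rank": rng.integers(1, 4, n),
    "Protect_Rank": rng.integers(1, 4, n),
    "Neutral_Rank": rng.integers(1, 4, n),
    "Privilege_Avg_Score": rng.normal(0.55, 0.10, n),
    "Protect_Avg_Score": rng.normal(0.50, 0.10, n),
    "Neutral_Avg_Score": rng.normal(0.52, 0.10, n),
})

results = statistical_tests(data)
print(results["Average Ranks"])   # mean of each *_Rank column
print(results["Friedman Test"])   # Friedman chi-square across the three rank columns
print(results["T-Test"])          # independent t-tests over the *_Avg_Score pairs

The split mirrors the two column families: friedmanchisquare is a non-parametric comparison across the three related rank columns, while ttest_ind compares the average-score columns of each pair of groups.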
|