Zekun Wu committed on
Commit f335959
1 Parent(s): 634ac1c
Files changed (1)
  1. util/evaluation.py +131 -43
util/evaluation.py CHANGED
@@ -1,12 +1,100 @@
  import pandas as pd
  import numpy as np
+ from scipy import stats
  from scipy.stats import friedmanchisquare, kruskal, mannwhitneyu, wilcoxon, levene, ttest_ind, f_oneway
  from statsmodels.stats.multicomp import MultiComparison

- import pandas as pd
- import numpy as np
  from scipy.stats import spearmanr, pearsonr, kendalltau, entropy
  from scipy.spatial.distance import jensenshannon
+ from scipy.stats import ttest_ind, friedmanchisquare, rankdata
+ from statsmodels.stats.multicomp import pairwise_tukeyhsd
+
+ def bootstrap_t_test(data1, data2, num_bootstrap=1000):
+     """Perform a bootstrapped t-test."""
+     observed_t_stat, _ = ttest_ind(data1, data2)
+     combined = np.concatenate([data1, data2])
+     t_stats = []
+
+     for _ in range(num_bootstrap):
+         np.random.shuffle(combined)
+         new_data1 = combined[:len(data1)]
+         new_data2 = combined[len(data1):]
+         t_stat, _ = ttest_ind(new_data1, new_data2)
+         t_stats.append(t_stat)
+
+     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
+     return observed_t_stat, p_value
+
+ def posthoc_friedman(data, variables, rank_suffix='_Rank'):
+     """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
+     ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
+     num_subjects = ranked_data.shape[0]
+     num_conditions = ranked_data.shape[1]
+     comparisons = []
+
+     for i in range(num_conditions):
+         for j in range(i + 1, num_conditions):
+             diff = ranked_data[:, i] - ranked_data[:, j]
+             abs_diff = np.abs(diff)
+             avg_diff = np.mean(diff)
+             se_diff = np.std(diff, ddof=1) / np.sqrt(num_subjects)
+             z_value = avg_diff / se_diff
+             p_value = 2 * (1 - stats.norm.cdf(np.abs(z_value)))
+             comparisons.append({
+                 "Group1": variables[i],
+                 "Group2": variables[j],
+                 "Z": z_value,
+                 "p-value": p_value
+             })
+
+     return comparisons
+
+ def statistical_tests(data):
+     """Perform various statistical tests to evaluate potential biases."""
+     variables = ['Privilege', 'Protect', 'Neutral']
+     rank_suffix = '_Rank'
+     score_suffix = '_Avg_Score'
+
+     # Calculate average ranks
+     rank_columns = [v + rank_suffix for v in variables]
+     average_ranks = data[rank_columns].mean()
+
+     # Statistical tests
+     rank_data = [data[col] for col in rank_columns]
+
+     # Pairwise tests
+     pairs = [
+         ('Privilege', 'Protect'),
+         ('Protect', 'Neutral'),
+         ('Privilege', 'Neutral')
+     ]
+
+     pairwise_results = {
+         'T-Test': {}
+     }
+
+     for (var1, var2) in pairs:
+         pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
+
+         # Bootstrapped T-test for independent samples
+         t_stat, t_p = bootstrap_t_test(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
+         pairwise_results['T-Test'][pair_name_score] = {"Statistic": t_stat, "p-value": t_p}
+
+     # Friedman test
+     friedman_stat, friedman_p = friedmanchisquare(*rank_data)
+     posthoc_results = posthoc_friedman(data, variables, rank_suffix)
+
+     results = {
+         "Average Ranks": average_ranks.to_dict(),
+         "Friedman Test": {
+             "Statistic": friedman_stat,
+             "p-value": friedman_p,
+             "Post-hoc": posthoc_results
+         },
+         **pairwise_results,
+     }
+
+     return results


  def hellinger_distance(p, q):
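
One thing worth flagging in the hunk above: despite its name, bootstrap_t_test shuffles the pooled sample and re-splits it into two groups of the original sizes, which is a permutation (randomization) test of the t-statistic rather than a bootstrap (a bootstrap would resample each group with replacement). It still yields a valid resampling-based p-value. A minimal sketch of calling it, assuming the module path from this commit; the synthetic score arrays are placeholders, not project data:

import numpy as np
from util.evaluation import bootstrap_t_test  # assumes this commit's module path

rng = np.random.default_rng(0)
scores_a = rng.normal(0.55, 0.10, 200)  # stand-in for a Privilege_Avg_Score column
scores_b = rng.normal(0.50, 0.10, 200)  # stand-in for a Protect_Avg_Score column

t_stat, p_value = bootstrap_t_test(scores_a, scores_b, num_bootstrap=2000)
print(f"t = {t_stat:.3f}, resampled p = {p_value:.4f}")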
@@ -59,47 +147,47 @@ def calculate_divergences(df):
                                          probabilities[col2])
      return divergences

- def statistical_tests(data):
-     """Perform various statistical tests to evaluate potential biases."""
-     variables = ['Privilege', 'Protect', 'Neutral']
-     rank_suffix = '_Rank'
-     score_suffix = '_Avg_Score'
-
-     # # Calculate average ranks
-     rank_columns = [v + rank_suffix for v in variables]
-     average_ranks = data[rank_columns].mean()
-
-     # Statistical tests
-     rank_data = [data[col] for col in rank_columns]
-
-     # Pairwise tests
-     pairs = [
-         ('Privilege', 'Protect'),
-         ('Protect', 'Neutral'),
-         ('Privilege', 'Neutral')
-     ]
-
-     pairwise_results = {
-         'T-Test': {}
-     }
-
-     for (var1, var2) in pairs:
-         pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
-
-         # T-test for independent samples
-         t_stat, t_p = ttest_ind(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
-         pairwise_results['T-Test'][pair_name_score] = {"Statistic": t_stat, "p-value": t_p}
-
-     results = {
-         "Average Ranks": average_ranks.to_dict(),
-         "Friedman Test": {
-             "Statistic": friedmanchisquare(*rank_data).statistic,
-             "p-value": friedmanchisquare(*rank_data).pvalue
-         },
-         **pairwise_results,
-     }
-
-     return results
+ # def statistical_tests(data):
+ #     """Perform various statistical tests to evaluate potential biases."""
+ #     variables = ['Privilege', 'Protect', 'Neutral']
+ #     rank_suffix = '_Rank'
+ #     score_suffix = '_Avg_Score'
+ #
+ #     # # Calculate average ranks
+ #     rank_columns = [v + rank_suffix for v in variables]
+ #     average_ranks = data[rank_columns].mean()
+ #
+ #     # Statistical tests
+ #     rank_data = [data[col] for col in rank_columns]
+ #
+ #     # Pairwise tests
+ #     pairs = [
+ #         ('Privilege', 'Protect'),
+ #         ('Protect', 'Neutral'),
+ #         ('Privilege', 'Neutral')
+ #     ]
+ #
+ #     pairwise_results = {
+ #         'T-Test': {}
+ #     }
+ #
+ #     for (var1, var2) in pairs:
+ #         pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
+ #
+ #         # T-test for independent samples
+ #         t_stat, t_p = ttest_ind(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
+ #         pairwise_results['T-Test'][pair_name_score] = {"Statistic": t_stat, "p-value": t_p}
+ #
+ #     results = {
+ #         "Average Ranks": average_ranks.to_dict(),
+ #         "Friedman Test": {
+ #             "Statistic": friedmanchisquare(*rank_data).statistic,
+ #             "p-value": friedmanchisquare(*rank_data).pvalue
+ #         },
+ #         **pairwise_results,
+ #     }
+ #
+ #     return results

  def disabled_statistical_tests(data):
      """Perform various statistical tests to evaluate potential biases."""
 