File size: 11,985 Bytes
5defafa
 
f335959
40d7b09
 
0765d8d
 
015b1a2
f335959
8a73f6f
ae16dbc
f335959
ae16dbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f335959
9dd76eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae16dbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f335959
ae16dbc
f335959
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae16dbc
 
 
 
f335959
 
7a70a60
f335959
ae16dbc
 
 
 
f335959
 
 
ae16dbc
f335959
 
 
 
 
 
 
 
 
 
 
 
0765d8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f335959
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634ac1c
 
 
 
 
 
 
5fd4442
40d7b09
5fd4442
40d7b09
 
 
 
 
168431b
 
 
 
 
 
 
40d7b09
180622c
ba8c4f4
 
 
180622c
 
 
168431b
5ca3be7
 
168431b
16d8bbb
 
 
 
 
 
 
 
 
 
 
168431b
16d8bbb
 
40d7b09
168431b
16d8bbb
 
5ca3be7
40d7b09
 
ba8c4f4
 
 
 
 
 
 
 
 
40d7b09
 
5fd4442
fcfc515
 
 
 
ba8c4f4
634ac1c
ba8c4f4
 
40d7b09
 
5fd4442
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import friedmanchisquare, kruskal, mannwhitneyu, wilcoxon, levene, ttest_ind, f_oneway
from statsmodels.stats.multicomp import MultiComparison
from scipy.stats import spearmanr, pearsonr, kendalltau, entropy
from scipy.spatial.distance import jensenshannon
from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import ttest_1samp
from scikit_posthocs import posthoc_nemenyi

# def bootstrap_t_test(data1, data2, num_bootstrap=1000):
#     """Perform a bootstrapped t-test."""
#     observed_t_stat, _ = ttest_ind(data1, data2)
#     combined = np.concatenate([data1, data2])
#     t_stats = []
#
#     for _ in range(num_bootstrap):
#         np.random.shuffle(combined)
#         new_data1 = combined[:len(data1)]
#         new_data2 = combined[len(data1):]
#         t_stat, _ = ttest_ind(new_data1, new_data2)
#         t_stats.append(t_stat)
#
#     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
#     return observed_t_stat, p_value


# def bootstrap_t_test(data1, data2, num_bootstrap=1000):
#     """Perform a bootstrapped paired t-test for mean difference being zero."""
#     # Calculate the observed differences between paired samples
#     differences = data1 - data2
#     # Compute the observed t-statistic for the differences
#     observed_t_stat, _ = ttest_1samp(differences, 0)
#
#     t_stats = []
#
#     for _ in range(num_bootstrap):
#         # Resample the differences with replacement
#         resampled_diffs = np.random.choice(differences, size=len(differences), replace=True)
#         # Perform a one-sample t-test on the resampled differences against zero
#         t_stat, _ = ttest_1samp(resampled_diffs, 0)
#         # Append the t-statistic to the list
#         t_stats.append(t_stat)
#
#     # Calculate the p-value as the proportion of bootstrap t-statistics
#     # that are as extreme as or more extreme than the observed t-statistic
#     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
#     return observed_t_stat, p_value

# def posthoc_friedman(data, variables, rank_suffix='_Rank'):
#     """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
#     ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
#     num_subjects = ranked_data.shape[0]
#     num_conditions = ranked_data.shape[1]
#     comparisons = []
#
#     for i in range(num_conditions):
#         for j in range(i + 1, num_conditions):
#             diff = ranked_data[:, i] - ranked_data[:, j]
#             abs_diff = np.abs(diff)
#             avg_diff = np.mean(diff)
#             se_diff = np.std(diff, ddof=1) / np.sqrt(num_subjects)
#             z_value = avg_diff / se_diff
#             p_value = 2 * (1 - stats.norm.cdf(np.abs(z_value)))
#             comparisons.append({
#                 "Group1": variables[i],
#                 "Group2": variables[j],
#                 "Z": z_value,
#                 "p-value": p_value
#             })
#
#     return comparisons
def posthoc_friedman_nemenyi(data, variables, rank_suffix='_Rank'):
    """Run the Nemenyi post-hoc test that follows a Friedman test.

    Args:
        data: DataFrame holding one ``<variable><rank_suffix>`` column per
            entry of ``variables``; each row is treated as one block
            (repeated-measures unit).
        variables: Condition names whose rank columns are compared.
        rank_suffix: Suffix appended to each variable name to form its
            column name.

    Returns:
        The pairwise p-value table returned by
        ``scikit_posthocs.posthoc_nemenyi_friedman`` (one row/column per
        condition, labelled by position in ``variables``).
    """
    # Local import: the module header only pulls in ``posthoc_nemenyi``.
    from scikit_posthocs import posthoc_nemenyi_friedman

    # Block-design matrix: rows are blocks (subjects), columns are conditions.
    ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()

    # ``posthoc_nemenyi`` is the Kruskal-Wallis (independent samples) variant
    # and would interpret every *row* of a 2-D array as a separate group,
    # i.e. it would compare subjects instead of conditions. The Friedman
    # variant compares the columns within blocks, which is what is wanted.
    return posthoc_nemenyi_friedman(ranked_data)
def statistical_tests(data):
    """Perform various statistical tests to evaluate potential biases.

    Args:
        data: DataFrame containing, for each of 'Privilege', 'Protect' and
            'Neutral', a ``<name>_Rank`` column and a ``<name>_Avg_Score``
            column.

    Returns:
        dict with three entries:
          * "Average Ranks": mean of every rank column.
          * "Friedman Test": statistic, p-value and the Nemenyi post-hoc
            result computed on the rank columns.
          * "Wilcoxon Signed-Rank Test": for every pair of conditions, the
            statistic and p-value computed on the average-score columns.
    """
    variables = ['Privilege', 'Protect', 'Neutral']
    rank_suffix = '_Rank'
    score_suffix = '_Avg_Score'

    # Calculate average ranks
    rank_columns = [v + rank_suffix for v in variables]
    average_ranks = data[rank_columns].mean()

    # One series per condition, in the order expected by the Friedman test.
    rank_data = [data[col] for col in rank_columns]

    # Pairwise tests
    pairs = [
        ('Privilege', 'Protect'),
        ('Protect', 'Neutral'),
        ('Privilege', 'Neutral')
    ]

    wilcoxon_results = {}
    for var1, var2 in pairs:
        score_col1 = f'{var1}{score_suffix}'
        score_col2 = f'{var2}{score_suffix}'

        # Wilcoxon signed-rank test on the paired average scores.
        wilcoxon_stat, wilcoxon_p = wilcoxon(data[score_col1], data[score_col2])
        wilcoxon_results[f'{score_col1} vs {score_col2}'] = {
            "Statistic": wilcoxon_stat,
            "p-value": wilcoxon_p,
        }

    # Friedman test across the three rank columns, followed by its post-hoc.
    friedman_stat, friedman_p = friedmanchisquare(*rank_data)
    posthoc_results = posthoc_friedman_nemenyi(data, variables, rank_suffix)

    return {
        "Average Ranks": average_ranks.to_dict(),
        "Friedman Test": {
            "Statistic": friedman_stat,
            "p-value": friedman_p,
            "Post-hoc": posthoc_results
        },
        'Wilcoxon Signed-Rank Test': wilcoxon_results,
    }


def hellinger_distance(p, q):
    """Return the Hellinger distance between distributions ``p`` and ``q``.

    Both arguments are element-wise compatible arrays of probabilities; the
    result is ``sqrt(0.5 * sum((sqrt(p) - sqrt(q))^2))``.
    """
    root_gap = np.sqrt(p) - np.sqrt(q)
    half_squared_norm = 0.5 * np.sum(root_gap ** 2)
    return np.sqrt(half_squared_norm)


def calculate_correlations(df):
    """Compute pairwise rank correlations between the three rank columns.

    For every unordered pair of 'Privilege_Rank', 'Protect_Rank' and
    'Neutral_Rank' the Spearman, Pearson and Kendall Tau coefficients are
    stored under the key ``'<col1> vs <col2>'``.

    Returns:
        dict mapping 'Spearman', 'Pearson' and 'Kendall Tau' to a dict of
        pair label -> correlation coefficient.
    """
    rank_cols = ['Privilege_Rank', 'Protect_Rank', 'Neutral_Rank']
    spearman_by_pair = {}
    pearson_by_pair = {}
    kendall_by_pair = {}

    for pos, left in enumerate(rank_cols):
        for right in rank_cols[pos + 1:]:
            label = f'{left} vs {right}'
            x, y = df[left], df[right]

            # Each result unpacks as (coefficient, p-value); keep the former.
            rho, _ = spearmanr(x, y)
            r, _ = pearsonr(x, y)
            tau, _ = kendalltau(x, y)

            spearman_by_pair[label] = rho
            pearson_by_pair[label] = r
            kendall_by_pair[label] = tau

    return {
        'Spearman': spearman_by_pair,
        'Pearson': pearson_by_pair,
        'Kendall Tau': kendall_by_pair,
    }


def scores_to_prob(scores):
    """Convert scores to a probability distribution over integer bins.

    Every score is truncated to an integer bin index; the returned array has
    length ``int(max score) + 1`` and entry ``k`` is the fraction of non-NaN
    scores that fall in bin ``k``.

    Args:
        scores: 1-D collection of non-negative numeric scores (e.g. a pandas
            Series). NaN entries are ignored.

    Returns:
        numpy.ndarray of probabilities that sums to 1.

    Raises:
        ValueError: if there is no non-NaN score, or a score maps to a
            negative bin.
    """
    values = np.asarray(scores, dtype=float)
    values = values[~np.isnan(values)]
    if values.size == 0:
        raise ValueError("scores_to_prob requires at least one non-NaN score")

    bins = values.astype(int)
    if bins.min() < 0:
        # A negative index would silently wrap around to the end of the array.
        raise ValueError("scores_to_prob requires non-negative scores")

    # bincount accumulates every score that lands in the same bin; assigning
    # through a fancy index would keep only one of them, so fractional scores
    # such as 1.2 and 1.7 would lose probability mass.
    counts = np.bincount(bins, minlength=int(values.max()) + 1)
    return counts / counts.sum()


def calculate_divergences(df):
    """Calculate KL, Jensen-Shannon and Hellinger measures between score distributions.

    Each of 'Privilege_Avg_Score', 'Protect_Avg_Score' and
    'Neutral_Avg_Score' is converted to a probability vector with
    ``scores_to_prob``; every unordered pair of columns is then compared.

    Returns:
        dict mapping 'KL Divergence', 'Jensen-Shannon Divergence' and
        'Hellinger Distance' to a dict of ``'<col1> vs <col2>'`` -> value.
    """
    score_columns = ['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']
    probabilities = {
        col: np.asarray(scores_to_prob(df[col]), dtype=float) for col in score_columns
    }

    # Each vector is sized by its own column's maximum score, so the lengths
    # can differ between columns. Zero-pad to a common support so the
    # pairwise measures below see arrays of equal shape.
    support = max(len(prob) for prob in probabilities.values())
    probabilities = {
        col: np.pad(prob, (0, support - len(prob)))
        for col, prob in probabilities.items()
    }

    divergences = {
        'KL Divergence': {},
        'Jensen-Shannon Divergence': {},
        'Hellinger Distance': {}
    }
    for i, col1 in enumerate(score_columns):
        for col2 in score_columns[i + 1:]:
            label = f'{col1} vs {col2}'
            p, q = probabilities[col1], probabilities[col2]
            # entropy(p, q) is the KL divergence of p from q.
            divergences['KL Divergence'][label] = entropy(p, q)
            # NOTE(review): scipy's jensenshannon returns the Jensen-Shannon
            # *distance* (square root of the divergence); the value is stored
            # unchanged under the existing key.
            divergences['Jensen-Shannon Divergence'][label] = jensenshannon(p, q)
            divergences['Hellinger Distance'][label] = hellinger_distance(p, q)
    return divergences

# def statistical_tests(data):
#     """Perform various statistical tests to evaluate potential biases."""
#     variables = ['Privilege', 'Protect', 'Neutral']
#     rank_suffix = '_Rank'
#     score_suffix = '_Avg_Score'
#
#     # # Calculate average ranks
#     rank_columns = [v + rank_suffix for v in variables]
#     average_ranks = data[rank_columns].mean()
#
#     # Statistical tests
#     rank_data = [data[col] for col in rank_columns]
#
#     # Pairwise tests
#     pairs = [
#         ('Privilege', 'Protect'),
#         ('Protect', 'Neutral'),
#         ('Privilege', 'Neutral')
#     ]
#
#     pairwise_results = {
#         'T-Test': {}
#     }
#
#     for (var1, var2) in pairs:
#         pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
#
#         # T-test for independent samples
#         t_stat, t_p = ttest_ind(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
#         pairwise_results['T-Test'][pair_name_score] = {"Statistic": t_stat, "p-value": t_p}
#
#     results = {
#         "Average Ranks": average_ranks.to_dict(),
#         "Friedman Test": {
#             "Statistic": friedmanchisquare(*rank_data).statistic,
#             "p-value": friedmanchisquare(*rank_data).pvalue
#         },
#         **pairwise_results,
#     }
#
#     return results

def disabled_statistical_tests(data):
    """Perform the Friedman test and pairwise independent-samples t-tests.

    Args:
        data: DataFrame containing, for each of 'Privilege', 'Protect' and
            'Neutral', a ``<name>_Rank`` column and a ``<name>_Avg_Score``
            column.

    Returns:
        dict with two entries:
          * "Friedman Test": statistic and p-value computed on the three
            rank columns.
          * "T-Test": for every pair of conditions, the ``ttest_ind``
            statistic and p-value computed on the average-score columns,
            keyed by ``'<col1> vs <col2>'``.
    """
    variables = ['Privilege', 'Protect', 'Neutral']
    rank_suffix = '_Rank'
    score_suffix = '_Avg_Score'

    # One series per condition, in the order passed to the Friedman test.
    rank_data = [data[v + rank_suffix] for v in variables]

    # Pairwise tests
    pairs = [
        ('Privilege', 'Protect'),
        ('Protect', 'Neutral'),
        ('Privilege', 'Neutral')
    ]

    t_test_results = {}
    for var1, var2 in pairs:
        score_col1 = f'{var1}{score_suffix}'
        score_col2 = f'{var2}{score_suffix}'

        # T-test for independent samples
        t_stat, t_p = ttest_ind(data[score_col1], data[score_col2])
        t_test_results[f'{score_col1} vs {score_col2}'] = {
            "Statistic": t_stat,
            "p-value": t_p,
        }

    # Run the Friedman test once and read both fields from the same result.
    friedman_result = friedmanchisquare(*rank_data)

    return {
        "Friedman Test": {
            "Statistic": friedman_result.statistic,
            "p-value": friedman_result.pvalue
        },
        'T-Test': t_test_results,
    }