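"""Statistical bias tests over Privilege/Protect/Neutral ranks and scores.

Runs Friedman, Kruskal-Wallis, Mann-Whitney U, Wilcoxon, Levene, independent
t-test, one-way ANOVA, and (when ANOVA is significant) Tukey HSD, then turns
the raw statistics into plain-language verdicts.
"""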
import pandas as pd
import numpy as np
from scipy.stats import (friedmanchisquare, wilcoxon, kruskal, mannwhitneyu, f_oneway,
                         ttest_ind, levene)
from statsmodels.stats.multicomp import MultiComparison


def statistical_tests(data):
    """Run rank- and score-based comparisons across the three groups."""
    # Calculate average ranks per group
    average_ranks = data[['Privilege_Rank', 'Protect_Rank', 'Neutral_Rank']].mean()

    # Statistical tests
    stat_friedman, p_friedman = friedmanchisquare(data['Privilege_Rank'], data['Protect_Rank'], data['Neutral_Rank'])
    kw_stat, kw_p = kruskal(data['Privilege_Rank'], data['Protect_Rank'], data['Neutral_Rank'])
    mw_stat, mw_p = mannwhitneyu(data['Privilege_Rank'], data['Protect_Rank'])

    # Wilcoxon Signed-Rank Test between pairs
    if len(data) > 20:  # Check if the sample size is sufficient for Wilcoxon test
        p_value_privilege_protect = wilcoxon(data['Privilege_Rank'], data['Protect_Rank']).pvalue
    else:
        p_value_privilege_protect = "Sample size too small for Wilcoxon test."

    # Levene's Test for equality of variances
    levene_stat, levene_p = levene(data['Privilege_Avg_Score'], data['Protect_Avg_Score'])

    # T-test for independent samples (Privilege vs Protect)
    if levene_p > 0.05:  # Assume equal variances if Levene's test is not significant
        t_stat, t_p = ttest_ind(data['Privilege_Avg_Score'], data['Protect_Avg_Score'], equal_var=True)
    else:
        t_stat, t_p = ttest_ind(data['Privilege_Avg_Score'], data['Protect_Avg_Score'], equal_var=False)

    # ANOVA and post-hoc tests if applicable
    anova_stat, anova_p = f_oneway(data['Privilege_Avg_Score'], data['Protect_Avg_Score'], data['Neutral_Avg_Score'])
    if anova_p < 0.05:
        # Series.append was removed in pandas 2.0; concatenate the score columns instead
        combined_scores = pd.concat(
            [data['Privilege_Avg_Score'], data['Protect_Avg_Score'], data['Neutral_Avg_Score']],
            ignore_index=True)
        group_labels = np.repeat(['Privilege', 'Protect', 'Neutral'], len(data))
        mc = MultiComparison(combined_scores, group_labels)
        tukey_result = mc.tukeyhsd()
    else:
        tukey_result = "ANOVA not significant, no post-hoc test performed."

    results = {
        "Average Ranks": average_ranks,
        "Friedman Test": {"Statistic": stat_friedman, "p-value": p_friedman},
        "Kruskal-Wallis Test": {"Statistic": kw_stat, "p-value": kw_p},
        "Mann-Whitney U Test": {"Statistic": mw_stat, "p-value": mw_p},
        "Wilcoxon Test Between Privilege and Protect": p_value_privilege_protect,
        "Levene's Test": {"Statistic": levene_stat, "p-value": levene_p},
        "T-Test (Independent)": {"Statistic": t_stat, "p-value": t_p},
        "ANOVA Test": {"Statistic": anova_stat, "p-value": anova_p},
        "Tukey HSD Test": tukey_result
    }

    return results


def result_evaluation(test_results):
    """Translate the raw output of statistical_tests into plain-language verdicts."""
    evaluation = {}

    # Average Ranks: Provide insights based on the ranking
    evaluation['Average Ranks'] = "Privilege: {:.2f}, Protect: {:.2f}, Neutral: {:.2f}".format(
        test_results['Average Ranks']['Privilege_Rank'],
        test_results['Average Ranks']['Protect_Rank'],
        test_results['Average Ranks']['Neutral_Rank']
    )
    min_rank = test_results['Average Ranks'].idxmin()
    max_rank = test_results['Average Ranks'].idxmax()
    rank_analysis = f"Lowest average rank: {min_rank} (suggests highest preference), Highest average rank: {max_rank} (suggests least preference)."
    evaluation['Rank Analysis'] = rank_analysis

    # Friedman Test evaluation
    if test_results['Friedman Test']['p-value'] < 0.05:
        evaluation['Friedman Test'] = (
            "Significant differences between ranks observed (p = {:.5f}), "
            "suggesting potential bias.".format(test_results['Friedman Test']['p-value']))
    else:
        evaluation['Friedman Test'] = "No significant differences between ranks."

    # Kruskal-Wallis Test evaluation
    if test_results['Kruskal-Wallis Test']['p-value'] < 0.05:
        evaluation['Kruskal-Wallis Test'] = (
            "Significant differences among groups observed (p = {:.5f}), "
            "indicating potential biases.".format(test_results['Kruskal-Wallis Test']['p-value']))
    else:
        evaluation['Kruskal-Wallis Test'] = "No significant differences among groups."

    # Mann-Whitney U Test evaluation
    if test_results['Mann-Whitney U Test']['p-value'] < 0.05:
        evaluation['Mann-Whitney U Test'] = (
            "Significant difference between Privilege and Protect ranks (p = {:.5f}), "
            "suggesting bias.".format(test_results['Mann-Whitney U Test']['p-value']))
    else:
        evaluation['Mann-Whitney U Test'] = "No significant difference between Privilege and Protect ranks."

    # Wilcoxon Test evaluation (the stored value is a message string when the
    # sample was too small for the test to run, so guard before comparing)
    wilcoxon_p = test_results['Wilcoxon Test Between Privilege and Protect']
    if isinstance(wilcoxon_p, str):
        evaluation['Wilcoxon Test Between Privilege and Protect'] = wilcoxon_p
    elif wilcoxon_p < 0.05:
        evaluation['Wilcoxon Test Between Privilege and Protect'] = (
            "Significant rank difference between Privilege and Protect (p = {:.5f}), "
            "indicating bias.".format(wilcoxon_p))
    else:
        evaluation['Wilcoxon Test Between Privilege and Protect'] = (
            "No significant rank difference between Privilege and Protect.")

    # Levene's Test evaluation (the original message ignored the p-value)
    levene_p = test_results["Levene's Test"]['p-value']
    if levene_p < 0.05:
        evaluation["Levene's Test"] = (
            "Significant variance differences between Privilege and Protect (p = {:.5f}).".format(levene_p))
    else:
        evaluation["Levene's Test"] = (
            "No significant variance differences between Privilege and Protect (p = {:.5f}).".format(levene_p))

    # T-Test evaluation
    t_p = test_results['T-Test (Independent)']['p-value']
    if t_p < 0.05:
        evaluation['T-Test (Independent)'] = (
            "Significant mean difference between Privilege and Protect (p = {:.5f}).".format(t_p))
    else:
        evaluation['T-Test (Independent)'] = (
            "No significant mean difference between Privilege and Protect (p = {:.5f}).".format(t_p))

    # ANOVA Test evaluation
    anova_p = test_results['ANOVA Test']['p-value']
    if anova_p < 0.05:
        evaluation['ANOVA Test'] = (
            "Significant differences among all groups (p = {:.5f}); see the Tukey HSD results.".format(anova_p))
    else:
        evaluation['ANOVA Test'] = (
            "No significant differences among all groups (p = {:.5f}), "
            "no further post-hoc analysis required.".format(anova_p))

    # Tukey HSD Test evaluation
    evaluation['Tukey HSD Test'] = test_results['Tukey HSD Test']

    return evaluation
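

if __name__ == "__main__":
    # Minimal smoke test, assuming the input schema the functions above expect:
    # one row per evaluated item, with per-group ranks (Privilege_Rank,
    # Protect_Rank, Neutral_Rank) and average scores (Privilege_Avg_Score,
    # Protect_Avg_Score, Neutral_Avg_Score). The synthetic data below is
    # illustrative only and stands in for real evaluation output.
    rng = np.random.default_rng(42)
    n = 30
    demo = pd.DataFrame({
        'Privilege_Rank': rng.integers(1, 4, n),
        'Protect_Rank': rng.integers(1, 4, n),
        'Neutral_Rank': rng.integers(1, 4, n),
        'Privilege_Avg_Score': rng.normal(3.0, 0.5, n),
        'Protect_Avg_Score': rng.normal(3.2, 0.5, n),
        'Neutral_Avg_Score': rng.normal(3.1, 0.5, n),
    })

    results = statistical_tests(demo)
    for name, verdict in result_evaluation(results).items():
        print(f"{name}: {verdict}")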