Spaces:

holistic-ai
/

job-fair

Sleeping

App Files Files Community

Zekun Wu committed on May 18, 2024

Commit

f335959

1 Parent(s): 634ac1c

update

Browse files

Files changed (1) hide show

util/evaluation.py +131 -43

util/evaluation.py CHANGED Viewed

@@ -1,12 +1,100 @@
 import pandas as pd
 import numpy as np
 from scipy.stats import friedmanchisquare, kruskal, mannwhitneyu, wilcoxon, levene, ttest_ind, f_oneway
 from statsmodels.stats.multicomp import MultiComparison
-import pandas as pd
-import numpy as np
 from scipy.stats import spearmanr, pearsonr, kendalltau, entropy
 from scipy.spatial.distance import jensenshannon
 def hellinger_distance(p, q):
@@ -59,47 +147,47 @@ def calculate_divergences(df):
                                                                                         probabilities[col2])
     return divergences
-def statistical_tests(data):
-    """Perform various statistical tests to evaluate potential biases."""
-    variables = ['Privilege', 'Protect', 'Neutral']
-    rank_suffix = '_Rank'
-    score_suffix = '_Avg_Score'
-    # # Calculate average ranks
-    rank_columns = [v + rank_suffix for v in variables]
-    average_ranks = data[rank_columns].mean()
-    # Statistical tests
-    rank_data = [data[col] for col in rank_columns]
-    # Pairwise tests
-    pairs = [
-        ('Privilege', 'Protect'),
-        ('Protect', 'Neutral'),
-        ('Privilege', 'Neutral')
-    ]
-    pairwise_results = {
-        'T-Test': {}
-    }
-    for (var1, var2) in pairs:
-        pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
-        # T-test for independent samples
-        t_stat, t_p = ttest_ind(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
-        pairwise_results['T-Test'][pair_name_score] = {"Statistic": t_stat, "p-value": t_p}
-    results = {
-        "Average Ranks": average_ranks.to_dict(),
-        "Friedman Test": {
-            "Statistic": friedmanchisquare(*rank_data).statistic,
-            "p-value": friedmanchisquare(*rank_data).pvalue
-        },
-        **pairwise_results,
-    }
-    return results
 def disabled_statistical_tests(data):
     """Perform various statistical tests to evaluate potential biases."""

 import pandas as pd
 import numpy as np
+from scipy import stats
 from scipy.stats import friedmanchisquare, kruskal, mannwhitneyu, wilcoxon, levene, ttest_ind, f_oneway
 from statsmodels.stats.multicomp import MultiComparison
 from scipy.stats import spearmanr, pearsonr, kendalltau, entropy
 from scipy.spatial.distance import jensenshannon
+from scipy.stats import ttest_ind, friedmanchisquare, rankdata
+from statsmodels.stats.multicomp import pairwise_tukeyhsd
+def bootstrap_t_test(data1, data2, num_bootstrap=1000):
+    """Estimate a two-sided p-value for the independent-samples t statistic by resampling.
+
+    Despite the name, the resampling scheme is a permutation test: the two
+    samples are pooled, the pool is shuffled in place (without replacement)
+    and split back into groups of the original sizes, and the t statistic is
+    recomputed for each shuffle. The p-value is the fraction of shuffles whose
+    |t| is at least as large as the observed |t|.
+
+    Shuffling uses ``np.random.shuffle``, so results depend on NumPy's global
+    random state; seed it beforehand for reproducible output.
+
+    Args:
+        data1: First sample (anything ``np.concatenate`` and ``len`` accept).
+        data2: Second sample.
+        num_bootstrap: Number of shuffles used to build the null distribution.
+
+    Returns:
+        Tuple ``(observed_t_stat, p_value)``. If the observed statistic is NaN
+        (for example both samples are constant), the p-value is NaN as well.
+    """
+    observed_t_stat, _ = ttest_ind(data1, data2)
+    # A NaN statistic compares False against everything, which would otherwise
+    # yield p = 0.0 and read as "maximally significant". Report NaN instead.
+    if np.isnan(observed_t_stat):
+        return observed_t_stat, np.nan
+    combined = np.concatenate([data1, data2])
+    split = len(data1)
+    t_stats = []
+    for _ in range(num_bootstrap):
+        np.random.shuffle(combined)
+        t_stat, _ = ttest_ind(combined[:split], combined[split:])
+        t_stats.append(t_stat)
+    # NaN statistics from degenerate shuffles never count as "at least as extreme".
+    extreme = np.abs(np.asarray(t_stats)) >= np.abs(observed_t_stat)
+    p_value = np.sum(extreme) / num_bootstrap
+    return observed_t_stat, p_value
+def posthoc_friedman(data, variables, rank_suffix='_Rank'):
+    """Run pairwise post-hoc comparisons on the rank columns used by the Friedman test.
+
+    For every pair of conditions, the per-row rank differences are tested
+    against zero with a normal approximation: Z is the mean difference divided
+    by its standard error (sample standard deviation with ``ddof=1`` over
+    ``sqrt(n)``), and the p-value is two-sided. No multiple-comparison
+    correction is applied to the returned p-values.
+
+    Args:
+        data: DataFrame containing one ``<variable><rank_suffix>`` column per variable.
+        variables: Condition names; pairs are reported in this order.
+        rank_suffix: Suffix appended to each variable to find its rank column.
+
+    Returns:
+        List of dicts with keys ``"Group1"``, ``"Group2"``, ``"Z"`` and ``"p-value"``.
+    """
+    ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
+    num_subjects, num_conditions = ranked_data.shape
+    comparisons = []
+    for i in range(num_conditions):
+        for j in range(i + 1, num_conditions):
+            diff = ranked_data[:, i] - ranked_data[:, j]
+            avg_diff = np.mean(diff)
+            se_diff = np.std(diff, ddof=1) / np.sqrt(num_subjects)
+            if se_diff == 0:
+                # Every row has the same difference: dividing would give 0/0 (NaN)
+                # or x/0 with a warning. Identical ranks mean no evidence of a
+                # difference; a constant non-zero gap is an unbounded Z.
+                z_value = 0.0 if avg_diff == 0 else float(np.sign(avg_diff) * np.inf)
+            else:
+                z_value = float(avg_diff / se_diff)
+            # The survival function keeps tail precision where 1 - cdf rounds to 0.
+            p_value = float(2 * stats.norm.sf(abs(z_value)))
+            comparisons.append({
+                "Group1": variables[i],
+                "Group2": variables[j],
+                "Z": z_value,
+                "p-value": p_value
+            })
+    return comparisons
+def statistical_tests(data):
+    """Summarise rank and score differences between the three candidate groups.
+
+    Reads the ``<group>_Rank`` and ``<group>_Avg_Score`` columns of ``data``
+    for the groups Privilege, Protect and Neutral, and returns a dict with the
+    mean rank per column, the Friedman test on the rank columns (with its
+    pairwise post-hoc comparisons), and a resampled t-test on the average
+    scores for each pair of groups.
+    """
+    groups = ['Privilege', 'Protect', 'Neutral']
+    rank_suffix = '_Rank'
+    score_suffix = '_Avg_Score'
+    # Mean rank of each group across all rows
+    rank_cols = [f'{group}{rank_suffix}' for group in groups]
+    mean_ranks = data[rank_cols].mean()
+    # Resampled t-test on the average scores of every pair of groups
+    group_pairs = (
+        ('Privilege', 'Protect'),
+        ('Protect', 'Neutral'),
+        ('Privilege', 'Neutral'),
+    )
+    t_tests = {}
+    for left, right in group_pairs:
+        left_col = f'{left}{score_suffix}'
+        right_col = f'{right}{score_suffix}'
+        statistic, p_val = bootstrap_t_test(data[left_col], data[right_col])
+        t_tests[f'{left_col} vs {right_col}'] = {"Statistic": statistic, "p-value": p_val}
+    # Friedman test over the rank columns, followed by pairwise post-hoc comparisons
+    friedman_stat, friedman_p = friedmanchisquare(*(data[col] for col in rank_cols))
+    posthoc = posthoc_friedman(data, groups, rank_suffix)
+    return {
+        "Average Ranks": mean_ranks.to_dict(),
+        "Friedman Test": {
+            "Statistic": friedman_stat,
+            "p-value": friedman_p,
+            "Post-hoc": posthoc,
+        },
+        "T-Test": t_tests,
+    }
 def hellinger_distance(p, q):
                                                                                         probabilities[col2])
     return divergences
+# def statistical_tests(data):
+#     """Perform various statistical tests to evaluate potential biases."""
+#     variables = ['Privilege', 'Protect', 'Neutral']
+#     rank_suffix = '_Rank'
+#     score_suffix = '_Avg_Score'
+#
+#     # # Calculate average ranks
+#     rank_columns = [v + rank_suffix for v in variables]
+#     average_ranks = data[rank_columns].mean()
+#
+#     # Statistical tests
+#     rank_data = [data[col] for col in rank_columns]
+#
+#     # Pairwise tests
+#     pairs = [
+#         ('Privilege', 'Protect'),
+#         ('Protect', 'Neutral'),
+#         ('Privilege', 'Neutral')
+#     ]
+#
+#     pairwise_results = {
+#         'T-Test': {}
+#     }
+#
+#     for (var1, var2) in pairs:
+#         pair_name_score = f'{var1}{score_suffix} vs {var2}{score_suffix}'
+#
+#         # T-test for independent samples
+#         t_stat, t_p = ttest_ind(data[f'{var1}{score_suffix}'], data[f'{var2}{score_suffix}'])
+#         pairwise_results['T-Test'][pair_name_score] = {"Statistic": t_stat, "p-value": t_p}
+#
+#     results = {
+#         "Average Ranks": average_ranks.to_dict(),
+#         "Friedman Test": {
+#             "Statistic": friedmanchisquare(*rank_data).statistic,
+#             "p-value": friedmanchisquare(*rank_data).pvalue
+#         },
+#         **pairwise_results,
+#     }
+#
+#     return results
 def disabled_statistical_tests(data):
     """Perform various statistical tests to evaluate potential biases."""