Spaces:

holistic-ai
/

job-fair

Running

App Files Files Community

Zekun Wu committed on May 18, 2024

Commit

9dd76eb

1 Parent(s): a6c512f

update

Browse files

Files changed (1) hide show

util/evaluation.py +30 -30

util/evaluation.py CHANGED Viewed

@@ -9,45 +9,45 @@ from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
 from statsmodels.stats.multicomp import pairwise_tukeyhsd
 from scipy.stats import ttest_1samp
-# def bootstrap_t_test(data1, data2, num_bootstrap=1000):
-#     """Perform a bootstrapped t-test."""
-#     observed_t_stat, _ = ttest_ind(data1, data2)
-#     combined = np.concatenate([data1, data2])
-#     t_stats = []
-#
-#     for _ in range(num_bootstrap):
-#         np.random.shuffle(combined)
-#         new_data1 = combined[:len(data1)]
-#         new_data2 = combined[len(data1):]
-#         t_stat, _ = ttest_ind(new_data1, new_data2)
-#         t_stats.append(t_stat)
-#
-#     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
-#     return observed_t_stat, p_value
 def bootstrap_t_test(data1, data2, num_bootstrap=1000):
-    """Perform a bootstrapped paired t-test for mean difference being zero."""
-    # Calculate the observed differences between paired samples
-    differences = data1 - data2
-    # Compute the observed t-statistic for the differences
-    observed_t_stat, _ = ttest_1samp(differences, 0)
     t_stats = []
     for _ in range(num_bootstrap):
-        # Resample the differences with replacement
-        resampled_diffs = np.random.choice(differences, size=len(differences), replace=True)
-        # Perform a one-sample t-test on the resampled differences against zero
-        t_stat, _ = ttest_1samp(resampled_diffs, 0)
-        # Append the t-statistic to the list
         t_stats.append(t_stat)
-    # Calculate the p-value as the proportion of bootstrap t-statistics
-    # that are as extreme as or more extreme than the observed t-statistic
     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
     return observed_t_stat, p_value
 def posthoc_friedman(data, variables, rank_suffix='_Rank'):
     """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
     ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()

 from statsmodels.stats.multicomp import pairwise_tukeyhsd
 from scipy.stats import ttest_1samp
 def bootstrap_t_test(data1, data2, num_bootstrap=1000):
+    """Perform a bootstrapped t-test."""
+    observed_t_stat, _ = ttest_ind(data1, data2)
+    combined = np.concatenate([data1, data2])
     t_stats = []
     for _ in range(num_bootstrap):
+        np.random.shuffle(combined)
+        new_data1 = combined[:len(data1)]
+        new_data2 = combined[len(data1):]
+        t_stat, _ = ttest_ind(new_data1, new_data2)
         t_stats.append(t_stat)
     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
     return observed_t_stat, p_value
+# def bootstrap_t_test(data1, data2, num_bootstrap=1000):
+#     """Perform a bootstrapped paired t-test for mean difference being zero."""
+#     # Calculate the observed differences between paired samples
+#     differences = data1 - data2
+#     # Compute the observed t-statistic for the differences
+#     observed_t_stat, _ = ttest_1samp(differences, 0)
+#
+#     t_stats = []
+#
+#     for _ in range(num_bootstrap):
+#         # Resample the differences with replacement
+#         resampled_diffs = np.random.choice(differences, size=len(differences), replace=True)
+#         # Perform a one-sample t-test on the resampled differences against zero
+#         t_stat, _ = ttest_1samp(resampled_diffs, 0)
+#         # Append the t-statistic to the list
+#         t_stats.append(t_stat)
+#
+#     # Calculate the p-value as the proportion of bootstrap t-statistics
+#     # that are as extreme as or more extreme than the observed t-statistic
+#     p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
+#     return observed_t_stat, p_value
 def posthoc_friedman(data, variables, rank_suffix='_Rank'):
     """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
     ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()