Zekun Wu committed on
Commit
9dd76eb
1 Parent(s): a6c512f
Files changed (1) hide show
  1. util/evaluation.py +30 -30
util/evaluation.py CHANGED
@@ -9,45 +9,45 @@ from scipy.stats import ttest_ind, friedmanchisquare, rankdata, ttest_rel
9
  from statsmodels.stats.multicomp import pairwise_tukeyhsd
10
  from scipy.stats import ttest_1samp
11
 
12
- # def bootstrap_t_test(data1, data2, num_bootstrap=1000):
13
- # """Perform a bootstrapped t-test."""
14
- # observed_t_stat, _ = ttest_ind(data1, data2)
15
- # combined = np.concatenate([data1, data2])
16
- # t_stats = []
17
- #
18
- # for _ in range(num_bootstrap):
19
- # np.random.shuffle(combined)
20
- # new_data1 = combined[:len(data1)]
21
- # new_data2 = combined[len(data1):]
22
- # t_stat, _ = ttest_ind(new_data1, new_data2)
23
- # t_stats.append(t_stat)
24
- #
25
- # p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
26
- # return observed_t_stat, p_value
27
-
28
-
29
  def bootstrap_t_test(data1, data2, num_bootstrap=1000):
30
- """Perform a bootstrapped paired t-test for mean difference being zero."""
31
- # Calculate the observed differences between paired samples
32
- differences = data1 - data2
33
- # Compute the observed t-statistic for the differences
34
- observed_t_stat, _ = ttest_1samp(differences, 0)
35
-
36
  t_stats = []
37
 
38
  for _ in range(num_bootstrap):
39
- # Resample the differences with replacement
40
- resampled_diffs = np.random.choice(differences, size=len(differences), replace=True)
41
- # Perform a one-sample t-test on the resampled differences against zero
42
- t_stat, _ = ttest_1samp(resampled_diffs, 0)
43
- # Append the t-statistic to the list
44
  t_stats.append(t_stat)
45
 
46
- # Calculate the p-value as the proportion of bootstrap t-statistics
47
- # that are as extreme as or more extreme than the observed t-statistic
48
  p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
49
  return observed_t_stat, p_value
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def posthoc_friedman(data, variables, rank_suffix='_Rank'):
52
  """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
53
  ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()
 
9
  from statsmodels.stats.multicomp import pairwise_tukeyhsd
10
  from scipy.stats import ttest_1samp
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def bootstrap_t_test(data1, data2, num_bootstrap=1000):
13
+ """Perform a bootstrapped t-test."""
14
+ observed_t_stat, _ = ttest_ind(data1, data2)
15
+ combined = np.concatenate([data1, data2])
 
 
 
16
  t_stats = []
17
 
18
  for _ in range(num_bootstrap):
19
+ np.random.shuffle(combined)
20
+ new_data1 = combined[:len(data1)]
21
+ new_data2 = combined[len(data1):]
22
+ t_stat, _ = ttest_ind(new_data1, new_data2)
 
23
  t_stats.append(t_stat)
24
 
 
 
25
  p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
26
  return observed_t_stat, p_value
27
 
28
+
29
+ # def bootstrap_t_test(data1, data2, num_bootstrap=1000):
30
+ # """Perform a bootstrapped paired t-test for mean difference being zero."""
31
+ # # Calculate the observed differences between paired samples
32
+ # differences = data1 - data2
33
+ # # Compute the observed t-statistic for the differences
34
+ # observed_t_stat, _ = ttest_1samp(differences, 0)
35
+ #
36
+ # t_stats = []
37
+ #
38
+ # for _ in range(num_bootstrap):
39
+ # # Resample the differences with replacement
40
+ # resampled_diffs = np.random.choice(differences, size=len(differences), replace=True)
41
+ # # Perform a one-sample t-test on the resampled differences against zero
42
+ # t_stat, _ = ttest_1samp(resampled_diffs, 0)
43
+ # # Append the t-statistic to the list
44
+ # t_stats.append(t_stat)
45
+ #
46
+ # # Calculate the p-value as the proportion of bootstrap t-statistics
47
+ # # that are as extreme as or more extreme than the observed t-statistic
48
+ # p_value = np.sum(np.abs(t_stats) >= np.abs(observed_t_stat)) / num_bootstrap
49
+ # return observed_t_stat, p_value
50
+
51
  def posthoc_friedman(data, variables, rank_suffix='_Rank'):
52
  """Perform a post-hoc analysis for the Friedman test using pairwise comparisons."""
53
  ranked_data = data[[v + rank_suffix for v in variables]].to_numpy()