Zekun Wu committed on
Commit
0765d8d
1 Parent(s): c41e57c
pages/4_Evaluation_Multiple.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  import pandas as pd
3
  from io import StringIO
4
- from util.evaluation import statistical_tests, result_evaluation
5
 
6
  def app():
7
  st.title('Result Evaluation')
@@ -21,16 +21,25 @@ def app():
21
 
22
  st.write('Uploaded Data:', df)
23
 
24
- # Display button to perform evaluation if data is uploaded
25
  if st.button('Evaluate Data'):
26
  with st.spinner('Evaluating data...'):
 
27
  test_results = statistical_tests(df, "multiple")
28
  st.write('Test Results:', test_results)
29
  evaluation_results = result_evaluation(test_results, "multiple")
30
  st.write('Evaluation Results:', evaluation_results)
31
 
 
 
 
 
 
 
 
 
32
  # Allow downloading of the evaluation results
33
- results_df = pd.DataFrame.from_dict(evaluation_results, orient='index', columns=['Value'])
 
34
  st.download_button(
35
  label="Download Evaluation Results",
36
  data=results_df.to_csv().encode('utf-8'),
 
1
  import streamlit as st
2
  import pandas as pd
3
  from io import StringIO
4
+ from util.evaluation import statistical_tests, result_evaluation,calculate_correlations,calculate_divergences
5
 
6
  def app():
7
  st.title('Result Evaluation')
 
21
 
22
  st.write('Uploaded Data:', df)
23
 
 
24
  if st.button('Evaluate Data'):
25
  with st.spinner('Evaluating data...'):
26
+ # Existing statistical tests
27
  test_results = statistical_tests(df, "multiple")
28
  st.write('Test Results:', test_results)
29
  evaluation_results = result_evaluation(test_results, "multiple")
30
  st.write('Evaluation Results:', evaluation_results)
31
 
32
+ # New correlation calculations
33
+ correlation_results = calculate_correlations(df)
34
+ st.write('Correlation Results:', correlation_results)
35
+
36
+ # New divergence calculations
37
+ divergence_results = calculate_divergences(df)
38
+ st.write('Divergence Results:', divergence_results)
39
+
40
  # Allow downloading of the evaluation results
41
+ results_combined = {**evaluation_results, **correlation_results, **divergence_results}
42
+ results_df = pd.DataFrame.from_dict(results_combined, orient='index', columns=['Value'])
43
  st.download_button(
44
  label="Download Evaluation Results",
45
  data=results_df.to_csv().encode('utf-8'),
util/evaluation.py CHANGED
@@ -3,6 +3,64 @@ import numpy as np
3
  from scipy.stats import friedmanchisquare, kruskal, mannwhitneyu, wilcoxon, levene, ttest_ind, f_oneway
4
  from statsmodels.stats.multicomp import MultiComparison
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  def statistical_tests(data, test_type='multiple'):
7
  if test_type == 'multiple':
8
  variables = ['Privilege', 'Protect', 'Neutral']
 
3
  from scipy.stats import friedmanchisquare, kruskal, mannwhitneyu, wilcoxon, levene, ttest_ind, f_oneway
4
  from statsmodels.stats.multicomp import MultiComparison
5
 
6
+ import pandas as pd
7
+ import numpy as np
8
+ from scipy.stats import spearmanr, pearsonr, kendalltau, entropy
9
+ from scipy.spatial.distance import jensenshannon
10
+
11
+
12
def hellinger_distance(p, q):
    """Return the Hellinger distance between two probability vectors.

    The value is ``sqrt(0.5 * sum((sqrt(p) - sqrt(q)) ** 2))``.

    Args:
        p: Array-like of probabilities.
        q: Array-like of probabilities, broadcast-compatible with ``p``.

    Returns:
        The distance as a NumPy float.
    """
    # Element-wise gap between the square-rooted distributions.
    root_gap = np.sqrt(p) - np.sqrt(q)
    half_squared_sum = 0.5 * np.sum(root_gap ** 2)
    return np.sqrt(half_squared_sum)
15
+
16
+
17
def calculate_correlations(df):
    """Compute pairwise rank correlations between the three rank columns.

    Every unordered pair drawn from ``Privilege_Rank``, ``Protect_Rank`` and
    ``Neutral_Rank`` is scored with Spearman, Pearson and Kendall's tau.

    Args:
        df: DataFrame containing the three ``*_Rank`` columns.

    Returns:
        A dict keyed by ``'Spearman'``, ``'Pearson'`` and ``'Kendall Tau'``;
        each value maps ``'<col1> vs <col2>'`` to the correlation
        coefficient for that pair.
    """
    rank_columns = ['Privilege_Rank', 'Protect_Rank', 'Neutral_Rank']
    spearman, pearson, kendall = {}, {}, {}

    # Pair each column only with the ones after it so a pair appears once.
    for position, first in enumerate(rank_columns):
        for second in rank_columns[position + 1:]:
            label = f'{first} vs {second}'
            left, right = df[first], df[second]
            spearman[label] = spearmanr(left, right).correlation
            pearson[label] = pearsonr(left, right)[0]
            kendall[label] = kendalltau(left, right).correlation

    return {
        'Spearman': spearman,
        'Pearson': pearson,
        'Kendall Tau': kendall,
    }
32
+
33
+
34
def scores_to_prob(scores):
    """Convert a Series of scores into a probability vector over integer bins.

    Each score is truncated to an integer bin (``astype(int)``), and entry
    ``k`` of the result is the fraction of non-missing scores that fall in
    bin ``k``. The vector has length ``max_bin + 1`` and sums to 1.

    Scores are binned *before* counting. Counting distinct raw values first
    and then truncating lets several fractional scores (e.g. 1.2 and 1.7)
    collide on the same bin and overwrite one another, which loses
    probability mass.

    Args:
        scores: pandas Series of non-negative numeric scores; NaNs are
            ignored.

    Returns:
        1-D float ``numpy.ndarray`` of bin probabilities. An empty array is
        returned when there are no non-missing scores.

    Raises:
        ValueError: If any score truncates to a negative bin
            (raised by ``numpy.bincount``).
    """
    # NOTE(review): truncation toward zero is kept as the binning rule;
    # confirm it is the intended treatment of fractional averages.
    bins = scores.dropna().astype(int)
    if bins.empty:
        return np.zeros(0)

    # bincount accumulates every observation, so collisions add up.
    counts = np.bincount(bins.to_numpy())
    return counts / counts.sum()
41
+
42
+
43
def calculate_divergences(df):
    """Compare the three average-score distributions pairwise.

    Each ``*_Avg_Score`` column is turned into a probability vector with
    ``scores_to_prob``. Every unordered pair of columns is then scored with
    KL divergence, Jensen-Shannon divergence and Hellinger distance.

    Args:
        df: DataFrame containing ``Privilege_Avg_Score``,
            ``Protect_Avg_Score`` and ``Neutral_Avg_Score``.

    Returns:
        A dict keyed by ``'KL Divergence'``, ``'Jensen-Shannon Divergence'``
        and ``'Hellinger Distance'``; each value maps ``'<col1> vs <col2>'``
        to the measure for that pair. KL divergence is directional
        (``col1`` relative to ``col2``) and is ``inf`` when ``col2`` has
        zero probability where ``col1`` does not.
    """
    score_columns = ['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']
    probabilities = {col: scores_to_prob(df[col]) for col in score_columns}

    # Each vector is sized by its own column's maximum score, so lengths can
    # differ between columns. Zero-pad to a shared support; otherwise the
    # element-wise measures below raise on mismatched shapes.
    width = max(len(dist) for dist in probabilities.values())
    probabilities = {
        col: np.pad(dist, (0, width - len(dist)))
        for col, dist in probabilities.items()
    }

    divergences = {
        'KL Divergence': {},
        'Jensen-Shannon Divergence': {},
        'Hellinger Distance': {},
    }
    for position, col1 in enumerate(score_columns):
        for col2 in score_columns[position + 1:]:
            pair = f'{col1} vs {col2}'
            p, q = probabilities[col1], probabilities[col2]
            divergences['KL Divergence'][pair] = entropy(p, q)
            # scipy's jensenshannon returns the JS *distance*, i.e. the
            # square root of the divergence; square it to match the label.
            divergences['Jensen-Shannon Divergence'][pair] = jensenshannon(p, q) ** 2
            divergences['Hellinger Distance'][pair] = hellinger_distance(p, q)
    return divergences
61
+
62
+
63
+
64
  def statistical_tests(data, test_type='multiple'):
65
  if test_type == 'multiple':
66
  variables = ['Privilege', 'Protect', 'Neutral']