mmahesh873 committed on
Commit
92b387d
1 Parent(s): dbd94bb

added utils

Browse files
Files changed (1) hide show
  1. utils.py +135 -0
utils.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import json
3
+
4
+
5
class ResultsProcessor:
    """Reshape a nested evaluation-results dictionary into pandas DataFrames.

    Every accessor reads from ``data_dict`` and returns either a scalar or a
    freshly built DataFrame; ``data_dict`` itself is never modified.
    """

    def __init__(self, prompt_option, result_file, data_dict):
        """Store the inputs on the instance.

        Args:
            prompt_option: Kept as ``self.prompt_option``; not read by the
                methods of this class.
            result_file: Kept as ``self.result_file``; not read by the
                methods of this class.
            data_dict: Nested results dictionary that all accessors read.
        """
        self.prompt_option = prompt_option
        self.result_file = result_file
        self.data_dict = data_dict

    def get_overall_performance(self):
        """Return ``data_dict["Overall performance"]`` times 100, rounded to 2 places."""
        return round(self.data_dict["Overall performance"] * 100, 2)

    def get_bias_ratios_df(self):
        """Return one row per entry of ``data_dict['Fairness results']``.

        Returns:
            DataFrame with columns ``'Characteristic'`` (the entry's key) and
            ``'Bias ratio'`` (the entry's ``'OverallFairness'`` value), sorted
            by ``'Characteristic'``.
        """
        fairness_results = self.data_dict['Fairness results']
        ch_df = pd.DataFrame({
            'Characteristic': list(fairness_results),
            'Bias ratio': [
                val['OverallFairness'] for val in fairness_results.values()
            ],
        }).sort_values(by=['Characteristic'])
        return ch_df

    def get_global_perturbers_df(self):
        """Return per-level performance for every perturber family.

        For each family in ``data_dict['Perturber Families']`` the performance
        of each level is read from the family-wise robustness results and
        normalized by the performance of the family's first level.

        Returns:
            DataFrame with columns ``'Perturbation level'``, ``'Performance'``,
            ``'normalized performance'``, ``'Perturbation family'``,
            ``'Levels'`` (position of the level within its family) and
            ``'category'`` (always ``'Overall'``). A family with no levels
            contributes no rows. If the first-level performance is 0 the
            normalized values for that family are NaN.
        """
        family_levels = []
        perf_pert_values = []
        normalized_perf_pert_values = []
        family_names_list = []
        levels_index_list = []

        for item in self.data_dict['Perturber Families']:
            levels = item['levels']
            if not levels:
                # No baseline level to normalize against and no rows to add.
                continue
            family_name = item['family name']
            # TODO: change the structure of post processing here
            family_results = (
                self.data_dict['Performance Robustness']
                ['Perturber family wise results'][family_name]
                ["PerformancePerturbers"]
            )
            # The first listed level is used as the normalization baseline.
            original_perf = family_results[levels[0]]
            for level_index, level in enumerate(levels):
                perf = family_results[level]
                family_levels.append(level)
                perf_pert_values.append(perf)
                # Guard the ratio: a zero baseline would raise
                # ZeroDivisionError for plain Python numbers.
                if original_perf == 0:
                    normalized_perf_pert_values.append(float('nan'))
                else:
                    normalized_perf_pert_values.append(perf / original_perf)
                family_names_list.append(family_name)
                levels_index_list.append(level_index)

        t_pert_df_global = pd.DataFrame({
            'Perturbation level': family_levels,
            'Performance': perf_pert_values,
            'normalized performance': normalized_perf_pert_values,
            'Perturbation family': family_names_list,
            'Levels': levels_index_list,
        })
        t_pert_df_global['category'] = 'Overall'

        return t_pert_df_global

    def get_data_distribution(self, embedder_option):
        """Return the share of data and point count per category.

        Args:
            embedder_option: Key into ``data_dict['Performance results']``.

        Returns:
            DataFrame with columns ``'Category'``, ``'Share of data'`` and
            ``'Number of points'``. The point count is
            ``'Share of Data' * data_dict['n points'] / 100`` rounded to the
            nearest integer.
        """
        embedder_perf_ci_table = (
            self.data_dict['Performance results'][embedder_option]['CI_Table']
        )
        n_points = self.data_dict['n points']
        entries = list(embedder_perf_ci_table.values())

        t_df = pd.DataFrame({
            'Category': [val['category'] for val in entries],
            'Share of data': [val['Share of Data'] for val in entries],
            # round() rather than int(): truncation turns float round-off
            # such as 56.99999999999999 into 56 instead of 57.
            'Number of points': [
                int(round(val['Share of Data'] * n_points / 100))
                for val in entries
            ],
        })
        return t_df

    def get_fairness_confidence_interval_df(self, embedder_option):
        """Return the fairness confidence-interval table as a DataFrame.

        Args:
            embedder_option: Key into ``data_dict['Fairness results']``.

        Returns:
            DataFrame with columns ``'Category'``, ``'Estimate'``, ``'Upper'``,
            ``'Lower'``, ``'Index'`` (row position as float),
            ``'Diff upper'`` (``Upper - Estimate``) and ``'Diff lower'``
            (``Estimate - Lower``).
        """
        embedder_fair_ci_table = (
            self.data_dict['Fairness results'][embedder_option]['CI_Table']
        )
        entries = list(embedder_fair_ci_table.values())

        t_fair_df = pd.DataFrame({
            'Category': [val['category'] for val in entries],
            'Estimate': [val['Estimate'] for val in entries],
            'Upper': [val['Upper'] for val in entries],
            'Lower': [val['Lower'] for val in entries],
            'Index': list(range(len(entries))),
        })
        t_fair_df['Index'] = t_fair_df['Index'].astype(float)

        t_fair_df['Diff upper'] = t_fair_df['Upper'] - t_fair_df['Estimate']
        t_fair_df['Diff lower'] = t_fair_df['Estimate'] - t_fair_df['Lower']

        return t_fair_df

    def get_performance_robustness(self, embedder_option):
        """Collect per-family robustness tables for one embedder.

        Args:
            embedder_option: Key into
                ``data_dict['Performance Robustness']['Embedder wise results']``.

        Returns:
            Dict with three lists, one entry per perturber family and in the
            same order as ``data_dict['Perturber Families']``:

            * ``'merged_dfs_list'``: the embedder's per-level results
              concatenated row-wise, with added ``'Perturber'``,
              ``'Perturber family'`` and ``'Levels'`` columns. A family with
              no levels yields an empty DataFrame.
            * ``'t_pert_df_global_temps_list'``: the rows of
              ``get_global_perturbers_df()`` belonging to the family.
            * ``'family_names_list'``: the family names.
        """
        t_pert_df_global = self.get_global_perturbers_df()
        t_result = (
            self.data_dict['Performance Robustness']
            ['Embedder wise results'][embedder_option]
        )
        merged_dfs_list = []
        t_pert_df_global_temps_list = []
        family_names_list = []

        for item in self.data_dict['Perturber Families']:
            family_name = item['family name']
            dfs_list = []
            for level_index, level in enumerate(item['levels']):
                df = pd.DataFrame(t_result[level])
                df['Perturber'] = level
                df['Perturber family'] = family_name
                df['Levels'] = level_index
                dfs_list.append(df)
            # pd.concat raises ValueError on an empty list, so fall back to an
            # empty frame to keep the three output lists aligned.
            if dfs_list:
                merged_df = pd.concat(dfs_list, axis=0)
            else:
                merged_df = pd.DataFrame()
            merged_dfs_list.append(merged_df)
            family_names_list.append(family_name)

            family_mask = t_pert_df_global['Perturbation family'] == family_name
            t_pert_df_global_temps_list.append(
                t_pert_df_global[family_mask].copy(deep=True)
            )

        return {
            'merged_dfs_list': merged_dfs_list,
            't_pert_df_global_temps_list': t_pert_df_global_temps_list,
            'family_names_list': family_names_list,
        }