Spaces:
Running
Running
import pandas as pd | |
import json | |
class ResultsProcessor:
    """Convert a nested results dictionary into values and pandas DataFrames.

    The instance only stores its three constructor arguments; every method
    reads from ``data_dict`` and builds a fresh object on each call.

    NOTE(review): the schema of ``data_dict`` is not defined in this file.
    The keys each method reads are listed in its docstring; a missing key
    raises ``KeyError``.
    """

    def __init__(self, prompt_option, result_file, data_dict):
        """Store the selected prompt option, result file and results dict."""
        self.prompt_option = prompt_option
        self.result_file = result_file
        self.data_dict = data_dict

    def get_overall_performance(self):
        """Return ``data_dict["Overall performance"]`` times 100, rounded to 2 places."""
        return round(self.data_dict["Overall performance"] * 100, 2)

    def get_bias_ratios_df(self):
        """Return one row per entry of ``data_dict['Fairness results']``.

        Columns: ``'Characteristic'`` (the entry's key) and ``'Bias ratio'``
        (the entry's ``'OverallFairness'`` value). Rows are sorted by
        ``'Characteristic'``.
        """
        fairness_results = self.data_dict['Fairness results']
        bias_df = pd.DataFrame({
            'Characteristic': list(fairness_results),
            'Bias ratio': [
                entry['OverallFairness'] for entry in fairness_results.values()
            ],
        })
        return bias_df.sort_values(by=['Characteristic'])

    def get_global_perturbers_df(self):
        """Return one row per (perturber family, level) with its performance.

        Reads ``data_dict['Perturber Families']`` (items with ``'family name'``
        and ``'levels'``) and, per family,
        ``data_dict['Performance Robustness']['Perturber family wise results']
        [family]['PerformancePerturbers']``.

        Columns: ``'Perturbation level'``, ``'Performance'``,
        ``'normalized performance'`` (performance divided by the performance
        at the family's first level), ``'Perturbation family'``, ``'Levels'``
        (0-based position of the level within its family) and ``'category'``
        (always ``'Overall'``).
        """
        performances = []
        normalized_performances = []
        level_names = []
        family_names = []
        level_positions = []

        for family in self.data_dict['Perturber Families']:
            family_name = family['family name']
            levels = family['levels']
            # TODO: change the structure of post-processing here
            family_results = self.data_dict['Performance Robustness'][
                'Perturber family wise results'][family_name]["PerformancePerturbers"]

            # The first level of each family is used as the reference value.
            baseline = family_results[levels[0]]
            level_names.extend(levels)

            for position, level in enumerate(levels):
                performance = family_results[level]
                performances.append(performance)
                # A zero baseline cannot be normalized against; report NaN
                # instead of raising ZeroDivisionError.
                if baseline != 0:
                    normalized_performances.append(performance / baseline)
                else:
                    normalized_performances.append(float('nan'))
                family_names.append(family_name)
                level_positions.append(position)

        global_df = pd.DataFrame({
            'Perturbation level': level_names,
            'Performance': performances,
            'normalized performance': normalized_performances,
            'Perturbation family': family_names,
            'Levels': level_positions,
        })
        global_df['category'] = 'Overall'
        return global_df

    def get_data_distribution(self, embedder_option):
        """Return the share of data and point count for each category.

        Reads ``data_dict['Performance results'][embedder_option]['CI_Table']``
        (entries with ``'category'`` and ``'Share of Data'``) and
        ``data_dict['n points']``.

        Columns: ``'Category'``, ``'Share of data'`` and ``'Number of points'``,
        where the count is ``share * n_points / 100`` rounded to the nearest
        integer.
        """
        ci_table = self.data_dict['Performance results'][embedder_option]['CI_Table']
        n_points = self.data_dict['n points']

        rows = list(ci_table.values())
        return pd.DataFrame({
            'Category': [row['category'] for row in rows],
            'Share of data': [row['Share of Data'] for row in rows],
            # Round rather than truncate: truncation turns e.g. 1.9999 into 1
            # when the stored share has itself been rounded.
            'Number of points': [
                int(round(row['Share of Data'] * n_points / 100)) for row in rows
            ],
        })

    def get_fairness_confidence_interval_df(self, embedder_option):
        """Return the fairness estimates with their bounds, one row per category.

        Reads ``data_dict['Fairness results'][embedder_option]['CI_Table']``
        (entries with ``'category'``, ``'Estimate'``, ``'Upper'``, ``'Lower'``).

        Columns: ``'Category'``, ``'Estimate'``, ``'Upper'``, ``'Lower'``,
        ``'Index'`` (row position as float), ``'Diff upper'``
        (``Upper - Estimate``) and ``'Diff lower'`` (``Estimate - Lower``).
        """
        ci_table = self.data_dict['Fairness results'][embedder_option]['CI_Table']

        rows = list(ci_table.values())
        fairness_df = pd.DataFrame({
            'Category': [row['category'] for row in rows],
            'Estimate': [row['Estimate'] for row in rows],
            'Upper': [row['Upper'] for row in rows],
            'Lower': [row['Lower'] for row in rows],
            'Index': list(range(len(rows))),
        })
        fairness_df['Index'] = fairness_df['Index'].astype(float)
        fairness_df['Diff upper'] = fairness_df['Upper'] - fairness_df['Estimate']
        fairness_df['Diff lower'] = fairness_df['Estimate'] - fairness_df['Lower']
        return fairness_df

    def get_performance_robustness(self, embedder_option):
        """Return per-family robustness frames for one embedder option.

        Reads ``data_dict['Perturber Families']`` and
        ``data_dict['Performance Robustness']['Embedder wise results']
        [embedder_option]``, which is indexed by level name and whose values
        are passed to ``pd.DataFrame``.

        Returns a dict of three lists, each with one element per family in
        the order of ``'Perturber Families'``:

        * ``'merged_dfs_list'``: the per-level frames of the family
          concatenated row-wise, with added columns ``'Perturber'`` (level
          name), ``'Perturber family'`` and ``'Levels'`` (0-based position).
        * ``'t_pert_df_global_temps_list'``: a deep copy of the family's rows
          from ``get_global_perturbers_df()``.
        * ``'family_names_list'``: the family name.
        """
        global_df = self.get_global_perturbers_df()
        families = self.data_dict['Perturber Families']
        embedder_results = self.data_dict['Performance Robustness'][
            'Embedder wise results'][embedder_option]

        merged_frames = []
        global_family_frames = []
        family_names = []

        for family in families:
            family_name = family['family name']

            level_frames = []
            for position, level in enumerate(family['levels']):
                level_df = pd.DataFrame(embedder_results[level])
                level_df['Perturber'] = level
                level_df['Perturber family'] = family_name
                level_df['Levels'] = position
                level_frames.append(level_df)

            merged_frames.append(pd.concat(level_frames, axis=0))
            family_names.append(family_name)

            in_family = global_df['Perturbation family'] == family_name
            global_family_frames.append(global_df[in_family].copy(deep=True))

        return {
            'merged_dfs_list': merged_frames,
            't_pert_df_global_temps_list': global_family_frames,
            'family_names_list': family_names,
        }