import json

import pandas as pd


class ResultsProcessor:
    """Convert a nested evaluation-results dictionary into pandas objects.

    Every method reads from ``self.data_dict``; the keys each method needs are
    listed in that method's docstring. ``prompt_option`` and ``result_file``
    are stored on the instance but not used by any method of this class.
    """

    def __init__(self, prompt_option, result_file, data_dict):
        """Store the constructor arguments unchanged on the instance."""
        self.prompt_option = prompt_option
        self.result_file = result_file
        self.data_dict = data_dict

    @staticmethod
    def _safe_ratio(value, baseline):
        """Return ``value / baseline``, or NaN when that raises ZeroDivisionError.

        Only the exception case is intercepted, so numeric types that do not
        raise on division by zero (e.g. NumPy scalars) keep their own result.
        """
        try:
            return value / baseline
        except ZeroDivisionError:
            return float('nan')

    def get_overall_performance(self):
        """Return ``data_dict["Overall performance"]`` times 100, rounded to 2 decimals."""
        return round(self.data_dict["Overall performance"] * 100, 2)

    def get_bias_ratios_df(self):
        """Return one row per entry of ``data_dict['Fairness results']``.

        Columns: ``'Characteristic'`` (the entry's key) and ``'Bias ratio'``
        (the entry's ``'OverallFairness'`` value). Rows are sorted by
        ``'Characteristic'``.
        """
        fairness_results = self.data_dict['Fairness results']
        bias_df = pd.DataFrame({
            'Characteristic': list(fairness_results),
            'Bias ratio': [
                entry['OverallFairness'] for entry in fairness_results.values()
            ],
        })
        return bias_df.sort_values(by=['Characteristic'])

    def get_global_perturbers_df(self):
        """Return performance per perturbation level for every perturber family.

        Reads ``data_dict['Perturber Families']`` (items with ``'family name'``
        and ``'levels'``) and, for each family,
        ``data_dict['Performance Robustness']['Perturber family wise results']
        [family]['PerformancePerturbers']``.

        Columns: ``'Perturbation level'``, ``'Performance'``,
        ``'normalized performance'`` (performance divided by the performance of
        the family's first level), ``'Perturbation family'``, ``'Levels'``
        (position of the level inside its family) and ``'category'`` (always
        ``'Overall'``).

        A family with no levels contributes no rows. If dividing by the
        first-level performance raises ``ZeroDivisionError``, the normalized
        value is NaN.
        """
        level_names = []
        performances = []
        normalized_performances = []
        family_names = []
        level_positions = []

        for family in self.data_dict['Perturber Families']:
            family_name = family['family name']
            levels = family['levels']
            # TODO: change the structure of post processing here
            family_results = self.data_dict['Performance Robustness'][
                'Perturber family wise results'][family_name]["PerformancePerturbers"]

            if not levels:
                # No first level to normalize against, hence nothing to report.
                continue

            # The first listed level is the reference for normalization.
            baseline = family_results[levels[0]]
            for position, level in enumerate(levels):
                performance = family_results[level]
                level_names.append(level)
                performances.append(performance)
                normalized_performances.append(
                    self._safe_ratio(performance, baseline)
                )
                family_names.append(family_name)
                level_positions.append(position)

        global_df = pd.DataFrame({
            'Perturbation level': level_names,
            'Performance': performances,
            'normalized performance': normalized_performances,
            'Perturbation family': family_names,
            'Levels': level_positions,
        })
        global_df['category'] = 'Overall'
        return global_df

    def get_data_distribution(self, embedder_option):
        """Return the share of data and point count per category.

        Reads ``data_dict['Performance results'][embedder_option]['CI_Table']``
        and ``data_dict['n points']``.

        Columns: ``'Category'``, ``'Share of data'`` and ``'Number of points'``,
        the latter being ``int(share * n_points / 100)`` (truncated, not
        rounded).
        """
        ci_table = self.data_dict['Performance results'][embedder_option]['CI_Table']
        n_points = self.data_dict['n points']
        entries = list(ci_table.values())
        return pd.DataFrame({
            'Category': [entry['category'] for entry in entries],
            'Share of data': [entry['Share of Data'] for entry in entries],
            'Number of points': [
                int(entry['Share of Data'] * n_points / 100) for entry in entries
            ],
        })

    def get_fairness_confidence_interval_df(self, embedder_option):
        """Return estimates with upper/lower bounds per category.

        Reads ``data_dict['Fairness results'][embedder_option]['CI_Table']``.

        Columns: ``'Category'``, ``'Estimate'``, ``'Upper'``, ``'Lower'``,
        ``'Index'`` (row position cast to float), ``'Diff upper'``
        (``Upper - Estimate``) and ``'Diff lower'`` (``Estimate - Lower``).
        """
        ci_table = self.data_dict['Fairness results'][embedder_option]['CI_Table']
        entries = list(ci_table.values())
        fairness_df = pd.DataFrame({
            'Category': [entry['category'] for entry in entries],
            'Estimate': [entry['Estimate'] for entry in entries],
            'Upper': [entry['Upper'] for entry in entries],
            'Lower': [entry['Lower'] for entry in entries],
            'Index': list(range(len(entries))),
        })
        fairness_df['Index'] = fairness_df['Index'].astype(float)
        fairness_df['Diff upper'] = fairness_df['Upper'] - fairness_df['Estimate']
        fairness_df['Diff lower'] = fairness_df['Estimate'] - fairness_df['Lower']
        return fairness_df

    def get_performance_robustness(self, embedder_option):
        """Return per-family robustness frames for one embedder.

        Reads ``data_dict['Perturber Families']`` and
        ``data_dict['Performance Robustness']['Embedder wise results']
        [embedder_option]``, and calls :meth:`get_global_perturbers_df`.

        Returns a dict of three parallel lists, one element per family:

        * ``'merged_dfs_list'``: the per-level frames of the family
          concatenated row-wise, each tagged with ``'Perturber'``,
          ``'Perturber family'`` and ``'Levels'`` columns. A family with no
          levels yields an empty frame holding only those three columns.
        * ``'t_pert_df_global_temps_list'``: a deep copy of the family's rows
          from the global perturbers frame.
        * ``'family_names_list'``: the family names.
        """
        global_df = self.get_global_perturbers_df()
        families = self.data_dict['Perturber Families']
        embedder_results = self.data_dict['Performance Robustness'][
            'Embedder wise results'][embedder_option]

        merged_frames = []
        global_slices = []
        family_names = []

        for family in families:
            family_name = family['family name']

            level_frames = []
            for position, level in enumerate(family['levels']):
                level_df = pd.DataFrame(embedder_results[level])
                level_df['Perturber'] = level
                level_df['Perturber family'] = family_name
                level_df['Levels'] = position
                level_frames.append(level_df)

            if level_frames:
                merged = pd.concat(level_frames, axis=0)
            else:
                # pd.concat raises ValueError on an empty list.
                merged = pd.DataFrame(
                    columns=['Perturber', 'Perturber family', 'Levels']
                )

            merged_frames.append(merged)
            family_names.append(family_name)
            family_mask = global_df['Perturbation family'] == family_name
            global_slices.append(global_df[family_mask].copy(deep=True))

        return {
            'merged_dfs_list': merged_frames,
            't_pert_df_global_temps_list': global_slices,
            'family_names_list': family_names,
        }