Spaces:
Runtime error
Runtime error
# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: light
#       format_version: '1.5'
#       jupytext_version: 1.16.2
#   kernelspec:
#     display_name: temps
#     language: python
#     name: temps
# ---
| # + | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from astropy.io import fits | |
| import os | |
| from astropy.table import Table | |
| from temps.utils import nmad | |
| from scipy import stats | |
| from pathlib import Path | |
| # - | |
# Directory containing the photometric-redshift catalogues (one FITS file per method).
parent_dir = '/data/astro/scratch/lcabayol/EUCLID/DAz/DC2_results_to_share/'

# +
# FITS catalogues to be processed, one per photo-z method.
fits_files = [
    'GDE_RF_full.fits',
    'GDE_PHOSPHOROS_V2_full.fits',
    'OIL_LEPHARE_full.fits',
    'JDV_DNF_A_full.fits',
    'JSP_FRANKENZ_full.fits',
    'MBR_METAPHOR_full.fits',
    'GDE_ADABOOST_full.fits',
    'CSC_GPZ_best_full.fits',
    'SFO_CPZ_full.fits',
    'AAL_NNPZ_V3_full.fits',
]

# Column name given to each catalogue's REDSHIFT column once merged.
# Must stay in the same order as `fits_files`.
redshift_columns = [
    'REDSHIFT_RF',
    'REDSHIFT_PHOSPHOROS',
    'REDSHIFT_LEPHARE',
    'REDSHIFT_DNF',
    'REDSHIFT_FRANKENZ',
    'REDSHIFT_METAPHOR',
    'REDSHIFT_ADABOOST',
    'REDSHIFT_GPZ',
    'REDSHIFT_CPZ',
    'REDSHIFT_NNPZ',
]

# Start from an empty frame; the first catalogue read simply replaces it.
merged_df = pd.DataFrame()

for fits_file, redshift_col in zip(fits_files, redshift_columns):
    print(fits_file)

    # Read the first extension inside a context manager so the file handle is
    # released as soon as the table has been copied into pandas (the original
    # code left one open handle per catalogue).
    with fits.open(os.path.join(parent_dir, fits_file)) as hdu_list:
        df = Table(hdu_list[1].data).to_pandas()

    # Discard rows whose REDSHIFT is exactly 0, then keep only the columns
    # needed for the comparison, renaming REDSHIFT to the per-method name.
    df = df[df.REDSHIFT != 0]
    df = df[['ID', 'VIS', 'SPECZ', 'REDSHIFT']].rename(columns={'REDSHIFT': redshift_col})

    # Outer merge keeps objects that are missing from some of the catalogues;
    # their redshift for those methods is left as NaN.
    if merged_df.empty:
        merged_df = df
    else:
        merged_df = pd.merge(merged_df, df, on=['ID', 'VIS', 'SPECZ'], how='outer')
# -
# ## OPEN DATA

# +
modules_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'

# Context manager closes the FITS handle once the table is copied to pandas.
with fits.open(modules_dir / filename_valid) as hdu_list:
    cat_full = Table(hdu_list[1].data).to_pandas()
cat_full = cat_full[['ID', 'z_spec_S15', 'reliable_S15', 'mu_class_L07']]

# NOTE(review): these assignments align on the DataFrame index, not on a
# matched identifier. They are only correct if `merged_df` and `cat_full`
# list the same objects in the same row order -- confirm before relying on it.
merged_df['reliable_S15'] = cat_full.reliable_S15
merged_df['z_spec_S15'] = cat_full.z_spec_S15
merged_df['mu_class_L07'] = cat_full.mu_class_L07
merged_df['ID_catfull'] = cat_full.ID
# -

# Keep rows with positive z_spec_S15 and SPECZ, reliable_S15 == 1,
# mu_class_L07 == 1 and a finite VIS value.
merged_df_specz = merged_df[
    (merged_df.z_spec_S15 > 0)
    & (merged_df.SPECZ > 0)
    & (merged_df.reliable_S15 == 1)
    & (merged_df.mu_class_L07 == 1)
    & (merged_df.VIS != np.inf)
]
# ## ONLY SPECZ SAMPLE
# Per-method NMAD scatter and outlier fraction (|dz| / (1 + z) > 0.15),
# using only the objects for which that method has a redshift.
scatter, outliers = [], []
for method in redshift_columns:
    print(method)
    df_method = merged_df_specz.dropna(subset=method)
    zerr = (df_method.SPECZ - df_method[method]) / (1 + df_method.SPECZ)
    outlier_fraction = len(zerr[np.abs(zerr) > 0.15]) / len(zerr)
    print(outlier_fraction)
    scatter.append(nmad(zerr))
    outliers.append(outlier_fraction)
# +
# Legend labels, in the same order as `redshift_columns`.
labs = [
    'RF', 'PHOSPHOROS', 'LEPHARE', 'DNF', 'FRANKENZ',
    'METAPHOR', 'ADABOOST', 'GPZ', 'CPZ', 'NNPZ',
]

# One colour per method, sampled evenly from the colormap.
cmap = plt.get_cmap('tab20')
colors = [cmap(k / len(labs)) for k in range(len(labs))]

# NMAD versus outlier fraction (in percent), one marker per method.
plt.figure(figsize=(10, 6))
for i, (lab, colour) in enumerate(zip(labs, colors)):
    plt.scatter(outliers[i] * 100, scatter[i], color=colour, label=lab, marker='^')

plt.legend(fontsize=12)

plt.ylabel(r'NMAD $[\Delta z]$', fontsize=14)
plt.xlabel('Outlier fraction [%]', fontsize=14)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

plt.xlim(5, 35)
plt.ylim(0, 0.14)

plt.show()
# -
| # ### ADD TEMPS PREDICTIONS | |
| import torch | |
| from temps.archive import Archive | |
| from temps.temps_arch import EncoderPhotometry, MeasureZ | |
| from temps.temps import TempsModule | |
# +
data_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'

# Read the photometric catalogue; the context manager releases the file
# handle once the table has been copied into pandas.
with fits.open(data_dir / filename_valid) as hdu_list:
    cat_phot = Table(hdu_list[1].data).to_pandas()
# -

# Restrict the photometry to the objects in the spec-z selection.
cat_phot = cat_phot[cat_phot.ID.isin(merged_df_specz.ID_catfull)]

# +
# Build the network inputs (colours) from the catalogue fluxes.
photoz_archive = Archive(path='/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5',
                         only_zspec=True)
f, ferr = photoz_archive._extract_fluxes(catalogue=cat_phot)
col, colerr = photoz_archive._to_colors(f, ferr)
ID = cat_phot.ID

# +
modules_dir = Path('/nfs/pic.es/user/l/lcabayol/EUCLID/TEMPS/data/models')

# Load the "DA" weights on CPU for both networks.
nn_features = EncoderPhotometry()
nn_features.load_state_dict(torch.load(modules_dir / 'modelF_DA.pt', map_location=torch.device('cpu')))
nn_z = MeasureZ(num_gauss=6)
nn_z.load_state_dict(torch.load(modules_dir / 'modelZ_DA.pt', map_location=torch.device('cpu')))

temps_module = TempsModule(nn_features, nn_z)
z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)

# One row per object: catalogue ID and the TEMPS redshift; rows with NaN are dropped.
df = pd.DataFrame(np.c_[ID, z], columns=['ID', 'TEMPS'])
df = df.dropna()
# -

# Attach the TEMPS redshift to the spec-z sample through the photometric-catalogue ID.
merged_df_specz = merged_df_specz.merge(df, left_on='ID_catfull', right_on='ID')
# Redshift columns to evaluate: the catalogue methods plus TEMPS.
redshift_columns = redshift_columns + ['TEMPS']
# Per-method NMAD scatter and outlier fraction (|dz| / (1 + z) > 0.15),
# using only the objects for which that method has a redshift.
scatter, outliers = [], []
for method in redshift_columns:
    print(method)
    df_method = merged_df_specz.dropna(subset=method)
    zerr = (df_method.SPECZ - df_method[method]) / (1 + df_method.SPECZ)
    outlier_fraction = len(zerr[np.abs(zerr) > 0.15]) / len(zerr)
    print(outlier_fraction)
    scatter.append(nmad(zerr))
    outliers.append(outlier_fraction)
# +
# Legend labels, in the same order as `redshift_columns` (TEMPS last).
labs = [
    'RF', 'PHOSPHOROS', 'LEPHARE', 'DNF', 'FRANKENZ',
    'METAPHOR', 'ADABOOST', 'GPZ', 'CPZ', 'NNPZ',
    'TEMPS',
]

# One colour per method, sampled evenly from the colormap.
cmap = plt.get_cmap('tab20')
colors = [cmap(k / len(labs)) for k in range(len(labs))]

# NMAD versus outlier fraction (in percent), one marker per method.
plt.figure(figsize=(10, 6))
for i, (lab, colour) in enumerate(zip(labs, colors)):
    plt.scatter(outliers[i] * 100, scatter[i], color=colour, label=lab, marker='^')

plt.legend(fontsize=12)

plt.ylabel(r'NMAD $[\Delta z]$', fontsize=14)
plt.xlabel('Outlier fraction [%]', fontsize=14)

plt.xticks(fontsize=14)
plt.yticks(fontsize=14)

plt.xlim(5, 35)
plt.ylim(0, 0.14)

plt.show()
# -
# ## ANOTHER SELECTION

# +
# FITS catalogues to be processed, one per photo-z method.
fits_files = [
    'GDE_RF_full.fits',
    'GDE_PHOSPHOROS_V2_full.fits',
    'OIL_LEPHARE_full.fits',
    'JDV_DNF_A_full.fits',
    'JSP_FRANKENZ_full.fits',
    'MBR_METAPHOR_full.fits',
    'GDE_ADABOOST_full.fits',
    'CSC_GPZ_best_full.fits',
    'SFO_CPZ_full.fits',
    'AAL_NNPZ_V3_full.fits',
]

# Column name given to each catalogue's REDSHIFT column once merged.
# Must stay in the same order as `fits_files`.
redshift_columns = [
    'REDSHIFT_RF',
    'REDSHIFT_PHOSPHOROS',
    'REDSHIFT_LEPHARE',
    'REDSHIFT_DNF',
    'REDSHIFT_FRANKENZ',
    'REDSHIFT_METAPHOR',
    'REDSHIFT_ADABOOST',
    'REDSHIFT_GPZ',
    'REDSHIFT_CPZ',
    'REDSHIFT_NNPZ',
]

# Matching per-method name for each catalogue's USE column
# ('REDSHIFT_RF' -> 'USE_RF', ...). Derived from `redshift_columns` so the
# two lists cannot drift out of step.
use_columns = [name.replace('REDSHIFT_', 'USE_', 1) for name in redshift_columns]

# Start from an empty frame; the first catalogue read simply replaces it.
merged_df = pd.DataFrame()

for fits_file, redshift_col, use_col in zip(fits_files, redshift_columns, use_columns):
    print(fits_file)

    # Read the first extension inside a context manager so the file handle is
    # released as soon as the table has been copied into pandas.
    with fits.open(os.path.join(parent_dir, fits_file)) as hdu_list:
        df = Table(hdu_list[1].data).to_pandas()

    # Discard rows whose REDSHIFT is exactly 0 and keep the shared columns
    # plus this method's redshift and USE flag under per-method names.
    df = df[df.REDSHIFT != 0]
    df = df[['ID', 'VIS', 'SPECZ', 'REDSHIFT', 'L15PHZ', 'USE']].rename(
        columns={'REDSHIFT': redshift_col, 'USE': use_col}
    )

    # Outer merge keeps objects that are missing from some of the catalogues.
    if merged_df.empty:
        merged_df = df
    else:
        merged_df = pd.merge(merged_df, df, on=['ID', 'VIS', 'SPECZ', 'L15PHZ'], how='outer')
# -

# Reference redshift: SPECZ where it is positive, otherwise L15PHZ.
merged_df['comp_z'] = np.where(merged_df['SPECZ'] > 0, merged_df['SPECZ'], merged_df['L15PHZ'])

# Keep 0 < comp_z < 4 and VIS < 25 (an alternative cut previously tried here
# was VIS > 23.5). `.copy()` makes the result an independent frame, so the
# later column assignment on `merged_df` does not write into a slice.
merged_df = merged_df[(merged_df.comp_z > 0) & (merged_df.comp_z < 4) & (merged_df.VIS < 25)].copy()
# +
# The validation catalogue was previously opened twice from the same path
# (once as `cat_full`, once as `cat_phot`) and neither handle was closed.
# It is now read once, inside a context manager.
data_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
modules_dir = data_dir
filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
with fits.open(data_dir / filename_valid) as hdu_list:
    cat_full = Table(hdu_list[1].data).to_pandas()

# NOTE(review): index-aligned assignment, not an ID match. It is only correct
# if `merged_df` rows correspond to `cat_full` rows with the same index label
# -- confirm before relying on it.
merged_df['ID_catfull'] = cat_full.ID
# -

# Photometry restricted to the objects kept in `merged_df`.
cat_phot = cat_full[cat_full.ID.isin(merged_df.ID_catfull)]

# +
# Build the network inputs (colours) from the catalogue fluxes.
photoz_archive = Archive(path='/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5',
                         only_zspec=False)
f, ferr = photoz_archive._extract_fluxes(catalogue=cat_phot)
col, colerr = photoz_archive._to_colors(f, ferr)
ID = cat_phot.ID
# +
modules_dir = Path('/nfs/pic.es/user/l/lcabayol/EUCLID/TEMPS/data/models')


def _load_temps_module(tag):
    """Build a TempsModule from the weights saved under the given file tag.

    Loads ``modelF_<tag>.pt`` into an EncoderPhotometry and ``modelZ_<tag>.pt``
    into a MeasureZ(num_gauss=6), both mapped to CPU, from ``modules_dir``.
    """
    cpu = torch.device('cpu')
    features_net = EncoderPhotometry()
    features_net.load_state_dict(torch.load(modules_dir / f'modelF_{tag}.pt', map_location=cpu))
    redshift_net = MeasureZ(num_gauss=6)
    redshift_net.load_state_dict(torch.load(modules_dir / f'modelZ_{tag}.pt', map_location=cpu))
    return TempsModule(features_net, redshift_net)


# The same colours are run through three sets of weights. A fresh input
# tensor is built for every call, as in the original code. `pz` is overwritten
# each time; only the point estimates and odds are kept per model.
temps_module = _load_temps_module('DA')
z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)

temps_module = _load_temps_module('z')
znoda, pz, odds_noda = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)

temps_module = _load_temps_module('L15')
z_L15, pz, odds_L15 = temps_module.get_pz(input_data=torch.Tensor(col), return_pz=True)

# One row per object: ID plus (redshift, odds) for each of the three models.
# Rows containing NaN are dropped.
df = pd.DataFrame(
    np.c_[ID, z, odds, znoda, odds_noda, z_L15, odds_L15],
    columns=['ID', 'TEMPS', 'flag_TEMPS', 'TEMPS_noda', 'flag_TEMPSnoda', 'TEMPS_L15', 'flag_L15'],
)
df = df.dropna()
# +
# Quantile of each flag column used as the acceptance threshold. With 0.3,
# objects whose flag value is at or above the 30th percentile get USE = 1,
# i.e. roughly the top 70% are kept (the previous comments said "50th
# percentile / top 50%", which did not match the value used).
percent = 0.3

# (flag column, USE column) for each of the three TEMPS models.
for flag_col, use_col in [
    ('flag_TEMPS', 'USE_TEMPS'),
    ('flag_TEMPSnoda', 'USE_TEMPS_noda'),
    ('flag_L15', 'USE_TEMPS_L15'),
]:
    threshold = df[flag_col].quantile(percent)
    # 1 where the flag reaches the threshold, 0 otherwise.
    df[use_col] = np.where(df[flag_col] >= threshold, 1, 0)
# -
# Attach the TEMPS predictions and USE flags through the photometric-catalogue ID.
merged_df_temps = merged_df.merge(df, left_on='ID_catfull', right_on='ID')
# Redshift columns to evaluate: the ten catalogue methods plus the three TEMPS variants.
redshift_columns = [
    'REDSHIFT_RF',
    'REDSHIFT_PHOSPHOROS',
    'REDSHIFT_LEPHARE',
    'REDSHIFT_DNF',
    'REDSHIFT_FRANKENZ',
    'REDSHIFT_METAPHOR',
    'REDSHIFT_ADABOOST',
    'REDSHIFT_GPZ',
    'REDSHIFT_CPZ',
    'REDSHIFT_NNPZ',
] + ['TEMPS', 'TEMPS_noda', 'TEMPS_L15']
use_columns = use_columns + ['USE_TEMPS', 'USE_TEMPS_noda', 'USE_TEMPS_L15']
merged_df_temps = merged_df_temps[merged_df_temps.VIS < 25]
# For every method: select 0.2 < z_method < 2.6 and VIS < 24.5, then keep only
# the objects that method flags as usable. `size` is the fraction of the
# selected sample that survives the USE cut.
scatter, outliers, size = [], [], []
for method, use in zip(redshift_columns, use_columns):
    print(method)
    predicted = merged_df_temps.loc[:, method]
    df_method = merged_df_temps[(predicted > 0.2) & (predicted < 2.6)]
    df_method = df_method[df_method.VIS < 24.5]
    norm_size = len(df_method)
    df_method = df_method[df_method.loc[:, use] == 1]
    zerr = (df_method.comp_z - df_method[method]) / (1 + df_method.comp_z)
    sigma = nmad(zerr)
    scatter.append(sigma)
    outlier_fraction = len(zerr[np.abs(zerr) > 0.15]) / len(df_method)
    outliers.append(outlier_fraction)
    kept_fraction = len(df_method) / norm_size
    size.append(kept_fraction)
    print(sigma, outlier_fraction, kept_fraction)
# Metrics on the faint sample, 23.5 < VIS < 25. The selection does not depend
# on the method, so it is computed once rather than on every iteration.
# No USE-flag cut is applied here, and the per-method USE column is not needed.
# NOTE(review): rows where a method has no redshift (NaN) are not dropped, so
# they still count in the outlier-fraction denominator -- confirm this is intended.
scatter_faint, outliers_faint, size_faint = [], [], []
df_method = merged_df_temps[(merged_df_temps.loc[:, 'VIS'] > 23.5) & (merged_df_temps.loc[:, 'VIS'] < 25)]
for method in redshift_columns:
    print(method)
    zerr = (df_method.comp_z - df_method[method]) / (1 + df_method.comp_z)
    sigma = nmad(zerr)
    outlier_fraction = len(zerr[np.abs(zerr) > 0.15]) / len(df_method)
    scatter_faint.append(sigma)
    outliers_faint.append(outlier_fraction)
    size_faint.append(len(df_method))
    print(sigma, outlier_fraction, len(df_method))
# +
import matplotlib.pyplot as plt
import numpy as np
from pastamarkers import markers

# Legend labels, in the same order as `redshift_columns`.
labs = [
    'RF', 'PHOSPHOROS', 'LEPHARE', 'DNF', 'FRANKENZ', 'METAPHOR',
    'ADABOOST', 'GPZ', 'CPZ', 'NNPZ', 'TEMPS', 'TEMPS - no DA', 'TEMPS - L15'
]

# One marker per label: pasta shapes for the catalogue methods, plain
# matplotlib markers for the three TEMPS variants.
markers_pasta = [
    markers.penne, markers.conchiglie, markers.tortellini, markers.creste,
    markers.spaghetti, markers.ravioli, markers.tagliatelle, markers.mezzelune,
    markers.puntine, markers.stelline, 's', 'o', '^',
]
labs_faint = [f"{lab}_faint" for lab in labs]  # Labels for the faint data

# One colour per method, sampled evenly from the colormap.
cmap = plt.get_cmap('tab20')
colors = [cmap(i / len(labs)) for i in range(len(labs))]

# Two panels stacked vertically, each with its own x axis.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12), sharex=False)

# Top panel. These two variants get a legend entry only (plotted at NaN).
legend_only = {'TEMPS - no DA', 'TEMPS - L15'}
# (dx, dy) shift of the annotation relative to the marker; two labels have
# their own shift, everything else uses the default.
text_offsets = {'CPZ': (-0.2, 0.001), 'ADABOOST': (-0.5, -0.004)}
default_offset = (-0.5, 0.001)

for i, lab in enumerate(labs):
    marker_style = dict(color=colors[i], label=lab, marker=markers_pasta[i], s=300)
    if lab in legend_only:
        ax1.scatter(np.nan, np.nan, **marker_style)
        continue
    x_pos, y_pos = outliers[i] * 100, scatter[i]
    ax1.scatter(x_pos, y_pos, **marker_style)
    dx, dy = text_offsets.get(lab, default_offset)
    # Annotate each point with the retained sample fraction, in percent.
    ax1.text(x_pos + dx, y_pos + dy, f'{int(np.around(size[i] * 100))}',
             fontsize=12, verticalalignment='bottom')

ax1.set_ylabel(r'NMAD $[\Delta z]$', fontsize=24)
ax1.legend(fontsize=14)
ax1.tick_params(axis='both', which='major', labelsize=20)

# Bottom panel: faint-sample metrics, all methods plotted, no annotations.
for i, lab in enumerate(labs):
    ax2.scatter(outliers_faint[i] * 100, scatter_faint[i],
                color=colors[i], label=lab, marker=markers_pasta[i], s=300)

ax2.set_ylabel(r'NMAD $[\Delta z]$', fontsize=24)
ax2.set_xlabel('Outlier fraction [%]', fontsize=24)
ax2.tick_params(axis='both', which='major', labelsize=20)

plt.tight_layout()
#plt.savefig('Comparison_paper.pdf', bbox_inches='tight')
plt.show()
# -
# NOTE(review): `cat_val` and `cat_all` are not defined in the visible part of
# this script, and no RA/DEC columns are selected into `merged_df` above --
# confirm where these come from before running this cell.
# Match the validation catalogue to the full catalogue on sky position, then
# attach the matched reference columns to `merged_df`.
reference_columns = ['RA', 'DEC', 'z_spec_S15', 'photo_z_L15', 'reliable_S15', 'mu_class_L07']
cat_val_z = cat_val[['RA', 'DEC']].merge(cat_all[reference_columns], on=['RA', 'DEC'])
merged_df = merged_df.merge(cat_val_z, on=['RA', 'DEC'])