lauracabayol committed
Commit 00f4790 · unverified · 2 Parent(s): d1e8fb6 b25063d

Merge pull request #1 from lauracabayol/clean_code
.gitignore CHANGED
@@ -1,6 +1,7 @@
-/temps/__pycache__/*
-/notebooks/.ipynb_checkpoints/
-/notebooks/*.ipynb
+temps/__pycache__/*
+notebooks/.ipynb_checkpoints/
+notebooks/cache
+notebooks/*.ipynb
 /notebooks/developer_notebooks
 temps/.ipynb_checkpoints/
 *.ipynb
notebooks/{Fig7_colourspace.py → Colourspace.py} RENAMED
@@ -5,11 +5,11 @@
 #       extension: .py
 #       format_name: light
 #       format_version: '1.5'
-#     jupytext_version: 1.14.5
+#     jupytext_version: 1.16.2
 #   kernelspec:
-#     display_name: insight
+#     display_name: temps
 #     language: python
-#     name: insight
+#     name: temps
 # ---

 # # FIGURE COLOURSPACE IN THE PAPER
@@ -23,6 +23,7 @@ import os
 from astropy.io import fits
 from astropy.table import Table
 import torch
+from pathlib import Path

 #matplotlib settings
 from matplotlib import rcParams
@@ -30,19 +31,11 @@ import matplotlib.pyplot as plt
 rcParams["mathtext.fontset"] = "stix"
 rcParams["font.family"] = "STIXGeneral"

-# +
-#insight modules
-import sys
-sys.path.append('../temps')
-
-from archive import archive
-from utils import nmad
-from temps_arch import EncoderPhotometry, MeasureZ
-from temps import Temps_module
-from plots import plot_nz
-
+from temps.archive import Archive
+from temps.utils import nmad
+from temps.temps_arch import EncoderPhotometry, MeasureZ
+from temps.temps import TempsModule

-# -

 def estimate_som_map(df, plot_arg='z', nx=40, ny=40):
     """
@@ -98,10 +91,14 @@ def plot_som_map(som_data, plot_arg = 'z', vmin=0, vmax=1):

 # ### LOAD DATA

+#define here the directory containing the photometric catalogues
+parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
+modules_dir = Path('../data/models/')
+
 # +
 filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'

-hdu_list = fits.open(os.path.join(parent_dir,filename_valid))
+hdu_list = fits.open(parent_dir/filename_valid)
 cat = Table(hdu_list[1].data).to_pandas()
 cat = cat[cat['FLAG_PHOT']==0]
 cat = cat[cat['mu_class_L07']==1]
@@ -116,27 +113,29 @@ ID = cat['ID']
 VISmag = cat['MAG_VIS']
 zsflag = cat['reliable_S15']

-photoz_archive = archive(path = parent_dir,only_zspec=False)
+photoz_archive = Archive(path = parent_dir,only_zspec=False)
 f, ferr = photoz_archive._extract_fluxes(catalogue= cat)
 col, colerr = photoz_archive._to_colors(f, ferr)

+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
 # +
 dfs = {}

 for il, lab in enumerate(['z','L15','DA']):

     nn_features = EncoderPhotometry()
-    nn_features.load_state_dict(torch.load(os.path.join(modules_dir,f'modelF_{lab}.pt')))
+    nn_features.load_state_dict(torch.load(modules_dir / f'modelF_{lab}.pt', map_location=torch.device('cpu')))
     nn_z = MeasureZ(num_gauss=6)
-    nn_z.load_state_dict(torch.load(os.path.join(modules_dir,f'modelZ_{lab}.pt')))
-
-    temps = Temps_module(nn_features, nn_z)
-
-    z, zerr, pz, flag, odds = temps.get_pz(input_data=torch.Tensor(col),
+    nn_z.load_state_dict(torch.load(modules_dir / f'modelZ_{lab}.pt', map_location=torch.device('cpu')))
+
+    temps_module = TempsModule(nn_features, nn_z)
+
+    z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col),
                                       return_pz=True)
     # Create a DataFrame with the desired columns
-    df = pd.DataFrame(np.c_[ID, VISmag, z, flag, ztarget, zsflag, zerr, specz_or_photo],
-                      columns=['ID','VISmag','z','zflag', 'ztarget','zsflag','zuncert','S15_L15_flag'])
+    df = pd.DataFrame(np.c_[ID, VISmag, z, odds, ztarget, zsflag, specz_or_photo],
+                      columns=['ID','VISmag','z', 'odds','ztarget','zsflag','S15_L15_flag'])

     # Calculate additional columns or operations if needed
     df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
@@ -152,15 +151,15 @@ for il, lab in enumerate(['z','L15','DA']):
 # ### LOAD TRAINED MODELS AND EVALUATE PDFs AND REDSHIFT

 #define here the directory containing the photometric catalogues
-parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
-modules_dir = '../data/models/'
+parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
+modules_dir = Path('../data/models/')

 df_z = dfs['z']
 df_z_DA = dfs['DA']

 # ##### LOAD TRAIN SOM ON TRAINING DATA

-df_som = pd.read_csv(os.path.join(parent_dir,'som_dataframe.csv'), header = 0, sep =',')
+df_som = pd.read_csv(parent_dir/'som_dataframe.csv', header = 0, sep =',')
 df_z = df_z.merge(df_som, on = 'ID')
 df_z_DA = df_z_DA.merge(df_som, on = 'ID')

@@ -171,10 +170,10 @@ df_l15 = df_z[(df_z.ztarget>0)]
 df_l15_DA = df_z_DA[(df_z_DA.ztarget>0)]

 df_l15_euclid = df_z[(df_z.VISmag <24.5) & (df_z.z > 0.2) & (df_z.z < 2.6)]
-df_l15_euclid_cut= df_l15_euclid[df_l15_euclid.zflag>0.033]
+df_l15_euclid_cut= df_l15_euclid[df_l15_euclid.odds>df_l15_euclid['odds'].quantile(0.2)]

 df_l15_euclid_da = df_z_DA[(df_z_DA.VISmag <24.5) & (df_z_DA.z > 0.2) & (df_z_DA.z < 2.6)]
-df_l15_euclid_cut_da= df_l15_euclid_da[df_l15_euclid_da.zflag>0.018]
+df_l15_euclid_cut_da= df_l15_euclid_da[df_l15_euclid_da.odds>df_l15_euclid['odds'].quantile(0.2)]

 # ## MAKE SOM PLOT

@@ -186,7 +185,7 @@ fig, axs = plt.subplots(6, 4, figsize=(13, 15), sharex=True, sharey=True, gridsp
 # Plot in the top row (axs[0, i])
 #top row, spectroscopic sample
 columns = ['ztarget','z','zwerr','count']
-titles = [r'$z_{true}$',r'$z$',r'$z_{\rm error}$','Counts']
+titles = [r'$z_{true}$ (A)',r'$z$ (B)',r'$z_{\rm error}$ (C)','Counts']
 limits = [[0,4],[0,4],[-0.5,0.5],[0,50]]
 for ii in range(4):
     som_data = estimate_som_map(df_zspec, plot_arg=columns[ii], nx=40, ny=40)
@@ -245,13 +244,13 @@ axs[4, 0].set_ylabel(r'$y$', fontsize=14)
 axs[5, 0].set_ylabel(r'$y$', fontsize=14)


-fig.text(0.09, 0.815, r'$z_{\rm s}$ sample', va='center', rotation='vertical', fontsize=16)
-fig.text(0.09, 0.69, r'L15 sample', va='center', rotation='vertical', fontsize=16)
-fig.text(0.09, 0.56, r'L15 sample + DA', va='center', rotation='vertical', fontsize=14)
-fig.text(0.09, 0.44, r'$Euclid$ sample + DA', va='center', rotation='vertical', fontsize=14)
-fig.text(0.09, 0.3, r'$Euclid$ sample + QC', va='center', rotation='vertical', fontsize=14)
+fig.text(0.09, 0.815, r'$z_{\rm s}$ samp. (1)', va='center', rotation='vertical', fontsize=16)
+fig.text(0.09, 0.69, r'L15 samp. (2)', va='center', rotation='vertical', fontsize=16)
+fig.text(0.09, 0.56, r'L15 samp. + DA (3)', va='center', rotation='vertical', fontsize=14)
+fig.text(0.09, 0.44, r'$Euclid$ samp. + DA (4)', va='center', rotation='vertical', fontsize=14)
+fig.text(0.09, 0.3, r'$Euclid$ samp. + QC (5)', va='center', rotation='vertical', fontsize=14)

-fig.text(0.09, 0.17, r'$Euclid$ sample + DA + QC', va='center', rotation='vertical', fontsize=13)
+fig.text(0.09, 0.17, r'(5) + DA', va='center', rotation='vertical', fontsize=13)


 plt.savefig('SOM_colourspace.pdf', format='pdf', bbox_inches='tight', dpi=300)
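The substantive change in this notebook is the quality cut: the fixed, per-model `zflag` thresholds (0.033 and 0.018) become a 20%-quantile cut on the new `odds` output, which keeps a fixed completeness regardless of how the score is calibrated. A minimal, self-contained sketch of that pattern (toy data; column names mirror the notebook):

```python
import numpy as np
import pandas as pd

# Toy catalogue with an 'odds' quality score per source (random stand-ins).
rng = np.random.default_rng(0)
df = pd.DataFrame({'z': rng.uniform(0.2, 2.6, 1000),
                   'odds': rng.beta(5, 2, 1000)})

# Quantile-based quality cut: discard the 20% of sources with the lowest odds.
# Unlike a fixed threshold, this keeps 80% of the sample by construction.
threshold = df['odds'].quantile(0.2)
df_cut = df[df['odds'] > threshold]
print(f"kept {len(df_cut)}/{len(df)} sources above odds = {threshold:.3f}")
```

Note that the new DA cut reuses `df_l15_euclid['odds'].quantile(0.2)` rather than the DA frame's own quantile; if a per-sample threshold is intended, `df_l15_euclid_da['odds'].quantile(0.2)` would be the one to use.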
notebooks/Comparison_methodology.py ADDED
@@ -0,0 +1,517 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.16.2
+#   kernelspec:
+#     display_name: temps
+#     language: python
+#     name: temps
+# ---
+
+# +
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from astropy.io import fits
+import os
+from astropy.table import Table
+
+from temps.utils import nmad
+from scipy import stats
+from pathlib import Path
+# -
+
+#define here the directory containing the photometric catalogues
+parent_dir = '/data/astro/scratch/lcabayol/EUCLID/DAz/DC2_results_to_share/'
+
+
+# +
+# List of FITS files to be processed
+fits_files = [
+    'GDE_RF_full.fits',
+    'GDE_PHOSPHOROS_V2_full.fits',
+    'OIL_LEPHARE_full.fits',
+    'JDV_DNF_A_full.fits',
+    'JSP_FRANKENZ_full.fits',
+    'MBR_METAPHOR_full.fits',
+    'GDE_ADABOOST_full.fits',
+    'CSC_GPZ_best_full.fits',
+    'SFO_CPZ_full.fits',
+    'AAL_NNPZ_V3_full.fits'
+]
+
+# Corresponding redshift column names
+redshift_columns = [
+    'REDSHIFT_RF',
+    'REDSHIFT_PHOSPHOROS',
+    'REDSHIFT_LEPHARE',
+    'REDSHIFT_DNF',
+    'REDSHIFT_FRANKENZ',
+    'REDSHIFT_METAPHOR',
+    'REDSHIFT_ADABOOST',
+    'REDSHIFT_GPZ',
+    'REDSHIFT_CPZ',
+    'REDSHIFT_NNPZ'
+]
+
+# Initialize an empty DataFrame for merging
+merged_df = pd.DataFrame()
+
+# Process each FITS file
+for fits_file, redshift_col in zip(fits_files, redshift_columns):
+    print(fits_file)
+    # Open the FITS file
+    hdu_list = fits.open(os.path.join(parent_dir,fits_file))
+    df = Table(hdu_list[1].data).to_pandas()
+    df = df[df.REDSHIFT!=0]
+    df = df[['ID', 'VIS','SPECZ', 'REDSHIFT']].rename(columns={'REDSHIFT': redshift_col})
+    # Merge with the main DataFrame
+    if merged_df.empty:
+        merged_df = df
+    else:
+        merged_df = pd.merge(merged_df, df, on=['ID', 'VIS', 'SPECZ'], how='outer')
+
+
+# -
+
+# ## OPEN DATA
+
+# +
+modules_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
+filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
+
+hdu_list = fits.open(modules_dir/filename_valid)
+cat_full = Table(hdu_list[1].data).to_pandas()
+
+cat_full = cat_full[['ID','z_spec_S15','reliable_S15','mu_class_L07']]
+
+merged_df['reliable_S15'] = cat_full.reliable_S15
+merged_df['z_spec_S15'] = cat_full.z_spec_S15
+merged_df['mu_class_L07'] = cat_full.mu_class_L07
+merged_df['ID_catfull'] = cat_full.ID
+# -
+
+merged_df_specz = merged_df[(merged_df.z_spec_S15>0)&(merged_df.SPECZ>0)&(merged_df.reliable_S15==1)&(merged_df.mu_class_L07==1)&(merged_df.VIS!=np.inf)]
+
+# ## ONLY SPECZ SAMPLE
+
+scatter, outliers = [],[]
+for im, method in enumerate(redshift_columns):
+    print(method)
+    df_method = merged_df_specz.dropna(subset=method)
+    zerr = (df_method.SPECZ - df_method[method]) / (1 + df_method.SPECZ)
+    print(len(zerr[np.abs(zerr)>0.15]) / len(zerr))
+    scatter.append(nmad(zerr))
+    outliers.append(len(zerr[np.abs(zerr)>0.15]) / len(df_method))
+
+
+# +
+labs = [
+    'RF',
+    'PHOSPHOROS',
+    'LEPHARE',
+    'DNF',
+    'FRANKENZ',
+    'METAPHOR',
+    'ADABOOST',
+    'GPZ',
+    'CPZ',
+    'NNPZ',
+]
+
+# Colors from colormap
+cmap = plt.get_cmap('tab20')
+colors = [cmap(i / len(labs)) for i in range(len(labs))]
+
+# Plotting
+plt.figure(figsize=(10, 6))
+for i in range(len(labs)):
+    plt.scatter(outliers[i]*100, scatter[i], color=colors[i], label=labs[i], marker = '^')
+
+# Adding legend
+plt.legend(fontsize=12)
+plt.ylabel(r'NMAD $[\Delta z]$', fontsize=14)
+plt.xlabel('Outlier fraction [%]', fontsize=14)
+plt.xticks(fontsize=14)
+plt.yticks(fontsize=14)
+
+plt.xlim(5,35)
+plt.ylim(0,0.14)
+
+# Display plot
+plt.show()
+# -
+
+# ### ADD TEMPS PREDICTIONS
+
+import torch
+from temps.archive import Archive
+from temps.temps_arch import EncoderPhotometry, MeasureZ
+from temps.temps import TempsModule
+
+# +
+data_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
+filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
+
+hdu_list = fits.open(data_dir/filename_valid)
+cat_phot = Table(hdu_list[1].data).to_pandas()
+# -
+
+cat_phot = cat_phot[cat_phot.ID.isin(merged_df_specz.ID_catfull)]
+
+# +
+photoz_archive = Archive(path = '/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5',
+                         only_zspec=True)
+f, ferr = photoz_archive._extract_fluxes(catalogue= cat_phot)
+col, colerr = photoz_archive._to_colors(f, ferr)
+
+ID = cat_phot.ID
+
+# +
+modules_dir = Path('/nfs/pic.es/user/l/lcabayol/EUCLID/TEMPS/data/models')
+
+nn_features = EncoderPhotometry()
+nn_features.load_state_dict(torch.load(modules_dir / f'modelF_DA.pt', map_location=torch.device('cpu')))
+nn_z = MeasureZ(num_gauss=6)
+nn_z.load_state_dict(torch.load(modules_dir / f'modelZ_DA.pt', map_location=torch.device('cpu')))
+
+temps_module = TempsModule(nn_features, nn_z)
+
+z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col),
+                                  return_pz=True)
+df = pd.DataFrame(np.c_[ID, z],
+                  columns=['ID','TEMPS'])
+
+df = df.dropna()
+# -
+
+merged_df_specz = merged_df_specz.merge(df, left_on='ID_catfull', right_on='ID')
+
+# Corresponding redshift column names
+redshift_columns = redshift_columns + ['TEMPS']
+
+scatter, outliers = [],[]
+for im, method in enumerate(redshift_columns):
+    print(method)
+    df_method = merged_df_specz.dropna(subset=method)
+    zerr = (df_method.SPECZ - df_method[method]) / (1 + df_method.SPECZ)
+    print(len(zerr[np.abs(zerr)>0.15]) / len(zerr))
+    scatter.append(nmad(zerr))
+    outliers.append(len(zerr[np.abs(zerr)>0.15]) / len(df_method))
+
+
+# +
+labs = [
+    'RF',
+    'PHOSPHOROS',
+    'LEPHARE',
+    'DNF',
+    'FRANKENZ',
+    'METAPHOR',
+    'ADABOOST',
+    'GPZ',
+    'CPZ',
+    'NNPZ',
+    'TEMPS'
+]
+
+# Colors from colormap
+cmap = plt.get_cmap('tab20')
+colors = [cmap(i / len(labs)) for i in range(len(labs))]
+
+# Plotting
+plt.figure(figsize=(10, 6))
+for i in range(len(labs)):
+    plt.scatter(outliers[i]*100, scatter[i], color=colors[i], label=labs[i], marker = '^')
+
+# Adding legend
+plt.legend(fontsize=12)
+plt.ylabel(r'NMAD $[\Delta z]$', fontsize=14)
+plt.xlabel('Outlier fraction [%]', fontsize=14)
+plt.xticks(fontsize=14)
+plt.yticks(fontsize=14)
+
+plt.xlim(5,35)
+plt.ylim(0,0.14)
+
+# Display plot
+plt.show()
+# -
+
+# ## ANOTHER SELECTION
+
+# +
+# List of FITS files to be processed
+fits_files = [
+    'GDE_RF_full.fits',
+    'GDE_PHOSPHOROS_V2_full.fits',
+    'OIL_LEPHARE_full.fits',
+    'JDV_DNF_A_full.fits',
+    'JSP_FRANKENZ_full.fits',
+    'MBR_METAPHOR_full.fits',
+    'GDE_ADABOOST_full.fits',
+    'CSC_GPZ_best_full.fits',
+    'SFO_CPZ_full.fits',
+    'AAL_NNPZ_V3_full.fits'
+]
+
+# Corresponding redshift column names
+redshift_columns = [
+    'REDSHIFT_RF',
+    'REDSHIFT_PHOSPHOROS',
+    'REDSHIFT_LEPHARE',
+    'REDSHIFT_DNF',
+    'REDSHIFT_FRANKENZ',
+    'REDSHIFT_METAPHOR',
+    'REDSHIFT_ADABOOST',
+    'REDSHIFT_GPZ',
+    'REDSHIFT_CPZ',
+    'REDSHIFT_NNPZ'
+]
+
+use_columns = [
+    'USE_RF',
+    'USE_PHOSPHOROS',
+    'USE_LEPHARE',
+    'USE_DNF',
+    'USE_FRANKENZ',
+    'USE_METAPHOR',
+    'USE_ADABOOST',
+    'USE_GPZ',
+    'USE_CPZ',
+    'USE_NNPZ'
+]
+
+# Initialize an empty DataFrame for merging
+merged_df = pd.DataFrame()
+
+# Process each FITS file
+for fits_file, redshift_col, use_col in zip(fits_files, redshift_columns, use_columns):
+    print(fits_file)
+    # Open the FITS file
+    hdu_list = fits.open(os.path.join(parent_dir,fits_file))
+    df = Table(hdu_list[1].data).to_pandas()
+    df = df[df.REDSHIFT!=0]
+    df = df[['ID', 'VIS', 'SPECZ', 'REDSHIFT', 'L15PHZ', 'USE']].rename(columns={'REDSHIFT': redshift_col, 'USE': use_col})
+    # Merge with the main DataFrame
+    if merged_df.empty:
+        merged_df = df
+    else:
+        merged_df = pd.merge(merged_df, df, on=['ID', 'VIS', 'SPECZ','L15PHZ'], how='outer')
+
+
+# -
+
+merged_df['comp_z'] = np.where(merged_df['SPECZ'] > 0, merged_df['SPECZ'], merged_df['L15PHZ'])
+#merged_df = merged_df[(merged_df.comp_z>0)&(merged_df.comp_z<4)&(merged_df.VIS>23.5)]
+merged_df = merged_df[(merged_df.comp_z>0)&(merged_df.comp_z<4)&(merged_df.VIS<25)]
+
+# +
+modules_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
+filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
+
+hdu_list = fits.open(modules_dir/filename_valid)
+cat_full = Table(hdu_list[1].data).to_pandas()
+
+merged_df['ID_catfull'] = cat_full.ID
+
+# +
+data_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
+filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
+
+hdu_list = fits.open(data_dir/filename_valid)
+cat_phot = Table(hdu_list[1].data).to_pandas()
+# -
+
+cat_phot = cat_phot[cat_phot.ID.isin(merged_df.ID_catfull)]
+
+# +
+photoz_archive = Archive(path = '/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5',
+                         only_zspec=False)
+f, ferr = photoz_archive._extract_fluxes(catalogue= cat_phot)
+col, colerr = photoz_archive._to_colors(f, ferr)
+
+ID = cat_phot.ID
+
+# +
+modules_dir = Path('/nfs/pic.es/user/l/lcabayol/EUCLID/TEMPS/data/models')
+
+nn_features = EncoderPhotometry()
+nn_features.load_state_dict(torch.load(modules_dir/f'modelF_DA.pt', map_location=torch.device('cpu')))
+nn_z = MeasureZ(num_gauss=6)
+nn_z.load_state_dict(torch.load(modules_dir/f'modelZ_DA.pt', map_location=torch.device('cpu')))
+
+temps_module = TempsModule(nn_features, nn_z)
+
+z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col),
+                                  return_pz=True)
+
+nn_features = EncoderPhotometry()
+nn_features.load_state_dict(torch.load(modules_dir/f'modelF_z.pt', map_location=torch.device('cpu')))
+nn_z = MeasureZ(num_gauss=6)
+nn_z.load_state_dict(torch.load(modules_dir/f'modelZ_z.pt', map_location=torch.device('cpu')))
+
+temps_module = TempsModule(nn_features, nn_z)
+znoda, pz, odds_noda = temps_module.get_pz(input_data=torch.Tensor(col),
+                                           return_pz=True)
+
+nn_features = EncoderPhotometry()
+nn_features.load_state_dict(torch.load(modules_dir/f'modelF_L15.pt', map_location=torch.device('cpu')))
+nn_z = MeasureZ(num_gauss=6)
+nn_z.load_state_dict(torch.load(modules_dir/f'modelZ_L15.pt', map_location=torch.device('cpu')))
+
+temps_module = TempsModule(nn_features, nn_z)
+z_L15, pz, odds_L15 = temps_module.get_pz(input_data=torch.Tensor(col),
+                                          return_pz=True)
+
+df = pd.DataFrame(np.c_[ID, z, odds, znoda, odds_noda, z_L15, odds_L15],
+                  columns=['ID','TEMPS', 'flag_TEMPS', 'TEMPS_noda', 'flag_TEMPSnoda', 'TEMPS_L15', 'flag_L15'])
+
+df = df.dropna()
+
+# +
+percent=0.3
+df['USE_TEMPS'] = np.zeros(shape=len(df))
+# Calculate the 30th-percentile value of 'flag_TEMPS'
+threshold = df['flag_TEMPS'].quantile(percent)
+
+# Set 'USE_TEMPS' to 1 if 'flag_TEMPS' is in the top 70% (at or above the threshold)
+df['USE_TEMPS'] = np.where(df['flag_TEMPS'] >= threshold, 1, 0)
+
+# +
+percent=0.3
+df['USE_TEMPS_noda'] = np.zeros(shape=len(df))
+# Calculate the 30th-percentile value of 'flag_TEMPSnoda'
+threshold = df['flag_TEMPSnoda'].quantile(percent)
+
+# Set 'USE_TEMPS_noda' to 1 if 'flag_TEMPSnoda' is in the top 70% (at or above the threshold)
+df['USE_TEMPS_noda'] = np.where(df['flag_TEMPSnoda'] >= threshold, 1, 0)
+
+# +
+percent=0.3
+df['USE_TEMPS_L15'] = np.zeros(shape=len(df))
+# Calculate the 30th-percentile value of 'flag_L15'
+threshold = df['flag_L15'].quantile(percent)
+
+# Set 'USE_TEMPS_L15' to 1 if 'flag_L15' is in the top 70% (at or above the threshold)
+df['USE_TEMPS_L15'] = np.where(df['flag_L15'] >= threshold, 1, 0)
+# -
+
+merged_df_temps = merged_df.merge(df, left_on='ID_catfull', right_on='ID')
+
+# Corresponding redshift column names
+redshift_columns = [
+    'REDSHIFT_RF',
+    'REDSHIFT_PHOSPHOROS',
+    'REDSHIFT_LEPHARE',
+    'REDSHIFT_DNF',
+    'REDSHIFT_FRANKENZ',
+    'REDSHIFT_METAPHOR',
+    'REDSHIFT_ADABOOST',
+    'REDSHIFT_GPZ',
+    'REDSHIFT_CPZ',
+    'REDSHIFT_NNPZ'
+]
+
+redshift_columns = redshift_columns + ['TEMPS', 'TEMPS_noda', 'TEMPS_L15']
+use_columns = use_columns + ['USE_TEMPS','USE_TEMPS_noda', 'USE_TEMPS_L15']
+
+merged_df_temps = merged_df_temps[merged_df_temps.VIS <25]
+
+
+scatter, outliers, size = [],[],[]
+for method, use in zip(redshift_columns, use_columns):
+    print(method)
+    #df_method = merged_df_temps.dropna(subset=method)
+    df_method = merged_df_temps[(merged_df_temps.loc[:, method]>0.2)&(merged_df_temps.loc[:, method]<2.6)]
+    df_method = df_method[df_method.VIS<24.5]
+    norm_size = len(df_method)
+    df_method = df_method[df_method.loc[:, use]==1]
+    zerr = (df_method.comp_z - df_method[method]) / (1 + df_method.comp_z)
+    scatter.append(nmad(zerr))
+    outliers.append(len(zerr[np.abs(zerr)>0.15]) / len(df_method))
+    size.append(len(df_method)/norm_size)
+    print(nmad(zerr), len(zerr[np.abs(zerr)>0.15]) / len(df_method), len(df_method)/norm_size)
+
+
+scatter_faint, outliers_faint, size_faint = [],[],[]
+for method, use in zip(redshift_columns, use_columns):
+    print(method)
+    #df_method = merged_df_temps.dropna(subset=method)
+    df_method = merged_df_temps[(merged_df_temps.loc[:,'VIS']>23.5)&(merged_df_temps.loc[:,'VIS']<25)]
+    #df_method = df_method[df_method.loc[:, use]==1]
+    #df_method = merged_df_temps[(merged_df_temps.loc[:,'VIS']>23.5)&(merged_df_temps.loc[:,'VIS']<24.5)]
+    zerr = (df_method.comp_z - df_method[method]) / (1 + df_method.comp_z)
+    scatter_faint.append(nmad(zerr))
+    outliers_faint.append(len(zerr[np.abs(zerr)>0.15]) / len(df_method))
+    size_faint.append(len(df_method))
+    print(nmad(zerr), len(zerr[np.abs(zerr)>0.15]) / len(df_method), len(df_method))
+
+
+# +
+import matplotlib.pyplot as plt
+import numpy as np
+from pastamarkers import markers
+
+# Define labels for the models
+labs = [
+    'RF', 'PHOSPHOROS', 'LEPHARE', 'DNF', 'FRANKENZ', 'METAPHOR',
+    'ADABOOST', 'GPZ', 'CPZ', 'NNPZ', 'TEMPS', 'TEMPS - no DA', 'TEMPS - L15'
+]
+
+markers_pasta = [markers.penne, markers.conchiglie, markers.tortellini, markers.creste, markers.spaghetti, markers.ravioli, markers.tagliatelle, markers.mezzelune, markers.puntine, markers.stelline, 's', 'o', '^']
+
+labs_faint = [f"{lab}_faint" for lab in labs]  # Labels for the faint data
+
+
+# Colors from colormap
+cmap = plt.get_cmap('tab20')
+colors = [cmap(i / len(labs)) for i in range(len(labs))]
+
+# Create subplots with 2 panels stacked vertically
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12), sharex=False)
+
+# Plotting for the top panel
+for i in range(len(labs)):
+    if labs[i] == 'TEMPS - no DA' or labs[i] == 'TEMPS - L15':
+        ax1.scatter(np.nan, np.nan, color=colors[i], label=labs[i], marker=markers_pasta[i], s=300)
+    elif labs[i]=='CPZ':
+        ax1.scatter(outliers[i] * 100, scatter[i], color=colors[i], label=labs[i], marker=markers_pasta[i], s=300)
+        ax1.text(outliers[i] * 100 - 0.2, scatter[i] + 0.001, f'{int(np.around(size[i] * 100))}', fontsize=12, verticalalignment='bottom')
+
+    elif labs[i]=='ADABOOST':
+        ax1.scatter(outliers[i] * 100, scatter[i], color=colors[i], label=labs[i], marker=markers_pasta[i], s=300)
+        ax1.text(outliers[i] * 100 - 0.5, scatter[i] - 0.004, f'{int(np.around(size[i] * 100))}', fontsize=12, verticalalignment='bottom')
+
+    else:
+        ax1.scatter(outliers[i] * 100, scatter[i], color=colors[i], label=labs[i], marker=markers_pasta[i], s=300)
+        ax1.text(outliers[i] * 100 - 0.5, scatter[i] + 0.001, f'{int(np.around(size[i] * 100))}', fontsize=12, verticalalignment='bottom')
+
+# Customizations for the top plot
+ax1.set_ylabel(r'NMAD $[\Delta z]$', fontsize=24)
+ax1.legend(fontsize=14)
+ax1.tick_params(axis='both', which='major', labelsize=20)
+
+# Plotting for the bottom panel (faint data)
+for i in range(len(labs)):
+    ax2.scatter(outliers_faint[i] * 100, scatter_faint[i], color=colors[i], label=labs[i], marker=markers_pasta[i], s=300)
+
+# Customizations for the bottom plot
+ax2.set_ylabel(r'NMAD $[\Delta z]$', fontsize=24)
+ax2.set_xlabel('Outlier fraction [%]', fontsize=24)
+ax2.tick_params(axis='both', which='major', labelsize=20)
+
+# Display the plot
+plt.tight_layout()
+#plt.savefig('Comparison_paper.pdf', bbox_inches='tight')
+plt.show()
+
+# -
+
+cat_val_z = cat_val[['RA','DEC']].merge(cat_all[['RA','DEC','z_spec_S15','photo_z_L15','reliable_S15','mu_class_L07']], on = ['RA','DEC'])
+
+merged_df = merged_df.merge(cat_val_z, on = ['RA','DEC'])
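The scatter/outlier loops above rely on `nmad` from `temps.utils`. A minimal sketch of the convention it presumably follows (the 1.4826-scaled median absolute deviation, which matches a Gaussian's standard deviation), next to the outlier fraction computed alongside it:

```python
import numpy as np

def nmad(x):
    """Normalized median absolute deviation:
    1.4826 * median(|x - median(x)|)."""
    x = np.asarray(x)
    return 1.4826 * np.median(np.abs(x - np.median(x)))

# Residuals normalized as in the notebook: (specz - photoz) / (1 + specz)
zerr = np.array([0.01, -0.02, 0.005, 0.30, -0.01, 0.02])
print("NMAD:", nmad(zerr))                        # scatter, robust to outliers
print("outliers:", np.mean(np.abs(zerr) > 0.15))  # fraction with |dz/(1+z)| > 0.15
```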
notebooks/Feature_space.py CHANGED
@@ -5,11 +5,11 @@
 #       extension: .py
 #       format_name: light
 #       format_version: '1.5'
-#     jupytext_version: 1.14.5
+#     jupytext_version: 1.16.2
 #   kernelspec:
-#     display_name: insight
+#     display_name: temps
 #     language: python
-#     name: insight
+#     name: temps
 # ---

 # # DOMAIN ADAPTATION INTUITION
@@ -23,6 +23,9 @@ import os
 from astropy.io import fits
 from astropy.table import Table
 import torch
+from pathlib import Path
+import seaborn as sns
+

 #matplotlib settings
 from matplotlib import rcParams
@@ -30,28 +33,22 @@ import matplotlib.pyplot as plt
 rcParams["mathtext.fontset"] = "stix"
 rcParams["font.family"] = "STIXGeneral"

-# +
-#insight modules
-import sys
-sys.path.append('../temps')
-
-from archive import archive
-from utils import nmad
-from temps_arch import EncoderPhotometry, MeasureZ
-from temps import Temps_module
-from plots import plot_nz
-# -
+from temps.archive import Archive
+from temps.utils import nmad
+from temps.temps_arch import EncoderPhotometry, MeasureZ
+from temps.temps import TempsModule
+from temps.plots import plot_nz

 # ## LOAD DATA

 #define here the directory containing the photometric catalogues
-parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
-modules_dir = '../data/models/'
+parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
+modules_dir = Path('../data/models/')

 # +
 filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'

-hdu_list = fits.open(os.path.join(parent_dir,filename_valid))
+hdu_list = fits.open(parent_dir/filename_valid)
 cat = Table(hdu_list[1].data).to_pandas()
 cat = cat[cat['FLAG_PHOT']==0]
 cat = cat[cat['mu_class_L07']==1]
@@ -70,7 +67,7 @@ cat['specz_or_photo']=specz_or_photo

 # ### EXTRACT PHOTOMETRY

-photoz_archive = archive(path = parent_dir,only_zspec=False)
+photoz_archive = Archive(path = parent_dir,only_zspec=False)
 f, ferr = photoz_archive._extract_fluxes(catalogue= cat)
 col, colerr = photoz_archive._to_colors(f, ferr)

@@ -80,7 +77,7 @@ features_all = np.zeros((3,len(cat),10))
 for il, lab in enumerate(['z','L15','DA']):

     nn_features = EncoderPhotometry()
-    nn_features.load_state_dict(torch.load(os.path.join(modules_dir,f'modelF_{lab}.pt')))
+    nn_features.load_state_dict(torch.load(modules_dir/f'modelF_{lab}.pt', map_location=torch.device('cpu')))

     features = nn_features(torch.Tensor(col))
     features = features.detach().cpu().numpy()
@@ -132,7 +129,7 @@ autoencoder = Autoencoder(input_dim=10,
 criterion = nn.L1Loss()
 optimizer = optim.Adam(autoencoder.parameters(), lr=0.0001)

-# +
+# + jupyter={"outputs_hidden": true}
 # Define the number of epochs
 num_epochs = 100
 for epoch in range(num_epochs):
@@ -162,9 +159,7 @@ print('Training finished')

 # #### EVALUATE AUTOENCODER

-# + [markdown] jupyter={"source_hidden": true}
 # cat.to_csv('features_cat.csv', header=True, sep=',')
-# -

 indexes_specz = cat[(cat.specz_or_photo==0)&(cat.reliable_S15>0)].reset_index().index

@@ -173,6 +168,8 @@ for i in range(3):
     _, features = autoencoder(torch.Tensor(features_all[i]))
     features_all_reduced[i] = features.detach().cpu().numpy()

+features_all.shape
+
 # ### Plot the features

 start = 0
@@ -182,7 +179,6 @@ values_not_in_indexes_specz = all_values - set(indexes_specz)
 indexes_nospecz = sorted(values_not_in_indexes_specz)

 # +
-import seaborn as sns

 # Create subplots with three panels
 fig, axs = plt.subplots(1, 3, figsize=(15, 5))
@@ -223,14 +219,14 @@ axs[1].set_title('Trained on L15')

 # Third subplot
 features_all_reduced_nospecz = pd.DataFrame(features_all_reduced[2, indexes_nospecz, :]).drop_duplicates().values
-sns.kdeplot(x=features_all_reduced_nospecz[:, 0],
-            y=features_all_reduced_nospecz[:, 1],
+sns.kdeplot(x=features_all_reduced[2, indexes_nospecz, 0],
+            y=features_all_reduced[2, indexes_nospecz, 1],
             clip=(-1, 5),
             ax=axs[2],
             color='salmon',
             label='Wide-field sample')
-sns.kdeplot(x=features_all_reduced_specz[:, 0],
-            y=features_all_reduced_specz[:, 1],
+sns.kdeplot(x=features_all_reduced[2, indexes_specz, 0],
+            y=features_all_reduced[2, indexes_specz, 1],
             clip=(-1, 5),
             ax=axs[2],
             color='lightskyblue',
@@ -252,200 +248,7 @@ axs[2].legend(legend_handles, legend_labels, loc='upper right', fontsize=16)
 # Adjust layout
 plt.tight_layout()

-plt.savefig('Contourplot.pdf', bbox_inches='tight')
-plt.show()
-
-# -
-
-
-
-
-
-
-
-np.savetxt('features.txt',features_all_reduced.reshape(3*164816, 2))
-
-
-
-
-
-
-
-
-
-
-
-# +
-photoz_archive = archive(path = parent_dir,only_zspec=False)
-
-fig, ax = plt.subplots(ncols = 3, figsize=(15,4), sharex=True, sharey=True)
-colors = ['navy', 'goldenrod']
-titles = [r'Training: $z_s$', r'Training: L15',r'Training: $z_s$ + DA']
-x_min, x_max = -5,5
-y_min, y_max = -5,5
-x_grid, y_grid = np.meshgrid(np.linspace(x_min, x_max, 10), np.linspace(y_min, y_max, 10))
-xy_grid = np.vstack([x_grid.ravel(), y_grid.ravel()])
-density_grid = density_estimation(xy_grid).reshape(x_grid.shape)
-for il, lab in enumerate(['z','L15','DA']):
-
-
-    nn_features = EncoderPhotometry()
-    nn_features.load_state_dict(torch.load(os.path.join(modules_dir,f'modelF_{lab}.pt')))
-
-    for it, target_type in enumerate(['L15','zs']):
-        if target_type=='zs':
-            cat_sub = photoz_archive._select_only_zspec(cat)
-            cat_sub = photoz_archive._clean_zspec_sample(cat_sub)
-
-        elif target_type=='L15':
-            cat_sub = photoz_archive._exclude_only_zspec(cat)
-        else:
-            assert False
-
-        cat_sub = photoz_archive._clean_photometry(cat_sub)
-        print(cat_sub.shape)
-
-
-
-        f, ferr = photoz_archive._extract_fluxes(cat_sub)
-        col, colerr = photoz_archive._to_colors(f, ferr)
-
-        features = nn_features(torch.Tensor(col))
-        features = features.detach().cpu().numpy()
-
-
-        #xy = np.vstack([features[:1000,0], features[:1000,1]])
-        #zd = gaussian_kde(xy)(xy)
-        #ax[il].scatter(features[:1000,0], features[:1000,1],c=zd, s=3)
-
-        xy = np.vstack([features[:,0], features[:,1]])
-        density_estimation = gaussian_kde(xy)
-
-        # Define grid for plotting density lines
-
-        xy_grid = np.vstack([x_grid.ravel(), y_grid.ravel()])
-        density_grid = density_estimation(xy_grid).reshape(x_grid.shape)
-
-        # Plot contour lines representing density
-        ax[il].contour(x_grid, y_grid, density_grid, colors=colors[it], label = f'{target_type}')
-
-
-
-    ax[il].set_title(titles[il])
-    ax[il].set_xlim(-5,5)
-    ax[il].set_ylim(-5,5)
-
-
-ax[0].set_ylabel('Feature 1', fontsize=14)
-#plt.ylabel('Feature 2', fontsize=14)
-
-#assert False
-
-# -
-
-H
-
-H
-
-xedges
-
-yedges
-
-# +
-import matplotlib.colors as colors
-from matplotlib import path
-import numpy as np
-from matplotlib import pyplot as plt
-try:
-    from astropy.convolution import Gaussian2DKernel, convolve
-    astro_smooth = True
-except ImportError as IE:
-    astro_smooth = False
-
-np.random.seed(123)
-#t = np.linspace(-5,1.2,1000)
-x = features[:1000,0]
-y = features[:1000,1]
-
-H, xedges, yedges = np.histogram2d(x,y, bins=(10,10))
-xmesh, ymesh = np.meshgrid(xedges[:-1], yedges[:-1])
-
-# Smooth the contours (if astropy is installed)
-if astro_smooth:
-    kernel = Gaussian2DKernel(x_stddev=1.)
-    H=convolve(H,kernel)
-
-fig,ax = plt.subplots(1, figsize=(7,6))
-clevels = ax.contour(xmesh,ymesh,H.T,lw=.9,cmap='winter')#,zorder=90)
-ax.scatter(x,y,s=3)
-#ax.set_xlim(-20,5)
-#ax.set_ylim(-20,5)
-
-# Identify points within contours
-#p = clevels.collections[0].get_paths()
-#inside = np.full_like(x,False,dtype=bool)
-#for level in p:
-#    inside |= level.contains_points(zip(*(x,y)))
-
-#ax.plot(x[~inside],y[~inside],'kx')
-#plt.show(block=False)
-# -
-
-density_grid
-
-features.shape, zd.shape
-
-# + jupyter={"outputs_hidden": true}
-xy = np.vstack([features[:,0], features[:,1]])
-zd = gaussian_kde(xy)(xy)
-plt.scatter(features[:,0], features[:,1],c=zd)
-
-
-# +
-# Make the base corner plot
-figure = corner.corner(features[:,:2], quantiles=[0.16, 0.84], show_titles=False, color ='crimson')
-corner.corner(samples2, fig=fig)
-ndim=2
-# Extract the axes
-axes = np.array(figure.axes).reshape((ndim, ndim))
-
-
-for a in axes[np.triu_indices(ndim)]:
-    a.remove()
-
-# +
-import numpy as np
-import matplotlib.pyplot as plt
-from scipy.stats import gaussian_kde
-
-# Assuming 'features' is your data array with shape (n_samples, 2)
-
-# Calculate the density estimate
-xy = np.vstack([features[:,0], features[:,1]])
-density_estimation = gaussian_kde(xy)
-
-# Define grid for plotting density lines
-
-xy_grid = np.vstack([x_grid.ravel(), y_grid.ravel()])
-density_grid = density_estimation(xy_grid).reshape(x_grid.shape)
-
-# Plot contour lines representing density
-plt.contour(x_grid, y_grid, density_grid, colors='black')
-
-# Optionally, you can add a scatter plot on top of the density lines for better visualization
-#plt.scatter(features[:,0], features[:,1], color='blue', alpha=0.5)
-
-# Set labels and title
-plt.xlabel('Feature 1')
-plt.ylabel('Feature 2')
-plt.title('Density Lines Plot')
-
-# Show plot
+#plt.savefig('Contourplot.pdf', bbox_inches='tight')
 plt.show()

 # -
@@ -454,41 +257,6 @@ plt.show()



-corner_plot = corner.corner(Arinyo_preds,
-                            labels=[r'$b$', r'$\beta$', '$q_1$', '$k_{vav}$','$a_v$','$b_v$','$k_p$','$q_2$'],
-                            truths=Arinyo_coeffs_central[test_snap],
-                            truth_color='crimson')
-
-import corner
-figure = corner.corner(features, quantiles=[0.16, 0.5, 0.84], show_titles=False)
-axes = np.array(fig.axes).reshape((ndim, ndim))
-for a in axes[np.triu_indices(ndim)]:
-    a.remove()



-# +
-# My data
-x = features[:,0]
-y = features[:,1]
-
-# Perform the kernel density estimate
-k = stats.gaussian_kde(np.vstack([x, y]))
-xi, yi = np.mgrid[-5:5,-5:5]
-zi = k(np.vstack([xi.flatten(), yi.flatten()]))
-
-
-
-fig = plt.figure()
-ax = fig.gca()
-
-
-CS = ax.contour(xi, yi, zi.reshape(xi.shape), colors='crimson')
-
-ax.set_xlim(-5, 5)
-ax.set_ylim(-5, 5)
-
-plt.show()
-# -
notebooks/Fig6_qualitycut.py DELETED
@@ -1,164 +0,0 @@
-# ---
-# jupyter:
-#   jupytext:
-#     text_representation:
-#       extension: .py
-#       format_name: light
-#       format_version: '1.5'
-#     jupytext_version: 1.14.5
-#   kernelspec:
-#     display_name: insight
-#     language: python
-#     name: insight
-# ---
-
-# # FIGURE 6 IN THE PAPER
-
-# ## QUALITY CUTS
-
-# %load_ext autoreload
-# %autoreload 2
-
-import pandas as pd
-import numpy as np
-import os
-import torch
-from scipy import stats
-
-#matplotlib settings
-from matplotlib import rcParams
-import matplotlib.pyplot as plt
-rcParams["mathtext.fontset"] = "stix"
-rcParams["font.family"] = "STIXGeneral"
-
-#insight modules
-import sys
-sys.path.append('../temps')
-#from insight_arch import EncoderPhotometry, MeasureZ
-#from insight import Insight_module
-from archive import archive
-from utils import nmad
-from temps_arch import EncoderPhotometry, MeasureZ
-from temps import Temps_module
-
-
-# ### LOAD DATA (ONLY SPECZ)
-
-#define here the directory containing the photometric catalogues
-parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
-modules_dir = '../data/models/'
-
-photoz_archive = archive(path = parent_dir,only_zspec=True,flags_kept=[1. , 1.1, 1.4, 1.5, 2,2.1,2.4,2.5,3., 3.1, 3.4, 3.5, 4., 9. , 9.1, 9.3, 9.4, 9.5,11.1, 11.5, 12.1, 12.5, 13. , 13.1, 13.5, 14, ])
-f_test_specz, ferr_test_specz, specz_test ,VIS_mag_test = photoz_archive.get_testing_data()
-
-
-# ### LOAD TRAINED MODELS AND EVALUATE PDF OF RANDOM EXAMPLES
-
-# +
-# Initialize an empty dictionary to store DataFrames
-dfs = {}
-
-for il, lab in enumerate(['z','L15','DA']):
-
-    nn_features = EncoderPhotometry()
-    nn_features.load_state_dict(torch.load(os.path.join(modules_dir,f'modelF_{lab}.pt')))
-    nn_z = MeasureZ(num_gauss=6)
-    nn_z.load_state_dict(torch.load(os.path.join(modules_dir,f'modelZ_{lab}.pt')))
-
-    temps = Temps_module(nn_features, nn_z)
-
-    z, zerr, pz, flag, odds = temps.get_pz(input_data=torch.Tensor(f_test_specz),
-                                           return_pz=True)
-
-
-    # Create a DataFrame with the desired columns
-    df = pd.DataFrame(np.c_[z, flag, odds, specz_test],
-                      columns=['z','zflag', 'odds' ,'ztarget'])
-
-    # Calculate additional columns or operations if needed
-    df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
-
-    # Drop any rows with NaN values
-    df = df.dropna()
-
-    # Assign the DataFrame to a key in the dictionary
-    dfs[lab] = df
-
-# -
-
-# ### STATISTICS BASED ON OUR QUALITY CUT
-
-# +
-bin_edges = stats.mstats.mquantiles(df.zflag, np.arange(0,1.01,0.05))
-scatter, eta, xlab, xmag, xzs, flagmean = [],[],[],[],[],[]
-
-for k in range(len(bin_edges)-1):
-    edge_min = bin_edges[k]
-    edge_max = bin_edges[k+1]
-
-    df_bin = df[(df.zflag > edge_min)]
-
-
-    xlab.append(np.round(len(df_bin)/len(df),2)*100)
-    xzs.append(0.5*(df_bin.ztarget.min()+df_bin.ztarget.max()))
-    flagmean.append(np.mean(df_bin.zflag))
-    scatter.append(nmad(df_bin.zwerr))
-    eta.append(len(df_bin[np.abs(df_bin.zwerr)>0.15])/len(df)*100)
-
-
-# -
-
-# ### STATISTICS BASED ON ODDS
-
-# +
-bin_edges = stats.mstats.mquantiles(df.odds, np.arange(0,1.01,0.05))
-scatter_odds, eta_odds, xlab_odds, oddsmean = [],[],[],[]
-
-for k in range(len(bin_edges)-1):
-    edge_min = bin_edges[k]
-    edge_max = bin_edges[k+1]
-
-    df_bin = df[(df.odds > edge_min)]
-
-
-    xlab_odds.append(np.round(len(df_bin)/len(df),2)*100)
-    oddsmean.append(np.mean(df_bin.zflag))
-    scatter_odds.append(nmad(df_bin.zwerr))
-    eta_odds.append(len(df_bin[np.abs(df_bin.zwerr)>0.15])/len(df)*100)
-
-
-# -
-
-# ### PLOTS
-
-# +
-plt.plot(xlab_odds,scatter_odds, marker = '.', color ='crimson', label=r'$\theta(\Delta z)$', ls='--', alpha=0.5)
-plt.plot(xlab,scatter, marker = '.', color ='navy',label=r'$\xi = \theta(\Delta z)$')
-
-
-plt.ylabel(r'NMAD [$\Delta z\ /\ (1 + z_{\rm s})$]', fontsize=16)
-plt.xlabel('Completeness', fontsize=16)
-
-plt.yticks(fontsize=12)
-plt.xticks(np.arange(5,101,10), fontsize=12)
-plt.legend(fontsize=14)
-
-plt.savefig('Flag_nmad_zspec.pdf', bbox_inches='tight')
-plt.show()
-
-# +
-plt.plot(xlab_odds,eta_odds, marker='.', color ='crimson', label=r'$\theta(\Delta z)$', ls='--', alpha=0.5)
-plt.plot(xlab,eta, marker='.', color ='navy',label=r'$\xi = \theta(\Delta z)$')
-
-plt.yticks(fontsize=12)
-plt.xticks(np.arange(5,101,10), fontsize=12)
-plt.ylabel(r'$\eta$ [%]', fontsize=16)
-plt.xlabel('Completeness', fontsize=16)
-plt.legend()
-
-plt.savefig('Flag_eta_zspec.pdf', bbox_inches='tight')
-
-plt.show()
-# -
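Although this notebook is deleted, its central computation — sweeping quantile thresholds of a quality score and tracking NMAD and outlier rate against the completeness each cut leaves — is the same trade-off the surviving notebooks plot. A compact sketch of that sweep on toy inputs, under the same `nmad` assumption as in the sketch above:

```python
import numpy as np
from scipy import stats

def nmad(x):
    return 1.4826 * np.median(np.abs(x - np.median(x)))

rng = np.random.default_rng(1)
zwerr = rng.normal(0, 0.05, 5000)   # toy normalized residuals dz/(1+z)
odds = rng.uniform(0, 1, 5000)      # toy quality score

# Quantile bin edges over the score, as in the deleted notebook.
bin_edges = stats.mstats.mquantiles(odds, np.arange(0, 1.01, 0.05))
for edge in bin_edges[:-1]:
    keep = zwerr[odds > edge]       # cumulative cut: keep everything above the edge
    completeness = 100 * len(keep) / len(zwerr)
    eta = 100 * np.mean(np.abs(keep) > 0.15)
    print(f"{completeness:5.1f}%  NMAD={nmad(keep):.4f}  eta={eta:.2f}%")
```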
notebooks/{Fig2_NMAD.py → NMAD.py} RENAMED
@@ -6,15 +6,15 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#     jupytext_version: 1.14.5
+#     jupytext_version: 1.16.2
 #   kernelspec:
-#     display_name: insight
+#     display_name: temps
 #     language: python
-#     name: insight
+#     name: temps
 # ---

 # %% [markdown]
-# # FIGURE 2 IN THE PAPER
+# # FIGURE METRICS

 # %% [markdown]
 # ## METRICS FOR THE DIFFERENT METHODS ON THE WIDE FIELD SAMPLE
@@ -43,15 +43,14 @@ rcParams["font.family"] = "STIXGeneral"


 # %%
-#insight modules
-import sys
-sys.path.append('../temps')
-
-from archive import archive
-from utils import nmad
-from temps_arch import EncoderPhotometry, MeasureZ
-from temps import Temps_module
+import temps

+# %%
+from temps.archive import Archive
+from temps.utils import nmad
+from temps.temps_arch import EncoderPhotometry, MeasureZ
+from temps.temps import TempsModule
+from temps.plots import plot_photoz


 # %%
@@ -62,15 +61,13 @@

 # %%
 #define here the directory containing the photometric catalogues
-parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
-modules_dir = '../data/models/'
+parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
+modules_dir = Path('../data/models/')

 # %%
-#load catalogue and apply cuts
-
 filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
-
-hdu_list = fits.open(os.path.join(parent_dir,filename_valid))
+path_file = parent_dir / filename_valid  # Creating the path to the file
+hdu_list = fits.open(path_file)
 cat = Table(hdu_list[1].data).to_pandas()
 cat = cat[cat['FLAG_PHOT']==0]
 cat = cat[cat['mu_class_L07']==1]
@@ -78,7 +75,6 @@ cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
 cat = cat[cat['MAG_VIS']<25]


-
 # %%
 ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii]> 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
 specz_or_photo = [0 if cat['z_spec_S15'].values[ii]> 0 else 1 for ii in range(len(cat))]
@@ -87,7 +83,7 @@ VISmag = cat['MAG_VIS']
 zsflag = cat['reliable_S15']

 # %%
-photoz_archive = archive(path = parent_dir,only_zspec=False)
+photoz_archive = Archive(path = parent_dir,only_zspec=False)
 f, ferr = photoz_archive._extract_fluxes(catalogue= cat)
 col, colerr = photoz_archive._to_colors(f, ferr)

@@ -101,20 +97,21 @@ if eval_methods:
     for il, lab in enumerate(['z','L15','DA']):

         nn_features = EncoderPhotometry()
-        nn_features.load_state_dict(torch.load(os.path.join(modules_dir,f'modelF_{lab}.pt')))
+        nn_features.load_state_dict(torch.load(modules_dir / f'modelF_{lab}.pt', map_location=torch.device('cpu')))
         nn_z = MeasureZ(num_gauss=6)
-        nn_z.load_state_dict(torch.load(os.path.join(modules_dir,f'modelZ_{lab}.pt')))
+        nn_z.load_state_dict(torch.load(modules_dir / f'modelZ_{lab}.pt', map_location=torch.device('cpu')))

-        temps = Temps_module(nn_features, nn_z)
+        temps_module = TempsModule(nn_features, nn_z)

-        z,zerr, zmode,pz, flag, odds = temps.get_pz(input_data=torch.Tensor(col),
-                                                    return_pz=True)
+        z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col),
+                                          return_pz=True,
+                                          return_flag=True)
         # Create a DataFrame with the desired columns
-        df = pd.DataFrame(np.c_[ID, VISmag,z, zmode, flag, ztarget,zsflag,zerr, specz_or_photo],
-                          columns=['ID','VISmag','z', 'zmode','zflag', 'ztarget','zsflag','zuncert','S15_L15_flag'])
+        df = pd.DataFrame(np.c_[ID, VISmag,z, odds, ztarget,zsflag, specz_or_photo],
+                          columns=['ID','VISmag','z','odds', 'ztarget','zsflag','S15_L15_flag'])

         # Calculate additional columns or operations if needed
-        df['zwerr'] = (df.zmode - df.ztarget) / (1 + df.ztarget)
+        df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)

         # Drop any rows with NaN values
         df = df.dropna()
@@ -135,36 +132,24 @@ dfs['DA']['zwerr'] = (dfs['DA'].z - dfs['DA'].ztarget) / (1 + dfs['DA'].ztarget)
 # %%
 if not eval_methods:
     dfs = {}
-    dfs['z'] = pd.read_csv(os.path.join(parent_dir, 'predictions_specztraining.csv'), header=0)
-    dfs['L15'] = pd.read_csv(os.path.join(parent_dir, 'predictions_speczL15training.csv'), header=0)
-    dfs['DA'] = pd.read_csv(os.path.join(parent_dir, 'predictions_speczDAtraining.csv'), header=0)
+    dfs['z'] = pd.read_csv(parent_dir / 'predictions_specztraining.csv', header=0)
+    dfs['L15'] = pd.read_csv(parent_dir / 'predictions_speczL15training.csv', header=0)
+    dfs['DA'] = pd.read_csv(parent_dir / 'predictions_speczDAtraining.csv', header=0)


 # %% [markdown]
 # ### MAKE PLOT

 # %%
-plot_photoz(df_list,
-            nbins=8,
-            xvariable='VISmag',
-            metric='nmad',
-            type_bin='bin',
-            label_list = ['zs','zs+L15',r'TEMPS'],
-            save=False,
-            samp='L15'
-            )
+df_list = [dfs['z'], dfs['L15'], dfs['DA']]

 # %%
 plot_photoz(df_list,
             nbins=8,
             xvariable='VISmag',
-            metric='outliers',
+            metric='nmad',
             type_bin='bin',
             label_list = ['zs','zs+L15',r'TEMPS'],
             save=False,
             samp='L15'
             )
-
-# %%
-
-# %%
notebooks/{Fig3_PIT_CRPS.py → PIT_CRPS.py} RENAMED
@@ -1,93 +1,84 @@
 # ---
 # jupyter:
 #   jupytext:
-#     formats: ipynb,py:percent
 #     text_representation:
 #       extension: .py
-#       format_name: percent
-#       format_version: '1.3'
-#     jupytext_version: 1.14.5
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.16.2
 #   kernelspec:
-#     display_name: insight
+#     display_name: temps
 #     language: python
-#     name: insight
+#     name: temps
 # ---

-# %% [markdown]
-# # FIGURE 3 IN THE PAPER
+# # $p(z)$ DISTRIBUTIONS

-# %% [markdown]
 # ## PIT AND CRPS FOR THE THREE METHODS

-# %% [markdown]
 # ### LOAD PYTHON MODULES

-# %%
 # %load_ext autoreload
 # %autoreload 2

-# %%
+import temps
+
 import pandas as pd
 import numpy as np
 import os
 from astropy.io import fits
 from astropy.table import Table
 import torch
+from pathlib import Path

-
-# %%
 #matplotlib settings
 from matplotlib import rcParams
 import matplotlib.pyplot as plt
 rcParams["mathtext.fontset"] = "stix"
 rcParams["font.family"] = "STIXGeneral"

-# %%
-#insight modules
-import sys
-sys.path.append('../temps')
-#from insight_arch import EncoderPhotometry, MeasureZ
-#from insight import Insight_module
-from archive import archive
-from utils import nmad
-from plots import plot_PIT, plot_crps
-from temps_arch import EncoderPhotometry, MeasureZ
-from temps import Temps_module


-# %% [markdown]
 # ### LOAD DATA

-# %%
-photoz_archive = archive(path = parent_dir,
                          only_zspec=False,
                          flags_kept=[1. , 1.1, 1.4, 1.5, 2,2.1,2.4,2.5,3., 3.1, 3.4, 3.5, 4., 9. , 9.1, 9.3, 9.4, 9.5,11.1, 11.5, 12.1, 12.5, 13. , 13.1, 13.5, 14, ],
                          target_test='L15')
 f_test, ferr_test, specz_test ,VIS_mag_test = photoz_archive.get_testing_data()


-# %% [markdown]
 # ## CREATE PIT; CRPS; SPECTROSCOPIC SAMPLE

-# %% [markdown]
 # This loads pre-trained models (for the sake of time). You can learn how to train the models in the Tutorial notebook.

-# %%
 # Initialize an empty dictionary to store DataFrames
 crps_dict = {}
 pit_dict = {}
 for il, lab in enumerate(['z','L15','DA']):

     nn_features = EncoderPhotometry()
-    nn_features.load_state_dict(torch.load(os.path.join(modules_dir,f'modelF_{lab}.pt')))
     nn_z = MeasureZ(num_gauss=6)
-    nn_z.load_state_dict(torch.load(os.path.join(modules_dir,f'modelZ_{lab}.pt')))

-    temps = Temps_module(nn_features, nn_z)


-    pit_list = temps.pit(input_data=torch.Tensor(f_test), target_data=torch.Tensor(specz_test))
-    crps_list = temps.crps(input_data=torch.Tensor(f_test), target_data=specz_test)


     # Assign the DataFrame to a key in the dictionary
@@ -95,7 +86,7 @@ for il, lab in enumerate(['z','L15','DA']):
     pit_dict[lab] = pit_list


-# %%
 plot_PIT(pit_dict['z'],
          pit_dict['L15'],
          pit_dict['DA'],
@@ -106,7 +97,7 @@ plot_PIT(pit_dict['z'],



-# %%
 plot_crps(crps_dict['z'],
           crps_dict['L15'],
           crps_dict['DA'],
@@ -116,5 +107,6 @@ plot_crps(crps_dict['z'],



-# %%
39
 
40
+ # +
41
+ from temps.temps import TempsModule
42
+ from temps.archive import Archive
43
+ from temps.utils import nmad
44
+ from temps.temps_arch import EncoderPhotometry, MeasureZ
45
+ from temps.plots import plot_photoz, plot_PIT, plot_crps
46
+
 
 
 
 
47
 
48
+ # -
49
 
 
50
  # ### LOAD DATA
51
 
52
+ #define here the directory containing the photometric catalogues
53
+ parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
54
+ modules_dir = Path('../data/models/')
55
+
56
+ photoz_archive = Archive(path = parent_dir,
57
  only_zspec=False,
58
  flags_kept=[1. , 1.1, 1.4, 1.5, 2,2.1,2.4,2.5,3., 3.1, 3.4, 3.5, 4., 9. , 9.1, 9.3, 9.4, 9.5,11.1, 11.5, 12.1, 12.5, 13. , 13.1, 13.5, 14, ],
59
  target_test='L15')
60
  f_test, ferr_test, specz_test ,VIS_mag_test = photoz_archive.get_testing_data()
61
 
62
 
 
63
  # ## CREATE PIT; CRPS; SPECTROSCOPIC SAMPLE
64
 
 
65
  # This loads pre-trained models (for the sake of time). You can learn how to train the models in the Tutorial notebook.
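+ # PIT (probability integral transform) is the cumulative p(z) evaluated at
+ # the true redshift: for well-calibrated PDFs the PIT histogram is flat.
+ # CRPS (continuous ranked probability score) integrates the squared
+ # difference between the predicted CDF and a step function at the true
+ # redshift, so lower values indicate sharper, better-centred PDFs.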
66
 
 
67
  # Initialize an empty dictionary to store DataFrames
68
  crps_dict = {}
69
  pit_dict = {}
70
  for il, lab in enumerate(['z','L15','DA']):
71
 
72
  nn_features = EncoderPhotometry()
73
+ nn_features.load_state_dict(torch.load(modules_dir / f'modelF_{lab}.pt',map_location=torch.device('cpu')))
74
  nn_z = MeasureZ(num_gauss=6)
75
+ nn_z.load_state_dict(torch.load(modules_dir / f'modelZ_{lab}.pt',map_location=torch.device('cpu')))
76
 
77
+ temps_module = TempsModule(nn_features, nn_z)
78
 
79
 
80
+ pit_list = temps_module.calculate_pit(input_data=torch.Tensor(f_test), target_data=torch.Tensor(specz_test))
81
+ crps_list = temps_module.calculate_crps(input_data=torch.Tensor(f_test), target_data=specz_test)
82
 
83
 
84
  # Assign the DataFrame to a key in the dictionary
 
86
  pit_dict[lab] = pit_list
87
 
88
 
89
+ # +
90
  plot_PIT(pit_dict['z'],
91
  pit_dict['L15'],
92
  pit_dict['DA'],
 
97
 
98
 
99
 
100
+ # +
101
  plot_crps(crps_dict['z'],
102
  crps_dict['L15'],
103
  crps_dict['DA'],
 
107
 
108
 
109
 
110
+ # -
111
+
112
 
 
notebooks/Qualitycut.py ADDED
@@ -0,0 +1,241 @@
1
+ # ---
2
+ # jupyter:
3
+ # jupytext:
4
+ # text_representation:
5
+ # extension: .py
6
+ # format_name: light
7
+ # format_version: '1.5'
8
+ # jupytext_version: 1.16.2
9
+ # kernelspec:
10
+ # display_name: temps
11
+ # language: python
12
+ # name: temps
13
+ # ---
14
+
15
+ # # QUALITY CUTS
16
+
17
+ # %load_ext autoreload
18
+ # %autoreload 2
19
+
20
+ import pandas as pd
21
+ import numpy as np
22
+ import os
23
+ import torch
24
+ from scipy import stats
25
+ from pathlib import Path
26
+
27
+ #matplotlib settings
28
+ from matplotlib import rcParams
29
+ import matplotlib.pyplot as plt
30
+ rcParams["mathtext.fontset"] = "stix"
31
+ rcParams["font.family"] = "STIXGeneral"
32
+
33
+ from temps.archive import Archive
34
+ from temps.utils import nmad, caluclate_eta
35
+ from temps.temps_arch import EncoderPhotometry, MeasureZ
36
+ from temps.temps import TempsModule
37
+
38
+
39
+ # ### LOAD DATA (ONLY SPECZ)
40
+
41
+ #define here the directory containing the photometric catalogues
42
+ parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
43
+ modules_dir = Path('../data/models/')
44
+
45
+ photoz_archive = Archive(path = parent_dir,only_zspec=True,flags_kept=[1. , 1.1, 1.4, 1.5, 2,2.1,2.4,2.5,3., 3.1, 3.4, 3.5, 4., 9. , 9.1, 9.3, 9.4, 9.5,11.1, 11.5, 12.1, 12.5, 13. , 13.1, 13.5, 14, ])
46
+ f_test_specz, ferr_test_specz, specz_test ,VIS_mag_test = photoz_archive.get_testing_data()
47
+
48
+
49
+ # ### LOAD TRAINED MODELS AND EVALUATE PDF OF RANDOM EXAMPLES
50
+
51
+ # Initialize an empty dictionary to store DataFrames
52
+ dfs = {}
53
+ pzs = np.zeros(shape = (3,11016,1000))
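+ # shape: 3 models ('z', 'L15', 'DA') x hard-coded test-sample size x 1000
+ # points of the redshift grid (zgrid below spans 0 < z < 5)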
54
+ for il, lab in enumerate(['z','L15','DA']):
55
+
56
+ nn_features = EncoderPhotometry()
57
+ nn_features.load_state_dict(torch.load(modules_dir / f'modelF_{lab}.pt',map_location=torch.device('cpu')))
58
+ nn_z = MeasureZ(num_gauss=6)
59
+ nn_z.load_state_dict(torch.load(modules_dir / f'modelZ_{lab}.pt', map_location=torch.device('cpu')))
60
+
61
+ temps_module = TempsModule(nn_features, nn_z)
62
+
63
+ z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(f_test_specz),
64
+ return_pz=True)
65
+
66
+ pzs[il] = pz
67
+
68
+ # Create a DataFrame with the desired columns
69
+ df = pd.DataFrame(np.c_[z, odds, specz_test],
70
+ columns=['z', 'odds' ,'ztarget'])
71
+
72
+ # Calculate additional columns or operations if needed
73
+ df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
74
+
75
+ # Drop any rows with NaN values
76
+ df = df.dropna()
77
+
78
+ # Assign the DataFrame to a key in the dictionary
79
+ dfs[lab] = df
80
+
81
+
82
+ # ### STATS
83
+
84
+ # +
85
+ #odds_test = [0, 0.01, 0.03, 0.05, 0.07, 0.1, 0.13, 0.15]
86
+ odds_test = np.arange(0,0.15,0.01)
87
+
88
+ df = dfs['DA'].copy()
89
+ zgrid = np.linspace(0, 5, 1000)
90
+ pz = pzs[2]
91
+ # -
92
+
93
+ diff_matrix = np.abs(df.z.values[:,None] - zgrid[None,:])
94
+ idx_peak = np.argmax(pz,1)
95
+ idx = np.argmin(diff_matrix,1)
96
+
97
+ odds_cat = np.zeros(shape = (len(odds_test),len(df)))
98
+ for ii, odds_ in enumerate(odds_test):
99
+ diff_matrix_upper = np.abs((df.z.values+odds_)[:,None] - zgrid[None,:])
100
+ diff_matrix_lower = np.abs((df.z.values-odds_)[:,None] - zgrid[None,:])
101
+
102
+ idx = np.argmin(diff_matrix,1)
103
+ idx_upper = np.argmin(diff_matrix_upper,1)
104
+ idx_lower = np.argmin(diff_matrix_lower,1)
105
+
106
+ odds = []
107
+ for jj in range(len(pz)):
108
+ odds.append(pz[jj,idx_lower[jj]:(idx_upper[jj]+1)].sum())
109
+
110
+ odds_cat[ii] = np.array(odds)
111
+
112
+
113
+ odds_df = pd.DataFrame(odds_cat.T, columns=[f'odds_{x}' for x in odds_test])
114
+ df = pd.concat([df, odds_df], axis=1)
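+ # Each odds_{dz} column stores, per galaxy, the p(z) probability enclosed in
+ # z +/- dz around the point estimate; values close to 1 flag narrow, reliable
+ # PDFs. For a single galaxy jj and half-width dz this reduces to (assuming
+ # the rows of pz are normalised to sum to one):
+ #
+ # odds_jj = pz[jj, idx_lower[jj]:idx_upper[jj] + 1].sum()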
115
+
116
+
117
+ # ## statistics on ODDS
118
+
119
+ # +
120
+ scatter_odds, eta_odds,xlab_odds, oddsmean = [],[],[], []
121
+
122
+ completeness = np.arange(0, 1, 0.05)  # assumed cut grid; 'complenteness' was never defined in the original
+ for c in completeness:
123
+ percentile_cutoff = df['odds'].quantile(c)
124
+
125
+ df_bin = df[(df.odds > percentile_cutoff)]
126
+
127
+ xlab_odds.append((1-c)*100)
128
+ oddsmean.append(np.mean(df_bin.odds))
129
+ scatter_odds.append(nmad(df_bin.zwerr))
130
+ eta_odds.append(caluclate_eta(df_bin))
131
+ if np.round(c,1) ==0.3:
132
+ percentiles_cutoff = [df[f'odds_{col}'].quantile(c) for col in odds_test]
133
+ scatters_odds = [nmad(df[df[f'odds_{col}'] > percentile_cutoff].zwerr) for (col, percentile_cutoff) in zip(odds_test,percentiles_cutoff)]
134
+ etas_odds = [caluclate_eta(df[df[f'odds_{col}'] > percentile_cutoff]) for (col, percentile_cutoff) in zip(odds_test,percentiles_cutoff)]
135
+
136
+
137
+
138
+
139
+ # -
140
+
141
+ df_completeness = pd.DataFrame(np.c_[xlab_odds,scatter_odds, eta_odds],
142
+ columns = ['completeness', 'sigma_odds', 'eta_odds'])
143
+
144
+ # ## PLOTS
145
+
146
+ # +
147
+ # Initialize the figure and axis
148
+ fig, ax1 = plt.subplots(figsize=(7, 5))
149
+
150
+ # First plot (Sigma) - using the left y-axis
151
+ color = 'crimson'
152
+ ax1.plot(df_completeness.completeness,
153
+ df_completeness.sigma_odds,
154
+ marker='.',
155
+ color=color,
156
+ label=r'NMAD',
157
+ ls='-',
158
+ alpha=0.5,
159
+ )
160
+
161
+
162
+ ax1.set_xlabel('Completeness', fontsize=16)
163
+ ax1.set_ylabel(r'NMAD [$\Delta z$]', color=color, fontsize=16)
164
+ ax1.tick_params(axis='x', labelsize=14)
165
+ ax1.tick_params(axis='y', which='major', labelsize = 14, width=2.5, length=3, labelcolor=color)
166
+ ax1.set_xticks(np.arange(5, 101, 10))
167
+
168
+ ax2 = ax1.twinx() # Create another y-axis that shares the same x-axis
169
+ color = 'navy'
170
+ ax2.plot(df_completeness.completeness,
171
+ df_completeness.eta_odds,
172
+ marker='.',
173
+ color=color,
174
+ label=r'$\eta$ [%]',
175
+ ls='--',
176
+ alpha=0.5)
177
+
178
+ ax2.set_ylabel(r'$\eta$ [%]', color=color, fontsize=16)
179
+
180
+ # Adjust notation to allow comparison
181
+ ax1.yaxis.get_major_formatter().set_powerlimits((0, 0)) # Adjust scientific notation for Sigma
182
+ ax2.yaxis.get_major_formatter().set_powerlimits((0, 0)) # Adjust scientific notation for Eta
183
+ ax2.tick_params(axis='x', labelsize=14)
184
+ ax2.tick_params(axis='y', which='major', labelsize = 14, width=2.5, length=3, labelcolor=color)
185
+
186
+ # Final adjustments
187
+ fig.tight_layout()
188
+ fig.legend(bbox_to_anchor = [-0.18,0.75,0.5,0.2], fontsize = 14)
189
+ #plt.savefig('Flag_nmad_eta_sigma_comparison.pdf', bbox_inches='tight')
190
+ plt.show()
191
+
192
+
193
+ # +
194
+ # Initialize the figure and axis
195
+ fig, ax1 = plt.subplots(figsize=(7, 5))
196
+
197
+ # First plot (Sigma) - using the left y-axis
198
+ color = 'crimson'
199
+ ax1.plot(odds_test,
200
+ scatters_odds,
201
+ marker='.',
202
+ color=color,
203
+ label=r'NMAD',
204
+ ls='-',
205
+ alpha=0.5,
206
+ )
207
+
208
+
209
+ ax1.set_xlabel(r'$\delta z$ (ODDS)', fontsize=16)
210
+ ax1.set_ylabel(r'NMAD [$\Delta z$]', color=color, fontsize=16)
211
+ ax1.tick_params(axis='x', labelsize=14)
212
+ ax1.tick_params(axis='y', which='major', labelsize = 14, width=2.5, length=3, labelcolor=color)
213
+ ax1.set_xticks(np.arange(0,0.16,0.02))
214
+
215
+ ax2 = ax1.twinx() # Create another y-axis that shares the same x-axis
216
+ color = 'navy'
217
+ ax2.plot(odds_test,
218
+ etas_odds,
219
+ marker='.',
220
+ color=color,
221
+ label=r'$\eta$ [%]',
222
+ ls='--',
223
+ alpha=0.5)
224
+
225
+ ax2.set_ylabel(r'$\eta$ [%]', color=color, fontsize=16)
226
+
227
+ # Adjust notation to allow comparison
228
+ ax1.yaxis.get_major_formatter().set_powerlimits((0, 0)) # Adjust scientific notation for Sigma
229
+ ax2.yaxis.get_major_formatter().set_powerlimits((0, 0)) # Adjust scientific notation for Eta
230
+ ax2.tick_params(axis='x', labelsize=14)
231
+ ax2.tick_params(axis='y', which='major', labelsize = 14, width=2.5, length=3, labelcolor=color)
232
+
233
+ # Final adjustments
234
+ fig.tight_layout()
235
+ fig.legend(bbox_to_anchor = [0.10,0.75,0.5,0.2], fontsize = 14)
236
+ #plt.savefig('ODDS_study.pdf', bbox_inches='tight')
237
+ plt.show()
238
+
239
+ # -
240
+
241
+
notebooks/Table_metrics.py CHANGED
@@ -5,11 +5,11 @@
5
  # extension: .py
6
  # format_name: light
7
  # format_version: '1.5'
8
- # jupytext_version: 1.14.5
9
  # kernelspec:
10
- # display_name: insight
11
  # language: python
12
- # name: insight
13
  # ---
14
 
15
  # # TABLE METRICS
@@ -24,6 +24,7 @@ import torch
24
  from scipy import stats
25
  from astropy.io import fits
26
  from astropy.table import Table
 
27
 
28
  #matplotlib settings
29
  from matplotlib import rcParams
@@ -31,27 +32,22 @@ import matplotlib.pyplot as plt
31
  rcParams["mathtext.fontset"] = "stix"
32
  rcParams["font.family"] = "STIXGeneral"
33
 
34
- #insight modules
35
- import sys
36
- sys.path.append('../temps')
37
- #from insight_arch import EncoderPhotometry, MeasureZ
38
- #from insight import Insight_module
39
- from archive import archive
40
- from utils import nmad, select_cut
41
- from temps_arch import EncoderPhotometry, MeasureZ
42
- from temps import Temps_module
43
 
44
 
45
  # ## LOAD DATA
46
 
47
  #define here the directory containing the photometric catalogues
48
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
49
- modules_dir = '../data/models/'
50
 
51
  # +
52
  filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
53
 
54
- hdu_list = fits.open(os.path.join(parent_dir,filename_valid))
55
  cat = Table(hdu_list[1].data).to_pandas()
56
  cat = cat[cat['FLAG_PHOT']==0]
57
  cat = cat[cat['mu_class_L07']==1]
@@ -74,7 +70,7 @@ cat = cat[cat.ztarget>0]
74
 
75
  # ### EXTRACT PHOTOMETRY
76
 
77
- photoz_archive = archive(path = parent_dir,only_zspec=False)
78
  f, ferr = photoz_archive._extract_fluxes(catalogue= cat)
79
  col, colerr = photoz_archive._to_colors(f, ferr)
80
 
@@ -84,19 +80,19 @@ col, colerr = photoz_archive._to_colors(f, ferr)
84
  # Initialize an empty dictionary to store DataFrames
85
  lab='DA'
86
  nn_features = EncoderPhotometry()
87
- nn_features.load_state_dict(torch.load(os.path.join(modules_dir,f'modelF_{lab}.pt')))
88
  nn_z = MeasureZ(num_gauss=6)
89
- nn_z.load_state_dict(torch.load(os.path.join(modules_dir,f'modelZ_{lab}.pt')))
90
 
91
- temps = Temps_module(nn_features, nn_z)
92
 
93
- z,zerr, pz, flag, odds = temps.get_pz(input_data=torch.Tensor(col),
94
  return_pz=True)
95
 
96
 
97
  # Create a DataFrame with the desired columns
98
- df = pd.DataFrame(np.c_[z, flag, odds, cat.ztarget, cat.reliable_S15, cat.specz_or_photo],
99
- columns=['z','zflag', 'odds' ,'ztarget','reliable_S15', 'specz_or_photo'])
100
 
101
  # Calculate additional columns or operations if needed
102
  df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
@@ -130,10 +126,12 @@ print(dfcuts.to_latex(float_format="%.3f",
130
 
131
  df_euclid = df[(df.z >0.2)&(df.z < 2.6)]
132
 
 
 
133
  # +
134
  df_selected, cut, dfcuts = select_cut(df_euclid,
135
  completenss_lim=None,
136
- nmad_lim=0.055,
137
  outliers_lim=None,
138
  return_df=True)
139
 
 
5
  # extension: .py
6
  # format_name: light
7
  # format_version: '1.5'
8
+ # jupytext_version: 1.16.2
9
  # kernelspec:
10
+ # display_name: temps
11
  # language: python
12
+ # name: temps
13
  # ---
14
 
15
  # # TABLE METRICS
 
24
  from scipy import stats
25
  from astropy.io import fits
26
  from astropy.table import Table
27
+ from pathlib import Path
28
 
29
  #matplotlib settings
30
  from matplotlib import rcParams
 
32
  rcParams["mathtext.fontset"] = "stix"
33
  rcParams["font.family"] = "STIXGeneral"
34
 
35
+ from temps.archive import Archive
36
+ from temps.utils import nmad, select_cut
37
+ from temps.temps_arch import EncoderPhotometry, MeasureZ
38
+ from temps.temps import TempsModule
 
 
 
 
 
39
 
40
 
41
  # ## LOAD DATA
42
 
43
  #define here the directory containing the photometric catalogues
44
+ parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
45
+ modules_dir = Path('../data/models/')
46
 
47
  # +
48
  filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
49
 
50
+ hdu_list = fits.open(parent_dir / filename_valid)
51
  cat = Table(hdu_list[1].data).to_pandas()
52
  cat = cat[cat['FLAG_PHOT']==0]
53
  cat = cat[cat['mu_class_L07']==1]
 
70
 
71
  # ### EXTRACT PHOTOMETRY
72
 
73
+ photoz_archive = Archive(path = parent_dir,only_zspec=False)
74
  f, ferr = photoz_archive._extract_fluxes(catalogue= cat)
75
  col, colerr = photoz_archive._to_colors(f, ferr)
76
 
 
80
  # Initialize an empty dictionary to store DataFrames
81
  lab='DA'
82
  nn_features = EncoderPhotometry()
83
+ nn_features.load_state_dict(torch.load(modules_dir / f'modelF_{lab}.pt', map_location=torch.device('cpu')))
84
  nn_z = MeasureZ(num_gauss=6)
85
+ nn_z.load_state_dict(torch.load(modules_dir / f'modelZ_{lab}.pt', map_location=torch.device('cpu')))
86
 
87
+ temps_module = TempsModule(nn_features, nn_z)
88
 
89
+ z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col),
90
  return_pz=True)
91
 
92
 
93
  # Create a DataFrame with the desired columns
94
+ df = pd.DataFrame(np.c_[z, odds, cat.ztarget, cat.reliable_S15, cat.specz_or_photo],
95
+ columns=['z', 'odds' ,'ztarget','reliable_S15', 'specz_or_photo'])
96
 
97
  # Calculate additional columns or operations if needed
98
  df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
 
126
 
127
  df_euclid = df[(df.z >0.2)&(df.z < 2.6)]
128
 
129
+ df_euclid
130
+
131
  # +
132
  df_selected, cut, dfcuts = select_cut(df_euclid,
133
  completenss_lim=None,
134
+ nmad_lim= 0.05,
135
  outliers_lim=None,
136
  return_df=True)
137
 
notebooks/nz.py ADDED
@@ -0,0 +1,215 @@
1
+ # ---
2
+ # jupyter:
3
+ # jupytext:
4
+ # text_representation:
5
+ # extension: .py
6
+ # format_name: light
7
+ # format_version: '1.5'
8
+ # jupytext_version: 1.16.2
9
+ # kernelspec:
10
+ # display_name: temps
11
+ # language: python
12
+ # name: temps
13
+ # ---
14
+
15
+ # # FIGURE 5 IN THE PAPER
16
+
17
+ # ## n(z) distributions
18
+
19
+ # %load_ext autoreload
20
+ # %autoreload 2
21
+
22
+ import pandas as pd
23
+ import numpy as np
24
+ from astropy.io import fits
25
+ from astropy.table import Table
26
+ import torch
27
+ from pathlib import Path
28
+
29
+ #matplotlib settings
30
+ from matplotlib import rcParams
31
+ import matplotlib.pyplot as plt
32
+ rcParams["mathtext.fontset"] = "stix"
33
+ rcParams["font.family"] = "STIXGeneral"
34
+
35
+ from temps.archive import Archive
36
+ from temps.utils import nmad
37
+ from temps.temps_arch import EncoderPhotometry, MeasureZ
38
+ from temps.temps import TempsModule
39
+ from temps.plots import plot_nz
40
+
41
+ eval_methods=False
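+ # Toggle: with eval_methods=True the three networks are re-evaluated on the
+ # catalogue below; with False the pre-computed prediction CSVs are loaded.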
42
+
43
+ # ### LOAD DATA
44
+
45
+ #define here the directory containing the photometric catalogues
46
+ parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
47
+ modules_dir = Path('../data/models/')
48
+
49
+ # +
50
+ filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
51
+
52
+ hdu_list = fits.open(parent_dir / filename_valid)
53
+ cat = Table(hdu_list[1].data).to_pandas()
54
+ cat = cat[cat['FLAG_PHOT']==0]
55
+ cat = cat[cat['mu_class_L07']==1]
56
+ cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
57
+ cat = cat[cat['MAG_VIS']<25]
58
+
59
+ # -
60
+
61
+ ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii]> 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
62
+ specz_or_photo = [0 if cat['z_spec_S15'].values[ii]> 0 else 1 for ii in range(len(cat))]
63
+ ID = cat['ID']
64
+ VISmag = cat['MAG_VIS']
65
+ zsflag = cat['reliable_S15']
66
+
67
+ photoz_archive = Archive(path = parent_dir,only_zspec=False)
68
+ f, ferr = photoz_archive._extract_fluxes(catalogue= cat)
69
+ col, colerr = photoz_archive._to_colors(f, ferr)
70
+
71
+ # ### LOAD TRAINED MODELS AND EVALUATE PDFs AND REDSHIFT
72
+
73
+ if eval_methods:
74
+ dfs = {}
75
+
76
+ for il, lab in enumerate(['z','L15','DA']):
77
+
78
+ nn_features = EncoderPhotometry()
79
+ nn_features.load_state_dict(torch.load(modules_dir / f'modelF_{lab}.pt',map_location=torch.device('cpu')))
80
+ nn_z = MeasureZ(num_gauss=6)
81
+ nn_z.load_state_dict(torch.load(modules_dir / f'modelZ_{lab}.pt',map_location=torch.device('cpu')))
82
+
83
+ temps_module = TempsModule(nn_features, nn_z)
84
+
85
+ z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col),
86
+ return_pz=True)
87
+ # Create a DataFrame with the desired columns
88
+ df = pd.DataFrame(np.c_[ID, VISmag,z, odds, ztarget,zsflag, specz_or_photo],
89
+ columns=['ID','VISmag','z','odds', 'ztarget','zsflag','S15_L15_flag'])
90
+
91
+ # Calculate additional columns or operations if needed
92
+ df['zwerr'] = (df.z - df.ztarget) / (1 + df.ztarget)
93
+
94
+ # Drop any rows with NaN values
95
+ df = df.dropna()
96
+
97
+ # Assign the DataFrame to a key in the dictionary
98
+ dfs[lab] = df
99
+
100
+
101
+ # ### LOAD CATALOGUES IF AVAILABLE
102
+
103
+ if not eval_methods:
104
+
105
+ df_zs = pd.read_csv(parent_dir / 'predictions_specztraining.csv', header=0)
106
+ df_zsL15 = pd.read_csv(parent_dir / 'predictions_speczL15training.csv', header=0)
107
+ df_DA = pd.read_csv(parent_dir / 'predictions_speczDAtraining.csv', header=0)
108
+
109
+
110
+ dfs = {}
111
+ dfs['z'] = df_zs
112
+ dfs['L15'] = df_zsL15
113
+ dfs['DA'] = df_DA
114
+
115
+ # +
116
+ import matplotlib.pyplot as plt
117
+ from matplotlib import gridspec
118
+
119
+ # Create figure and grid specification
120
+ fig = plt.figure(figsize=(8, 10))
121
+ gs = gridspec.GridSpec(5, 1, height_ratios=[0.1, 1, 1,1,1])
122
+
123
+ # Upper panel (very thin) with shaded areas
124
+ ax1 = plt.subplot(gs[0])
125
+ ax1.set_yticks([])
126
+
127
+ ax1.set_ylabel('Bins', fontsize=10)
128
+
129
+ # Define the ranges for shaded areas
130
+ #z_ranges = [[0.15, 0.35], [0.35, 0.55], [0.55, 0.85], [0.85, 1.05], [1.05, 1.35],
131
+ # [1.35, 1.55],# [1.55, 1.85], [1.85, 2], [2, 2.5], [2.5, 3], [3, 4]]
132
+
133
+ z_ranges = [[0.15, 0.5], [0.5, 1], [1, 1.5], [1.5,2]]#, [2, 3], [3,4]]#,
134
+ #[1.35, 1.55],# [1.55, 1.85], [1.85, 2], [2, 2.5], [2.5, 3], [3, 4]]
135
+
136
+ colors = ['deepskyblue', 'forestgreen', 'coral', 'grey', 'pink', 'goldenrod',
137
+ 'cyan', 'seagreen', 'salmon', 'steelblue', 'orange']
138
+
139
+ # Plot shaded areas
140
+ x_values = [0, 1, 2] # Example x values, adjust as needed
141
+ for i, (start, end) in enumerate(z_ranges):
142
+ ax1.fill_betweenx(x_values, start, end, color=colors[i], alpha=0.5)
143
+
144
+ # Middle panel (equally thick)
145
+ ax2 = plt.subplot(gs[1])
146
+ for i, (start, end) in enumerate(z_ranges):
147
+ dfplot_z = dfs['z'][(dfs['z']['ztarget'] > start) & (dfs['z']['ztarget'] < end)]
148
+ ax2.hist(dfplot_z.ztarget, bins=50, color=colors[i], histtype='step', linestyle='-', density=True, range=(0, 4))
149
+
150
+ # Bottom panel (equally thick)
151
+ ax3 = plt.subplot(gs[2])
152
+ for i, (start, end) in enumerate(z_ranges):
153
+ dfplot_z = dfs['z'][(dfs['z']['z'] > start) & (dfs['z']['z'] < end)]
154
+ ax3.hist(dfplot_z.ztarget, bins=50, color=colors[i], histtype='step', linestyle='-', density=True, range=(0, 4))
155
+
156
+ # Bottom panel (equally thick)
157
+ ax4 = plt.subplot(gs[3])
158
+ for i, (start, end) in enumerate(z_ranges):
159
+ dfplot_z = dfs['L15'][(dfs['L15']['z'] > start) & (dfs['L15']['z'] < end)]
160
+ print(len(dfplot_z))
161
+ ax4.hist(dfplot_z.ztarget, bins=50, color=colors[i], histtype='step', linestyle='-', density=True, range=(0, 4))
162
+
163
+ ax5 = plt.subplot(gs[4])
164
+ for i, (start, end) in enumerate(z_ranges):
165
+ dfplot_z = dfs['DA'][(dfs['DA']['z'] > start) & (dfs['DA']['z'] < end)]
166
+ ax5.hist(dfplot_z.ztarget, bins=50, color=colors[i], histtype='step', linestyle='-', density=True, range=(0, 4))
167
+
168
+ plt.tight_layout()
169
+ plt.show()
170
+
171
+ # -
172
+
173
+ def plot_nz(df_list,
174
+ zcuts = [0.1, 0.5, 1, 1.5, 2, 3, 4],
175
+ save=False):
176
+ # Plot properties
177
+ plt.rcParams['font.family'] = 'serif'
178
+ plt.rcParams['font.size'] = 16
179
+
180
+ cmap = plt.get_cmap('Dark2') # Choose a colormap for coloring lines
181
+
182
+ # Create subplots
183
+ fig, axs = plt.subplots(3, 1, figsize=(20, 8), sharex=True)
184
+
185
+ for i, df in enumerate(df_list):
186
+ dfplot = df_list[i].copy() # Assuming df_list contains dataframes
187
+ ax = axs[i] # Selecting the appropriate subplot
188
+
189
+ for iz in range(len(zcuts)-1):
190
+ dfplot_z = dfplot[(dfplot['ztarget'] > zcuts[iz]) & (dfplot['ztarget'] < zcuts[iz + 1])]
191
+ color = cmap(iz) # Get a different color for each redshift
192
+
193
+ zt_mean = np.median(dfplot_z.ztarget.values)
194
+ zp_mean = np.median(dfplot_z.z.values)
195
+
196
+
197
+ # Plot histogram on the selected subplot
198
+ ax.hist(dfplot_z.z, bins=50, color=color, histtype='step', linestyle='-', density=True, range=(0, 4))
199
+ ax.axvline(zt_mean, color=color, linestyle='-', lw=2)
200
+ ax.axvline(zp_mean, color=color, linestyle='--', lw=2)
201
+
202
+ ax.set_ylabel(f'Frequency', fontsize=14)
203
+ ax.grid(False)
204
+ ax.set_xlim(0, 3.5)
205
+
206
+ axs[-1].set_xlabel(f'$z$', fontsize=18)
207
+
208
+ if save:
209
+ plt.savefig(f'nz_hist.pdf', dpi=300, bbox_inches='tight')
210
+
211
+ plt.show()
212
+
213
+ plot_nz(df_list)
214
+
215
+
notebooks/{Fig4_pz_examples.py β†’ pz_examples.py} RENAMED
@@ -1,70 +1,55 @@
1
  # ---
2
  # jupyter:
3
  # jupytext:
4
- # formats: ipynb,py:percent
5
  # text_representation:
6
  # extension: .py
7
- # format_name: percent
8
- # format_version: '1.3'
9
- # jupytext_version: 1.14.5
10
  # kernelspec:
11
- # display_name: insight
12
  # language: python
13
- # name: insight
14
  # ---
15
 
16
- # %% [markdown]
17
- # # FIGURE 4 IN THE PAPER
18
 
19
- # %% [markdown]
20
  # ## IMPACT OF TEMPS ON CONCRETE P(Z) EXAMPLES
21
 
22
- # %% [markdown]
23
  # ### LOAD PYTHON MODULES
24
 
25
- # %%
26
  # %load_ext autoreload
27
  # %autoreload 2
28
 
29
- # %%
30
  import pandas as pd
31
  import numpy as np
32
  import os
33
  from astropy.io import fits
34
  from astropy.table import Table
35
  import torch
 
36
 
37
- # %%
38
  #matplotlib settings
39
  from matplotlib import rcParams
40
  import matplotlib.pyplot as plt
41
  rcParams["mathtext.fontset"] = "stix"
42
  rcParams["font.family"] = "STIXGeneral"
43
 
44
- # %%
45
- #insight modules
46
- import sys
47
- sys.path.append('../temps')
48
- #from insight_arch import EncoderPhotometry, MeasureZ
49
- #from insight import Insight_module
50
- from archive import archive
51
- from utils import nmad
52
- from temps_arch import EncoderPhotometry, MeasureZ
53
- from temps import Temps_module
54
 
55
 
56
- # %% [markdown]
57
  # ### LOAD DATA
58
 
59
- # %%
60
  #define here the directory containing the photometric catalogues
61
- parent_dir = '/data/astro/scratch2/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5'
62
- modules_dir = '../data/models/'
63
 
64
- # %%
65
  filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
66
-
67
- hdu_list = fits.open(os.path.join(parent_dir,filename_valid))
68
  cat = Table(hdu_list[1].data).to_pandas()
69
  cat = cat[cat['FLAG_PHOT']==0]
70
  cat = cat[cat['mu_class_L07']==1]
@@ -72,46 +57,41 @@ cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
72
  cat = cat[cat['MAG_VIS']<25]
73
 
74
 
75
- # %%
76
  ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii]> 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
77
  specz_or_photo = [0 if cat['z_spec_S15'].values[ii]> 0 else 1 for ii in range(len(cat))]
78
  ID = cat['ID']
79
  VISmag = cat['MAG_VIS']
80
  zsflag = cat['reliable_S15']
81
 
82
- # %%
83
- photoz_archive = archive(path = parent_dir,only_zspec=False)
84
  f, ferr = photoz_archive._extract_fluxes(catalogue= cat)
85
  col, colerr = photoz_archive._to_colors(f, ferr)
86
 
87
- # %% [markdown]
88
  # ### LOAD TRAINED MODELS AND EVALUATE PDF OF RANDOM EXAMPLES
89
 
90
- # %% [markdown]
91
  # The notebook 'Tutorial_temps' gives an example of how to train and save models.
92
 
93
- # %%
94
  # Initialize an empty dictionary to store DataFrames
95
  ii = np.random.randint(0,len(col),1)
96
  pz_dict = {}
97
  for il, lab in enumerate(['z','L15','DA']):
98
 
99
  nn_features = EncoderPhotometry()
100
- nn_features.load_state_dict(torch.load(os.path.join(modules_dir,f'modelF_{lab}.pt')))
101
  nn_z = MeasureZ(num_gauss=6)
102
- nn_z.load_state_dict(torch.load(os.path.join(modules_dir,f'modelZ_{lab}.pt')))
103
 
104
- temps = Temps_module(nn_features, nn_z)
105
 
106
 
107
- z,zerr, pz, flag,_ = temps.get_pz(input_data=torch.Tensor(col[ii]),return_pz=True)
108
 
109
 
110
  # Assign the DataFrame to a key in the dictionary
111
  pz_dict[lab] = pz
112
 
113
 
114
- # %%
115
  cmap = plt.get_cmap('Dark2')
116
 
117
  plt.plot(np.linspace(0,5,1000),pz_dict['z'][0],label='z', color = cmap(0), ls ='--')
@@ -124,5 +104,6 @@ plt.legend()
124
  plt.xlabel(r'$z$', fontsize=14)
125
  plt.ylabel('Probability', fontsize=14)
126
  #plt.savefig(f'pz_{ii[0]}.pdf', bbox_inches='tight')
 
 
127
 
128
- # %%
 
1
  # ---
2
  # jupyter:
3
  # jupytext:
 
4
  # text_representation:
5
  # extension: .py
6
+ # format_name: light
7
+ # format_version: '1.5'
8
+ # jupytext_version: 1.16.2
9
  # kernelspec:
10
+ # display_name: temps
11
  # language: python
12
+ # name: temps
13
  # ---
14
 
15
+ # # $p(z)$ examples
 
16
 
 
17
  # ## IMPACT OF TEMPS ON CONCRETE P(Z) EXAMPLES
18
 
 
19
  # ### LOAD PYTHON MODULES
20
 
 
21
  # %load_ext autoreload
22
  # %autoreload 2
23
 
 
24
  import pandas as pd
25
  import numpy as np
26
  import os
27
  from astropy.io import fits
28
  from astropy.table import Table
29
  import torch
30
+ from pathlib import Path
31
 
 
32
  #matplotlib settings
33
  from matplotlib import rcParams
34
  import matplotlib.pyplot as plt
35
  rcParams["mathtext.fontset"] = "stix"
36
  rcParams["font.family"] = "STIXGeneral"
37
 
38
+ from temps.archive import Archive
39
+ from temps.utils import nmad
40
+ from temps.temps_arch import EncoderPhotometry, MeasureZ
41
+ from temps.temps import TempsModule
 
 
 
 
 
 
42
 
43
 
 
44
  # ### LOAD DATA
45
 
 
46
  #define here the directory containing the photometric catalogues
47
+ parent_dir = Path('/data/astro/scratch/lcabayol/insight/data/Euclid_EXT_MER_PHZ_DC2_v1.5')
48
+ modules_dir = Path('../data/models/')
49
 
 
50
  filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
51
+ path_file = parent_dir / filename_valid # Creating the path to the file
52
+ hdu_list = fits.open(path_file)
53
  cat = Table(hdu_list[1].data).to_pandas()
54
  cat = cat[cat['FLAG_PHOT']==0]
55
  cat = cat[cat['mu_class_L07']==1]
 
57
  cat = cat[cat['MAG_VIS']<25]
58
 
59
 
 
60
  ztarget = [cat['z_spec_S15'].values[ii] if cat['z_spec_S15'].values[ii]> 0 else cat['photo_z_L15'].values[ii] for ii in range(len(cat))]
61
  specz_or_photo = [0 if cat['z_spec_S15'].values[ii]> 0 else 1 for ii in range(len(cat))]
62
  ID = cat['ID']
63
  VISmag = cat['MAG_VIS']
64
  zsflag = cat['reliable_S15']
65
 
66
+ photoz_archive = Archive(path = parent_dir,only_zspec=False)
 
67
  f, ferr = photoz_archive._extract_fluxes(catalogue= cat)
68
  col, colerr = photoz_archive._to_colors(f, ferr)
69
 
 
70
  # ### LOAD TRAINED MODELS AND EVALUATE PDF OF RANDOM EXAMPLES
71
 
 
72
  # The notebook 'Tutorial_temps' gives an example of how to train and save models.
73
 
 
74
  # Initialize an empty dictionary to store DataFrames
75
  ii = np.random.randint(0,len(col),1)
76
  pz_dict = {}
77
  for il, lab in enumerate(['z','L15','DA']):
78
 
79
  nn_features = EncoderPhotometry()
80
+ nn_features.load_state_dict(torch.load(modules_dir / f'modelF_{lab}.pt',map_location=torch.device('cpu')))
81
  nn_z = MeasureZ(num_gauss=6)
82
+ nn_z.load_state_dict(torch.load(modules_dir / f'modelZ_{lab}.pt',map_location=torch.device('cpu')))
83
 
84
+ temps_module = TempsModule(nn_features, nn_z)
85
 
86
 
87
+ z, pz, odds = temps_module.get_pz(input_data=torch.Tensor(col[ii]),return_pz=True)
88
 
89
 
90
  # Assign the DataFrame to a key in the dictionary
91
  pz_dict[lab] = pz
92
 
93
 
94
+ # +
95
  cmap = plt.get_cmap('Dark2')
96
 
97
  plt.plot(np.linspace(0,5,1000),pz_dict['z'][0],label='z', color = cmap(0), ls ='--')
 
104
  plt.xlabel(r'$z$', fontsize=14)
105
  plt.ylabel('Probability', fontsize=14)
106
  #plt.savefig(f'pz_{ii[0]}.pdf', bbox_inches='tight')
107
+ # -
108
+
109
 
 
temps/archive.py CHANGED
@@ -1,42 +1,62 @@
1
  import numpy as np
2
  import pandas as pd
3
  from astropy.io import fits
4
- import os
5
  from astropy.table import Table
6
  from scipy.spatial import KDTree
 
 
 
 
7
 
8
- import matplotlib.pyplot as plt
9
 
10
- from matplotlib import rcParams
11
  rcParams["mathtext.fontset"] = "stix"
12
  rcParams["font.family"] = "STIXGeneral"
13
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- class archive():
16
- def __init__(self, path, aperture=2, drop_stars=True, clean_photometry=True, convert_colors=True, extinction_corr=True, only_zspec=True, target_test='specz', flags_kept=[3,3.1,3.4,3.5,4]):
17
 
 
18
  self.aperture = aperture
19
- self.flags_kept=flags_kept
 
20
 
 
 
21
 
 
 
 
22
 
23
- filename_calib='euclid_cosmos_DC2_S1_v2.1_calib_clean.fits'
24
- filename_valid='euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
 
 
25
 
26
- hdu_list = fits.open(os.path.join(path,filename_calib))
27
- cat = Table(hdu_list[1].data).to_pandas()
28
- cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
29
-
30
 
31
- hdu_list = fits.open(os.path.join(path,filename_valid))
32
- cat_test = Table(hdu_list[1].data).to_pandas()
 
33
 
34
 
35
  if drop_stars==True:
 
36
  cat = cat[cat.mu_class_L07==1]
37
  cat_test = cat_test[cat_test.mu_class_L07==1]
38
 
39
  if clean_photometry==True:
 
40
  cat = self._clean_photometry(cat)
41
  cat_test = self._clean_photometry(cat_test)
42
 
@@ -55,6 +75,7 @@ class archive():
55
 
56
 
57
  self._set_training_data(cat,
 
58
  only_zspec=only_zspec,
59
  extinction_corr=extinction_corr,
60
  convert_colors=convert_colors)
@@ -65,17 +86,51 @@ class archive():
65
 
66
 
67
  def _extract_fluxes(self,catalogue):
68
- columns_f = [f'FLUX_{x}_{self.aperture}' for x in ['G','R','I','Z','Y','J','H']]
69
- columns_ferr = [f'FLUXERR_{x}_{self.aperture}' for x in ['G','R','I','Z','Y','J','H']]
 
 
 
 
70
 
71
  f = catalogue[columns_f].values
72
  ferr = catalogue[columns_ferr].values
73
  return f, ferr
74
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def _to_colors(self, flux, fluxerr):
76
  """ Convert fluxes to colors"""
77
- color = flux[:,:-1] / flux[:,1:]
78
- color_err = fluxerr[:,:-1]**2 / flux[:,1:]**2 + flux[:,:-1]**2 / flux[:,1:]**4 * fluxerr[:,:-1]**2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  return color,color_err
80
 
81
  def _set_combiend_target(self, catalogue):
@@ -92,13 +147,20 @@ class archive():
92
 
93
  return catalogue
94
 
95
- def _correct_extinction(self,catalogue, f):
96
  """Corrects for extinction"""
97
  ext_correction_cols = [f'EB_V_corr_FLUX_{x}' for x in ['G','R','I','Z','Y','J','H']]
98
- ext_correction = catalogue[ext_correction_cols].values
 
 
 
 
99
 
100
  f = f * ext_correction
101
- return f
 
 
 
102
 
103
  def _select_only_zspec(self,catalogue,cat_flag=None):
104
  """Selects only galaxies with spectroscopic redshift"""
@@ -158,22 +220,24 @@ class archive():
158
  return catalogue_valid
159
 
160
 
161
- def _set_training_data(self,catalogue, only_zspec=True, extinction_corr=True, convert_colors=True):
162
 
163
- cat_da = self._exclude_only_zspec(catalogue)
164
  target_z_train_DA = cat_da['photo_z_L15'].values
165
 
166
 
167
  if only_zspec:
 
168
  catalogue = self._select_only_zspec(catalogue, cat_flag='Calib')
169
  catalogue = self._clean_zspec_sample(catalogue, flags_kept=self.flags_kept)
170
  else:
 
171
  catalogue = self._take_zspec_and_photoz(catalogue, cat_flag='Calib')
172
 
173
 
174
  self.cat_train=catalogue
175
  f, ferr = self._extract_fluxes(catalogue)
176
-
177
  f_DA, ferr_DA = self._extract_fluxes(cat_da)
178
  idx = np.random.randint(0, len(f_DA), len(f))
179
  f_DA, ferr_DA = f_DA[idx], ferr_DA[idx]
@@ -182,9 +246,11 @@ class archive():
182
 
183
 
184
  if extinction_corr==True:
 
185
  f = self._correct_extinction(catalogue,f)
186
-
187
  if convert_colors==True:
 
188
  col, colerr = self._to_colors(f, ferr)
189
  col_DA, colerr_DA = self._to_colors(f_DA, ferr_DA)
190
 
 
1
  import numpy as np
2
  import pandas as pd
3
  from astropy.io import fits
 
4
  from astropy.table import Table
5
  from scipy.spatial import KDTree
6
+ from matplotlib import pyplot as plt
7
+ from matplotlib import rcParams
8
+ from pathlib import Path
9
+ from loguru import logger
10
 
 
11
 
 
12
  rcParams["mathtext.fontset"] = "stix"
13
  rcParams["font.family"] = "STIXGeneral"
14
 
15
+ class Archive:
16
+ def __init__(self, path,
17
+ aperture=2,
18
+ drop_stars=True,
19
+ clean_photometry=True,
20
+ convert_colors=True,
21
+ extinction_corr=True,
22
+ only_zspec=True,
23
+ all_apertures=False,
24
+ target_test='specz', flags_kept=[3, 3.1, 3.4, 3.5, 4]):
25
 
 
 
26
 
27
+ logger.info("Starting archive")
28
  self.aperture = aperture
29
+ self.all_apertures = all_apertures
30
+ self.flags_kept = flags_kept
31
 
32
+ filename_calib = 'euclid_cosmos_DC2_S1_v2.1_calib_clean.fits'
33
+ filename_valid = 'euclid_cosmos_DC2_S1_v2.1_valid_matched.fits'
34
 
35
+ # Use Path for file handling
36
+ path_calib = Path(path) / filename_calib
37
+ path_valid = Path(path) / filename_valid
38
 
39
+ # Open the calibration FITS file
40
+ with fits.open(path_calib) as hdu_list:
41
+ cat = Table(hdu_list[1].data).to_pandas()
42
+ cat = cat[(cat['z_spec_S15'] > 0) | (cat['photo_z_L15'] > 0)]
43
 
44
+ # Open the validation FITS file
45
+ with fits.open(path_valid) as hdu_list:
46
+ cat_test = Table(hdu_list[1].data).to_pandas()
 
47
 
48
+ # Store the catalogs for later use
49
+ self.cat = cat
50
+ self.cat_test = cat_test
51
 
52
 
53
  if drop_stars==True:
54
+ logger.info("dropping stars...")
55
  cat = cat[cat.mu_class_L07==1]
56
  cat_test = cat_test[cat_test.mu_class_L07==1]
57
 
58
  if clean_photometry==True:
59
+ logger.info("cleaning stars...")
60
  cat = self._clean_photometry(cat)
61
  cat_test = self._clean_photometry(cat_test)
62
 
 
75
 
76
 
77
  self._set_training_data(cat,
78
+ cat_test,
79
  only_zspec=only_zspec,
80
  extinction_corr=extinction_corr,
81
  convert_colors=convert_colors)
 
86
 
87
 
88
  def _extract_fluxes(self,catalogue):
89
+ if self.all_apertures:
90
+ columns_f = [f'FLUX_{x}_{a}' for a in [1,2,3] for x in ['G','R','I','Z','Y','J','H']]
91
+ columns_ferr = [f'FLUXERR_{x}_{a}' for a in [1,2,3] for x in ['G','R','I','Z','Y','J','H'] ]
92
+ else:
93
+ columns_f = [f'FLUX_{x}_{self.aperture}' for x in ['G','R','I','Z','Y','J','H']]
94
+ columns_ferr = [f'FLUXERR_{x}_{self.aperture}' for x in ['G','R','I','Z','Y','J','H']]
95
 
96
  f = catalogue[columns_f].values
97
  ferr = catalogue[columns_ferr].values
98
  return f, ferr
99
 
100
+ def _extract_magnitudes(self,catalogue):
101
+ if self.all_apertures:
102
+ columns_m = [f'MAG_{x}_{a}' for a in [1,2,3] for x in ['G','R','I','Z','Y','J','H']]
103
+ columns_merr = [f'MAGERR_{x}_{a}' for a in [1,2,3] for x in ['G','R','I','Z','Y','J','H'] ]
104
+ else:
105
+ columns_m = [f'MAG_{x}_{self.aperture}' for x in ['G','R','I','Z','Y','J','H']]
106
+ columns_merr = [f'MAGERR_{x}_{self.aperture}' for x in ['G','R','I','Z','Y','J','H']]
107
+
108
+ m = catalogue[columns_m].values
109
+ merr = catalogue[columns_merr].values
110
+ return m, merr
111
+
112
  def _to_colors(self, flux, fluxerr):
113
  """ Convert fluxes to colors"""
114
+
115
+ if self.all_apertures:
116
+
117
+ for a in range(3):
118
+ lim1 = 7*a
119
+ lim2 = 7*(a+1)
120
+ c = flux[:,lim1:(lim2-1)] / flux[:,(lim1+1):lim2]
121
+ cerr = np.sqrt((fluxerr[:,lim1:(lim2-1)]/ flux[:,(lim1+1):lim2])**2 + (flux[:,lim1:(lim2-1)] / flux[:,(lim1+1):lim2]**2)**2 * fluxerr[:,(lim1+1):lim2]**2)
122
+
123
+ if a==0:
124
+ color = c
125
+ color_err = cerr
126
+ else:
127
+ color = np.concatenate((color,c),axis=1)
128
+ color_err = np.concatenate((color_err,cerr),axis=1)
129
+
130
+ else:
131
+ color = flux[:,:-1] / flux[:,1:]
132
+
133
+ color_err = np.sqrt((fluxerr[:,:-1]/ flux[:,1:])**2 + (flux[:,:-1] / flux[:,1:]**2)**2 * fluxerr[:,1:]**2)
134
  return color,color_err
135
 
136
  def _set_combiend_target(self, catalogue):
 
147
 
148
  return catalogue
149
 
150
+ def _correct_extinction(self,catalogue, f, return_ext_corr=False):
151
  """Corrects for extinction"""
152
  ext_correction_cols = [f'EB_V_corr_FLUX_{x}' for x in ['G','R','I','Z','Y','J','H']]
153
+ if self.all_apertures:
154
+ ext_correction = catalogue[ext_correction_cols].values
155
+ ext_correction = np.concatenate((ext_correction,ext_correction,ext_correction),axis=1)
156
+ else:
157
+ ext_correction = catalogue[ext_correction_cols].values
158
 
159
  f = f * ext_correction
160
+ if return_ext_corr:
161
+ return f, ext_correction
162
+ else:
163
+ return f
164
 
165
  def _select_only_zspec(self,catalogue,cat_flag=None):
166
  """Selects only galaxies with spectroscopic redshift"""
 
220
  return catalogue_valid
221
 
222
 
223
+ def _set_training_data(self,catalogue, catalogue_da, only_zspec=True, extinction_corr=True, convert_colors=True):
224
 
225
+ cat_da = self._exclude_only_zspec(catalogue_da)
226
  target_z_train_DA = cat_da['photo_z_L15'].values
227
 
228
 
229
  if only_zspec:
230
+ logger.info("Selecting only galaxies with spectroscopic redshift")
231
  catalogue = self._select_only_zspec(catalogue, cat_flag='Calib')
232
  catalogue = self._clean_zspec_sample(catalogue, flags_kept=self.flags_kept)
233
  else:
234
+ logger.info("Selecting galaxies with spectroscopic redshift and high-precision photo-z")
235
  catalogue = self._take_zspec_and_photoz(catalogue, cat_flag='Calib')
236
 
237
 
238
  self.cat_train=catalogue
239
  f, ferr = self._extract_fluxes(catalogue)
240
+
241
  f_DA, ferr_DA = self._extract_fluxes(cat_da)
242
  idx = np.random.randint(0, len(f_DA), len(f))
243
  f_DA, ferr_DA = f_DA[idx], ferr_DA[idx]
 
246
 
247
 
248
  if extinction_corr==True:
249
+ logger.info("Correcting MW extinction")
250
  f = self._correct_extinction(catalogue,f)
251
+
252
  if convert_colors==True:
253
+ logger.info("Converting to colors")
254
  col, colerr = self._to_colors(f, ferr)
255
  col_DA, colerr_DA = self._to_colors(f_DA, ferr_DA)
256
 
temps/plots.py CHANGED
@@ -1,7 +1,7 @@
1
  import numpy as np
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
- from utils import nmad
5
 
6
  import numpy as np
7
  import matplotlib.pyplot as plt
@@ -181,68 +181,7 @@ def plot_PIT(pit_list_1, pit_list_2 = None, pit_list_3=None, sample='specz', lab
181
  # Show the plot
182
  plt.show()
183
 
184
-
185
- import numpy as np
186
- import matplotlib.pyplot as plt
187
- from scipy import stats
188
-
189
- def plot_photoz(df_list, nbins, xvariable, metric, type_bin='bin',label_list=None, samp='zs', save=False):
190
- #plot properties
191
- plt.rcParams['font.family'] = 'serif'
192
- plt.rcParams['font.size'] = 12
193
-
194
-
195
-
196
-
197
- bin_edges = stats.mstats.mquantiles(df_list[0][xvariable].values, np.linspace(0.05, 1, nbins))
198
- print(bin_edges)
199
- cmap = plt.get_cmap('Dark2') # Choose a colormap for coloring lines
200
- plt.figure(figsize=(6, 5))
201
- ls = ['--',':','-']
202
-
203
- for i, df in enumerate(df_list):
204
- ydata, xlab = [], []
205
-
206
- for k in range(len(bin_edges)-1):
207
- edge_min = bin_edges[k]
208
- edge_max = bin_edges[k+1]
209
-
210
- mean_mag = (edge_max + edge_min) / 2
211
-
212
- if type_bin == 'bin':
213
- df_plot = df[(df[xvariable] > edge_min) & (df[xvariable] < edge_max)]
214
- elif type_bin == 'cum':
215
- df_plot = df[(df[xvariable] < edge_max)]
216
- else:
217
- raise ValueError("Only type_bin=='bin' for binned and 'cum' for cumulative are supported")
218
-
219
- xlab.append(mean_mag)
220
- if metric == 'sig68':
221
- ydata.append(sigma68(df_plot.zwerr))
222
- elif metric == 'bias':
223
- ydata.append(np.mean(df_plot.zwerr))
224
- elif metric == 'nmad':
225
- ydata.append(nmad(df_plot.zwerr))
226
- elif metric == 'outliers':
227
- ydata.append(len(df_plot[np.abs(df_plot.zwerr) > 0.15]) / len(df_plot)*100)
228
-
229
- print(ydata)
230
- color = cmap(i) # Get a different color for each dataframe
231
- plt.plot(xlab, ydata,marker='.', lw=1, label=f'{label_list[i]}', color=color, ls=ls[i])
232
-
233
- if xvariable == 'VISmag':
234
- xvariable_lab = 'VIS'
235
-
236
-
237
 
238
- plt.ylabel(f'{metric} $[\\Delta z]$', fontsize=18)
239
- plt.xlabel(f'{xvariable_lab}', fontsize=16)
240
- plt.grid(False)
241
- plt.legend()
242
-
243
- if save==True:
244
- plt.savefig(f'{metric}_{xvariable}_{samp}.pdf', dpi=300, bbox_inches='tight')
245
- plt.show()
246
 
247
 
248
  def plot_nz(df_list,
@@ -336,3 +275,43 @@ def plot_crps(crps_list_1, crps_list_2 = None, crps_list_3=None, labels=None, s
336
  # Show the plot
337
  plt.show()
338
 
 
1
  import numpy as np
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
+ from temps.utils import nmad
5
 
6
  import numpy as np
7
  import matplotlib.pyplot as plt
 
181
  # Show the plot
182
  plt.show()
183
 
184
 
185
 
186
 
187
  def plot_nz(df_list,
 
275
  # Show the plot
276
  plt.show()
277
 
278
+
279
+
280
+ def plot_nz_counts(df, bins=np.arange(0,5,0.2)):  # renamed: avoid shadowing plot_nz(df_list, ...) defined above
281
+ kwargs=dict( bins=bins,alpha=0.5)
282
+ plt.hist(df.zs.values, color='grey', ls='-' ,**kwargs)
283
+ counts, _ = np.histogram(df.z.values, bins=bins)
284
+
285
+ plt.plot((bins[:-1]+bins[1:])*0.5,counts, color ='purple')
286
+
287
+ #plt.legend(fontsize=14)
288
+ plt.xlabel(r'Redshift', fontsize=14)
289
+ plt.ylabel(r'Counts', fontsize=14)
290
+ plt.yscale('log')
291
+
292
+ plt.show()
293
+
294
+ return
295
+
296
+
297
+ def plot_scatter(df, sample='specz', save=True):
298
+ from scipy.stats import gaussian_kde  # missing at module level; needed for the density estimate below
+ # Calculate the point density
299
+ xy = np.vstack([df.zs.values,df.z.values])
300
+ zd = gaussian_kde(xy)(xy)
301
+
302
+ fig, ax = plt.subplots()
303
+ plt.scatter(df.zs.values, df.z.values,c=zd, s=1)
304
+ plt.xlim(0,5)
305
+ plt.ylim(0,5)
306
+
307
+ plt.xlabel(r'$z_{\rm s}$', fontsize = 14)
308
+ plt.ylabel('$z$', fontsize = 14)
309
+
310
+ plt.xticks(fontsize = 12)
311
+ plt.yticks(fontsize = 12)
312
+
313
+ if save==True:
314
+ plt.savefig(f'{sample}_scatter.pdf', dpi = 300, bbox_inches='tight')
315
+
316
+ plt.show()
317
+
temps/temps.py CHANGED
@@ -1,257 +1,267 @@
 
 
1
  import torch
2
- from torch.utils.data import DataLoader, dataset, TensorDataset
3
  from torch import nn, optim
 
4
  from torch.optim import lr_scheduler
5
- import numpy as np
6
- import pandas as pd
7
- from astropy.io import fits
8
- import os
9
- from astropy.table import Table
10
- from scipy.spatial import KDTree
11
- from scipy.special import erf
12
  from scipy.stats import norm
13
- import sys
14
-
15
- sys.path.append('/.')
16
- from utils import maximum_mean_discrepancy, compute_kernel
17
-
18
- class Temps_module():
19
- """ Define class"""
20
-
21
- def __init__(self, modelF, modelZ, batch_size=100,rejection_param=1, da=True, verbose=False):
22
- self.modelZ=modelZ
23
- self.modelF=modelF
24
- self.da=da
25
- self.verbose=verbose
26
- self.ngaussians=modelZ.ngaussians
27
-
28
- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
29
- self.batch_size=batch_size
30
- self.rejection_parameter=rejection_param
31
-
32
-
33
-
34
- def _get_dataloaders(self, input_data, target_data, input_data_DA, target_data_DA, val_fraction=0.1):
 
 
  input_data = torch.Tensor(input_data)
36
  target_data = torch.Tensor(target_data)
37
- if input_data_DA is not None:
38
- input_data_DA = torch.Tensor(input_data_DA)
39
- target_data_DA = torch.Tensor(target_data_DA)
40
- else:
41
- input_data_DA = input_data.clone()
42
- target_data_DA = target_data.clone()
43
-
44
- dataset = TensorDataset(input_data, input_data_DA, target_data, target_data_DA)
45
- trainig_dataset, val_dataset = torch.utils.data.random_split(dataset, [int(len(dataset)*(1-val_fraction)), int(len(dataset)*val_fraction)+1])
46
- loader_train = DataLoader(trainig_dataset, batch_size=self.batch_size, shuffle = True)
47
- loader_val = DataLoader(val_dataset, batch_size=64, shuffle = True)
 
 
 
 
48
 
49
  return loader_train, loader_val
50
 
51
-
52
-
53
-
54
- def _loss_function(self,mean, std, logmix, true):
55
-
56
- log_prob = logmix - 0.5*(mean - true[:,None]).pow(2) / std.pow(2) - torch.log(std)
57
- log_prob = torch.logsumexp(log_prob, 1)
58
  loss = -log_prob.mean()
59
-
60
- return loss
61
-
62
- def _loss_function_DA(self,f1, f2):
63
- kl_loss = nn.KLDivLoss(reduction="batchmean",log_target=True)
64
  loss = kl_loss(f1, f2)
65
- loss = torch.log(loss)
66
- #print('f1',f1)
67
- #print('f2',f2)
68
-
69
- return loss
70
 
71
- def _to_numpy(self,x):
 
72
  return x.detach().cpu().numpy()
73
-
74
-
75
-
76
- def train(self,input_data,
77
- input_data_DA,
78
- target_data,
79
- target_data_DA,
80
- nepochs=10,
81
- step_size = 100,
82
- val_fraction=0.1,
83
- lr=1e-3,
84
- weight_decay=0):
85
- self.modelZ = self.modelZ.train()
86
- self.modelF = self.modelF.train()
87
-
88
- loader_train, loader_val = self._get_dataloaders(input_data, target_data, input_data_DA, target_data_DA, val_fraction=0.1)
89
- optimizerZ = optim.Adam(self.modelZ.parameters(), lr=lr, weight_decay=weight_decay)
90
- optimizerF = optim.Adam(self.modelF.parameters(), lr=lr, weight_decay=weight_decay)
91
-
92
- schedulerZ = torch.optim.lr_scheduler.StepLR(optimizerZ, step_size=step_size, gamma =0.1)
93
- schedulerF = torch.optim.lr_scheduler.StepLR(optimizerF, step_size=step_size, gamma =0.1)
94
-
95
- self.modelZ = self.modelZ.to(self.device)
96
- self.modelF = self.modelF.to(self.device)
97
 
98
- self.loss_train, self.loss_validation = [],[]
99
-
100
- for epoch in range(nepochs):
101
- for input_data, input_data_da, target_data, target_data_DA in loader_train:
102
- _loss_train, _loss_validation = [],[]
 
 
103
 
104
- input_data = input_data.to(self.device)
105
- target_data = target_data.to(self.device)
106
-
 
 
 
 
 
 
107
  if self.da:
108
  input_data_da = input_data_da.to(self.device)
109
- target_data_DA = target_data_DA.to(self.device)
110
 
111
- optimizerF.zero_grad()
112
- optimizerZ.zero_grad()
113
 
114
- features = self.modelF(input_data)
115
- if self.da:
116
- features_DA = self.modelF(input_data_da)
117
 
118
- mu, logsig, logmix_coeff = self.modelZ(features)
119
- logsig = torch.clamp(logsig,-6,2)
120
  sig = torch.exp(logsig)
121
 
122
-                 lossZ = self._loss_function(mu, sig, logmix_coeff, target_data)
-
-                 #mu, logsig, logmix_coeff = self.modelZ(features_DA)
-                 #logsig = torch.clamp(logsig,-6,2)
-                 #sig = torch.exp(logsig)
-
-                 #lossZ_DA = self._loss_function(mu, sig, logmix_coeff, target_data_DA)
-
-                 if self.da:
-                     lossDA = maximum_mean_discrepancy(features, features_DA, kernel_type='rbf')
-                     lossDA = lossDA.sum()
-                     loss = lossZ +1e3*lossDA
-                 else:
-                     loss = lossZ
-
-                 _loss_train.append(lossZ.item())
-
                  loss.backward()
-                 optimizerF.step()
-                 optimizerZ.step()
-
-                 schedulerF.step()
-                 schedulerZ.step()
-
-             self.loss_train.append(np.mean(_loss_train))
-
-             for input_data, _, target_data, _ in loader_val:
-
                  input_data = input_data.to(self.device)
                  target_data = target_data.to(self.device)

-                 features = self.modelF(input_data)
-                 mu, logsig, logmix_coeff = self.modelZ(features)
-
-                 logsig = torch.clamp(logsig,-6,2)
                  sig = torch.exp(logsig)

                  loss_val = self._loss_function(mu, sig, logmix_coeff, target_data)
                  _loss_validation.append(loss_val.item())

-             self.loss_validation.append(np.mean(_loss_validation))
-
-             if self.verbose:
-                 print(f'training_loss:{loss}',f'testing_loss:{loss_val}')
-
      def get_features(self, input_data):
-         self.modelF = self.modelF.eval()
-         self.modelF = self.modelF.to(self.device)
-
          input_data = input_data.to(self.device)
-
-         features = self.modelF(input_data)
-
-         return features.detach().cpu().numpy()
-
-     def get_pz(self,input_data, return_pz=True, return_flag=True, retrun_odds=False):
-         self.modelZ = self.modelZ.eval()
-         self.modelZ = self.modelZ.to(self.device)
-         self.modelF = self.modelF.eval()
-         self.modelF = self.modelF.to(self.device)

          input_data = input_data.to(self.device)
-
-         features = self.modelF(input_data)
-         mu, logsig, logmix_coeff = self.modelZ(features)
-         logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)

          mix_coeff = torch.exp(logmix_coeff)

-         z = (mix_coeff * mu).sum(1)
-         zerr = torch.sqrt( (mix_coeff * sig**2).sum(1) + (mix_coeff * (mu - mu.mean(1)[:,None])**2).sum(1))
-
-         mu, mix_coeff, sig = mu.detach().cpu().numpy(), mix_coeff.detach().cpu().numpy(), sig.detach().cpu().numpy()
-
-         if return_pz==True:
-             zgrid = np.linspace(0, 5, 1000)
-             pdf_mixture = np.zeros(shape=(len(input_data), len(zgrid)))
-             for ii in range(len(input_data)):
-                 for i in range(self.ngaussians):
-                     pdf_mixture[ii] += mix_coeff[ii,i] * norm.pdf(zgrid, mu[ii,i], sig[ii,i])
-             if return_flag==True:
-                 #narrow peak
-                 pdf_mixture = pdf_mixture / pdf_mixture.sum(1)[:,None]
-                 diff_matrix = np.abs(self._to_numpy(z)[:,None] - zgrid[None,:])
-                 #odds
-                 idx_peak = np.argmax(pdf_mixture,1)
-                 zpeak = zgrid[idx_peak]
-                 diff_matrix_upper = np.abs((zpeak+0.05)[:,None] - zgrid[None,:])
-                 diff_matrix_lower = np.abs((zpeak-0.05)[:,None] - zgrid[None,:])
-
-                 idx = np.argmin(diff_matrix,1)
-                 idx_upper = np.argmin(diff_matrix_upper,1)
-                 idx_lower = np.argmin(diff_matrix_lower,1)
-
-                 p_z_x = np.zeros(shape=(len(z)))
-                 odds = np.zeros(shape=(len(z)))
-
-                 for ii in range(len(z)):
-                     p_z_x[ii] = pdf_mixture[ii,idx[ii]]
-                     odds[ii] = pdf_mixture[ii,:idx_upper[ii]].sum() - pdf_mixture[ii,:idx_lower[ii]].sum()
-
-                 return self._to_numpy(z),self._to_numpy(zerr), pdf_mixture, p_z_x, odds
-             else:
-                 return self._to_numpy(z),self._to_numpy(zerr), pdf_mixture
-
          else:
-             return self._to_numpy(z),self._to_numpy(zerr)
-
-     def pit(self, input_data, target_data):

          pit_list = []

-         self.modelF = self.modelF.eval()
-         self.modelF = self.modelF.to(self.device)
-         self.modelZ = self.modelZ.eval()
-         self.modelZ = self.modelZ.to(self.device)

          input_data = input_data.to(self.device)

-         features = self.modelF(input_data)
-         mu, logsig, logmix_coeff = self.modelZ(features)

          logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)
@@ -267,7 +277,8 @@ class Temps_module():

          return pit_list

-     def crps(self, input_data, target_data):

          def measure_crps(cdf, t):
              zgrid = np.linspace(0,4,1000)
@@ -281,16 +292,16 @@ class Temps_module():

          crps_list = []

-         self.modelF = self.modelF.eval()
-         self.modelF = self.modelF.to(self.device)
-         self.modelZ = self.modelZ.eval()
-         self.modelZ = self.modelZ.to(self.device)

          input_data = input_data.to(self.device)

-         features = self.modelF(input_data)
-         mu, logsig, logmix_coeff = self.modelZ(features)
          logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)

@@ -302,21 +313,19 @@ class Temps_module():
          z = (mix_coeff * mu).sum(1)

          x = np.linspace(0, 4, 1000)
-         pdf_mixture = np.zeros(shape=(len(target_data), len(x)))
          for ii in range(len(input_data)):
              for i in range(6):
-                 pdf_mixture[ii] += mix_coeff[ii,i] * norm.pdf(x, mu[ii,i], sig[ii,i])

-         pdf_mixture = pdf_mixture / pdf_mixture.sum(1)[:,None]

-         cdf_mixture = np.cumsum(pdf_mixture,1)

-         crps_value = measure_crps(cdf_mixture, target_data)

          return crps_value
-
+ import numpy as np
+ import pandas as pd
  import torch
  from torch import nn, optim
+ from torch.utils.data import DataLoader, TensorDataset
  from torch.optim import lr_scheduler
  from scipy.stats import norm
+ from loguru import logger
+ from tqdm import tqdm  # Import tqdm for progress bars
+
+ # Local imports
+ from temps.utils import maximum_mean_discrepancy
+
+
+ class TempsModule:
+     """Class for managing temperature-related models and training."""
+
+     def __init__(
+         self,
+         model_f,
+         model_z,
+         batch_size=100,
+         rejection_param=1,
+         da=True,
+         verbose=False,
+     ):
+         self.model_z = model_z
+         self.model_f = model_f
+         self.da = da
+         self.verbose = verbose
+         self.ngaussians = model_z.ngaussians
+
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.batch_size = batch_size
+         self.rejection_parameter = rejection_param
+
+     def _get_dataloaders(
+         self, input_data, target_data, input_data_da=None, val_fraction=0.1
+     ):
+         """Create training and validation dataloaders."""
          input_data = torch.Tensor(input_data)
          target_data = torch.Tensor(target_data)
+         input_data_da = (
+             torch.Tensor(input_data_da)
+             if input_data_da is not None
+             else input_data.clone()
+         )
+
+         dataset = TensorDataset(input_data, input_data_da, target_data)
+         train_dataset, val_dataset = torch.utils.data.random_split(
+             dataset,
+             [int(len(dataset) * (1 - val_fraction)), int(len(dataset) * val_fraction)],
+         )
+         loader_train = DataLoader(
+             train_dataset, batch_size=self.batch_size, shuffle=True
+         )
+         loader_val = DataLoader(val_dataset, batch_size=64, shuffle=True)

          return loader_train, loader_val

+     def _loss_function(self, mean, std, logmix, true):
+         """Compute the loss function."""
+         log_prob = (
+             logmix - 0.5 * (mean - true[:, None]).pow(2) / std.pow(2) - torch.log(std)
+         )
+         log_prob = torch.logsumexp(log_prob, dim=1)
          loss = -log_prob.mean()
+         return loss
+
+     def _loss_function_da(self, f1, f2):
+         """Compute the KL divergence loss for domain adaptation."""
+         kl_loss = nn.KLDivLoss(reduction="batchmean", log_target=True)
          loss = kl_loss(f1, f2)
+         return torch.log(loss)

+     def _to_numpy(self, x):
+         """Convert a tensor to a NumPy array."""
          return x.detach().cpu().numpy()

+     def train(
+         self,
+         input_data,
+         input_data_da,
+         target_data,
+         nepochs=10,
+         step_size=100,
+         val_fraction=0.1,
+         lr=1e-3,
+         weight_decay=0,
+     ):
+         """Train the models using provided data."""
+         self.model_z.train()
+         self.model_f.train()
+
+         loader_train, loader_val = self._get_dataloaders(
+             input_data, target_data, input_data_da, val_fraction
+         )
+         optimizer_z = optim.Adam(
+             self.model_z.parameters(), lr=lr, weight_decay=weight_decay
+         )
+         optimizer_f = optim.Adam(
+             self.model_f.parameters(), lr=lr, weight_decay=weight_decay
+         )
+
+         scheduler_z = lr_scheduler.StepLR(optimizer_z, step_size=step_size, gamma=0.1)
+         scheduler_f = lr_scheduler.StepLR(optimizer_f, step_size=step_size, gamma=0.1)
+
+         self.model_z.to(self.device)
+         self.model_f.to(self.device)
+
+         loss_train, loss_validation = [], []

+         for epoch in range(nepochs):
+             _loss_train, _loss_validation = [], []
+             logger.info(f"Epoch {epoch + 1}/{nepochs} starting...")
+             for input_data, input_data_da, target_data in tqdm(
+                 loader_train, desc="Training", unit="batch"
+             ):
+                 input_data, target_data = input_data.to(self.device), target_data.to(
+                     self.device
+                 )
                  if self.da:
                      input_data_da = input_data_da.to(self.device)

+                 optimizer_f.zero_grad()
+                 optimizer_z.zero_grad()

+                 features = self.model_f(input_data)
+                 features_da = self.model_f(input_data_da) if self.da else None

+                 mu, logsig, logmix_coeff = self.model_z(features)
+                 logsig = torch.clamp(logsig, -6, 2)
                  sig = torch.exp(logsig)

+                 loss_z = self._loss_function(mu, sig, logmix_coeff, target_data)
+                 loss = loss_z + (
+                     1e3
+                     * maximum_mean_discrepancy(
+                         features, features_da, kernel_type="rbf"
+                     ).sum()
+                     if self.da
+                     else 0
+                 )
+
+                 _loss_train.append(loss_z.item())

                  loss.backward()
+                 optimizer_f.step()
+                 optimizer_z.step()
+
+                 scheduler_f.step()
+                 scheduler_z.step()
+
+             loss_train.append(np.mean(_loss_train))
+             _loss_validation = self._validate(loader_val, target_data)
+
+             logger.info(
+                 f"Epoch {epoch + 1}: Training Loss: {np.mean(_loss_train):.4f}, Validation Loss: {np.mean(_loss_validation):.4f}"
+             )

+     def _validate(self, loader_val, target_data):
+         """Validate the model on the validation dataset."""
+         self.model_z.eval()
+         self.model_f.eval()
+         _loss_validation = []

+         with torch.no_grad():
+             for input_data, _, target_data in tqdm(
+                 loader_val, desc="Validating", unit="batch"
+             ):
                  input_data = input_data.to(self.device)
                  target_data = target_data.to(self.device)

+                 features = self.model_f(input_data)
+                 mu, logsig, logmix_coeff = self.model_z(features)
+                 logsig = torch.clamp(logsig, -6, 2)
                  sig = torch.exp(logsig)

                  loss_val = self._loss_function(mu, sig, logmix_coeff, target_data)
                  _loss_validation.append(loss_val.item())

+         return _loss_validation

      def get_features(self, input_data):
+         """Get features from the model."""
+         self.model_f.eval()
          input_data = input_data.to(self.device)
+         features = self.model_f(input_data)
+         return self._to_numpy(features)

+     def get_pz(self, input_data, return_pz=True, return_flag=True, return_odds=False):
+         """Get the predicted z values and their uncertainties."""
+         logger.info("Predicting photo-z for the input galaxies...")
+         self.model_z.eval()
+         self.model_f.eval()

          input_data = input_data.to(self.device)
+         features = self.model_f(input_data)
+         mu, logsig, logmix_coeff = self.model_z(features)
+         logsig = torch.clamp(logsig, -6, 2)
          sig = torch.exp(logsig)

          mix_coeff = torch.exp(logmix_coeff)
+         z = (mix_coeff * mu).sum(dim=1)
+         zerr = torch.sqrt(
+             (mix_coeff * sig**2).sum(dim=1)
+             + (mix_coeff * (mu - mu.mean(dim=1, keepdim=True)) ** 2).sum(dim=1)
+         )

+         mu, mix_coeff, sig = map(self._to_numpy, (mu, mix_coeff, sig))

+         if return_pz:
+             logger.info("Returning p(z)")
+             return self._calculate_pdf(z, mu, sig, mix_coeff, return_flag)
          else:
+             return self._to_numpy(z), self._to_numpy(zerr)
+
+     def _calculate_pdf(self, z, mu, sig, mix_coeff, return_flag):
+         """Calculate the probability density function."""
+         zgrid = np.linspace(0, 5, 1000)
+         pz = np.zeros((len(z), len(zgrid)))
+
+         for ii in range(len(z)):
+             for i in range(self.ngaussians):
+                 pz[ii] += mix_coeff[ii, i] * norm.pdf(
+                     zgrid, mu[ii, i], sig[ii, i]
+                 )
+
+         if return_flag:
+             logger.info("Calculating and returning ODDS")
+             pz /= pz.sum(axis=1, keepdims=True)
+             return self._calculate_odds(z, pz, zgrid)
+         return self._to_numpy(z), pz
+
+     def _calculate_odds(self, z, pz, zgrid):
+         """Calculate odds based on the PDF."""
+         logger.info('Calculating ODDS values')
+         diff_matrix = np.abs(self._to_numpy(z)[:, None] - zgrid[None, :])
+         idx_peak = np.argmax(pz, axis=1)
+         zpeak = zgrid[idx_peak]
+         idx_upper = np.argmin(np.abs((zpeak + 0.05)[:, None] - zgrid[None, :]), axis=1)
+         idx_lower = np.argmin(np.abs((zpeak - 0.05)[:, None] - zgrid[None, :]), axis=1)
+
+         odds = []
+         for jj in range(len(pz)):
+             odds.append(pz[jj,idx_lower[jj]:(idx_upper[jj]+1)].sum())
+
+         odds = np.array(odds)
+         return self._to_numpy(z), pz, odds
+
+     def calculate_pit(self, input_data, target_data):
+         logger.info('Calculating PIT values')

          pit_list = []

+         self.model_f = self.model_f.eval()
+         self.model_f = self.model_f.to(self.device)
+         self.model_z = self.model_z.eval()
+         self.model_z = self.model_z.to(self.device)

          input_data = input_data.to(self.device)

+         features = self.model_f(input_data)
+         mu, logsig, logmix_coeff = self.model_z(features)

          logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)
@@ -267,7 +277,8 @@ class Temps_module():

          return pit_list

+     def calculate_crps(self, input_data, target_data):
+         logger.info('Calculating CRPS values')

          def measure_crps(cdf, t):
              zgrid = np.linspace(0,4,1000)
@@ -281,16 +292,16 @@ class Temps_module():

          crps_list = []

+         self.model_f = self.model_f.eval()
+         self.model_f = self.model_f.to(self.device)
+         self.model_z = self.model_z.eval()
+         self.model_z = self.model_z.to(self.device)

          input_data = input_data.to(self.device)

+         features = self.model_f(input_data)
+         mu, logsig, logmix_coeff = self.model_z(features)
          logsig = torch.clamp(logsig,-6,2)
          sig = torch.exp(logsig)

@@ -302,21 +313,19 @@ class Temps_module():
          z = (mix_coeff * mu).sum(1)

          x = np.linspace(0, 4, 1000)
+         pz = np.zeros(shape=(len(target_data), len(x)))
          for ii in range(len(input_data)):
              for i in range(6):
+                 pz[ii] += mix_coeff[ii,i] * norm.pdf(x, mu[ii,i], sig[ii,i])

+         pz = pz / pz.sum(1)[:,None]

+         cdf_z = np.cumsum(pz,1)

+         crps_value = measure_crps(cdf_z, target_data)

          return crps_value
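For orientation, here is a minimal usage sketch of the refactored TempsModule API above. The synthetic arrays, the assumed 6-feature input width of the encoder (its first layer is not shown in this diff), and the `temps.temps` import path are illustrative assumptions, not part of this commit:

import numpy as np
import torch

from temps.temps import TempsModule                     # assumed module path
from temps.temps_arch import EncoderPhotometry, MeasureZ

rng = np.random.default_rng(0)
n_features = 6                                           # ASSUMED encoder input width
colours_lab = rng.normal(size=(1000, n_features)).astype(np.float32)  # labelled sample
colours_tgt = rng.normal(size=(1000, n_features)).astype(np.float32)  # unlabelled target sample
z_lab = rng.uniform(0, 3, size=1000).astype(np.float32)

temps_module = TempsModule(EncoderPhotometry(), MeasureZ(num_gauss=6), batch_size=100, da=True)

# Trains the encoder and the mixture head jointly; with da=True the MMD term
# (weighted by 1e3, as in the loss above) pulls the encoded features of the
# labelled and target samples together.
temps_module.train(colours_lab, colours_tgt, z_lab, nepochs=10, lr=1e-3)

# With return_pz=True and return_flag=True the method returns the point
# estimate, the normalised p(z) on a grid over 0 < z < 5, and the ODDS values.
z, pz, odds = temps_module.get_pz(torch.Tensor(colours_tgt), return_pz=True, return_flag=True)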
temps/temps_arch.py CHANGED
@@ -20,52 +20,46 @@ class EncoderPhotometry(nn.Module):
              nn.Linear(50, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
-             nn.Linear(20, 10)
          )
-
      def forward(self, x):
          f = self.features(x)
-         f = F.log_softmax(f, dim=1)
          return f

-

  class MeasureZ(nn.Module):
      def __init__(self, num_gauss=10, dropout_prob=0):
          super(MeasureZ, self).__init__()
-
-         self.ngaussians=num_gauss
          self.measure_mu = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
-             nn.Linear(20, num_gauss)
          )

          self.measure_coeffs = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
-             nn.Linear(20, num_gauss)
          )

          self.measure_sigma = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
-             nn.Linear(20, num_gauss)
          )
-
-
      def forward(self, f):
          mu = self.measure_mu(f)
          sigma = self.measure_sigma(f)
          logmix_coeff = self.measure_coeffs(f)
-
-         logmix_coeff = logmix_coeff - torch.logsumexp(logmix_coeff, 1)[:,None]
-
-         return mu, sigma, logmix_coeff
-
              nn.Linear(50, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
+             nn.Linear(20, 10),
          )
+
      def forward(self, x):
          f = self.features(x)
+         f = F.log_softmax(f, dim=1)
          return f


  class MeasureZ(nn.Module):
      def __init__(self, num_gauss=10, dropout_prob=0):
          super(MeasureZ, self).__init__()
+
+         self.ngaussians = num_gauss
          self.measure_mu = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
+             nn.Linear(20, num_gauss),
          )

          self.measure_coeffs = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
+             nn.Linear(20, num_gauss),
          )

          self.measure_sigma = nn.Sequential(
              nn.Linear(10, 20),
              nn.Dropout(dropout_prob),
              nn.ReLU(),
+             nn.Linear(20, num_gauss),
          )
+
      def forward(self, f):
          mu = self.measure_mu(f)
          sigma = self.measure_sigma(f)
          logmix_coeff = self.measure_coeffs(f)

+         logmix_coeff = logmix_coeff - torch.logsumexp(logmix_coeff, 1)[:, None]

+         return mu, sigma, logmix_coeff
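A quick, self-contained check of the mixture head above (the `temps.temps_arch` import path is assumed). The 10-dimensional input matches the `nn.Linear(10, 20)` layers shown in the diff; note that the `sigma` head returns a raw value that TempsModule treats as log sigma, clamping and exponentiating it downstream:

import torch
from temps.temps_arch import MeasureZ   # assumed import path

head = MeasureZ(num_gauss=6)

f = torch.randn(4, 10)                  # stand-in for EncoderPhotometry features
mu, logsig, logmix = head(f)            # each output has shape (4, 6)

# The logsumexp normalisation in forward() makes the mixture weights sum to one.
print(torch.exp(logmix).sum(dim=1))     # ~tensor([1., 1., 1., 1.])

# Downstream, TempsModule recovers positive Gaussian widths via clamp + exp.
sig = torch.exp(torch.clamp(logsig, -6, 2))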
temps/utils.py CHANGED
@@ -3,113 +3,22 @@ import pandas as pd
  import matplotlib.pyplot as plt
  from scipy import stats
  import torch
- from scipy.stats import gaussian_kde

- def nmad(data):
-     return 1.4826 * np.median(np.abs(data - np.median(data)))
-
- def sigma68(data): return 0.5*(pd.Series(data).quantile(q = 0.84) - pd.Series(data).quantile(q = 0.16))
-
- def plot_photoz(df_list, nbins, xvariable, metric, type_bin='bin',label_list=None, samp='zs', save=False):
-     #plot properties
-     plt.rcParams['font.family'] = 'serif'
-     plt.rcParams['font.size'] = 12
-
-     bin_edges = stats.mstats.mquantiles(df_list[0][xvariable].values, np.linspace(0.05, 1, nbins))
-     print(bin_edges)
-     cmap = plt.get_cmap('Dark2')  # Choose a colormap for coloring lines
-     plt.figure(figsize=(6, 5))
-
-     for i, df in enumerate(df_list):
-         ydata, xlab = [], []
-
-         for k in range(len(bin_edges)-1):
-             edge_min = bin_edges[k]
-             edge_max = bin_edges[k+1]
-
-             mean_mag = (edge_max + edge_min) / 2
-
-             if type_bin == 'bin':
-                 df_plot = df[(df[xvariable] > edge_min) & (df[xvariable] < edge_max)]
-             elif type_bin == 'cum':
-                 df_plot = df[(df[xvariable] < edge_max)]
-             else:
-                 raise ValueError("Only type_bin=='bin' for binned and 'cum' for cumulative are supported")
-
-             xlab.append(mean_mag)
-             if metric == 'sig68':
-                 ydata.append(sigma68(df_plot.zwerr))
-             elif metric == 'bias':
-                 ydata.append(np.mean(df_plot.zwerr))
-             elif metric == 'nmad':
-                 ydata.append(nmad(df_plot.zwerr))
-             elif metric == 'outliers':
-                 ydata.append(len(df_plot[np.abs(df_plot.zwerr) > 0.15]) / len(df_plot)*100)
-
-         print(ydata)
-         color = cmap(i)  # Get a different color for each dataframe
-         plt.plot(xlab, ydata, ls='-', marker='.', lw=1, label=f'{label_list[i]}', color=color)
-
-     if xvariable == 'VISmag':
-         xvariable_lab = 'VIS'
-
-     plt.ylabel(f'{metric} $[\\Delta z]$', fontsize=18)
-     plt.xlabel(f'{xvariable_lab}', fontsize=16)
-     plt.grid(False)
-     plt.legend()
-
-     if save==True:
-         plt.savefig(f'{metric}_{xvariable}_{samp}.pdf', dpi=300, bbox_inches='tight')
-     plt.show()
-
-
- def plot_nz(df, bins=np.arange(0,5,0.2)):
-     kwargs=dict( bins=bins,alpha=0.5)
-     plt.hist(df.zs.values, color='grey', ls='-' ,**kwargs)
-     counts, _, =np.histogram(df.z.values, bins=bins)
-
-     plt.plot((bins[:-1]+bins[1:])*0.5,counts, color ='purple')
-
-     #plt.legend(fontsize=14)
-     plt.xlabel(r'Redshift', fontsize=14)
-     plt.ylabel(r'Counts', fontsize=14)
-     plt.yscale('log')
-
-     plt.show()
-
-     return
-
-
- def plot_scatter(df, sample='specz', save=True):
-     # Calculate the point density
-     xy = np.vstack([df.zs.values,df.z.values])
-     zd = gaussian_kde(xy)(xy)
-
-     fig, ax = plt.subplots()
-     plt.scatter(df.zs.values, df.z.values,c=zd, s=1)
-     plt.xlim(0,5)
-     plt.ylim(0,5)

-     plt.xlabel(r'$z_{\rm s}$', fontsize = 14)
-     plt.ylabel('$z$', fontsize = 14)

-     plt.xticks(fontsize = 12)
-     plt.yticks(fontsize = 12)

-     if save==True:
-         plt.savefig(f'{sample}_scatter.pdf', dpi = 300, bbox_inches='tight')

-     plt.show()
-
-
- def maximum_mean_discrepancy(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):
      """
      Compute the Maximum Mean Discrepancy (MMD) between two sets of samples.
@@ -130,7 +39,8 @@ def maximum_mean_discrepancy(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num
      mmd_loss = torch.mean(x_kernel) + torch.mean(y_kernel) - 2 * torch.mean(xy_kernel)
      return mmd_loss

- def compute_kernel(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):

      """
      Compute the kernel matrix based on the chosen kernel type.
@@ -151,73 +61,77 @@ def compute_kernel(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):
      x = x.unsqueeze(1).expand(x_size, y_size, dim)
      y = y.unsqueeze(0).expand(x_size, y_size, dim)

-     kernel_input = (x - y).pow(2).mean(2)  # Pairwise squared Euclidean distances

-     if kernel_type == 'linear':
          kernel_matrix = kernel_input
-     elif kernel_type == 'poly':
          kernel_matrix = (1 + kernel_input / kernel_mul).pow(kernel_num)
-     elif kernel_type == 'rbf':
          kernel_matrix = torch.exp(-kernel_input / (2 * kernel_mul**2))
-     elif kernel_type == 'sigmoid':
          kernel_matrix = torch.tanh(kernel_mul * kernel_input)
      else:
-         raise ValueError("Invalid kernel type. Supported types are 'linear', 'poly', 'rbf', and 'sigmoid'.")

      return kernel_matrix


- def select_cut(df,
-                completenss_lim=None,
-                nmad_lim = None,
-                outliers_lim=None,
-                return_df=False):
-
-     if (completenss_lim is None)&(nmad_lim is None)&(outliers_lim is None):
-         raise(ValueError("Select at least one cut"))
      elif sum(c is not None for c in [completenss_lim, nmad_lim, outliers_lim]) > 1:
          raise ValueError("Select only one cut at a time")
-
      else:
-         bin_edges = stats.mstats.mquantiles(df.zflag, np.arange(0,1.01,0.1))
-         scatter, eta, cmptnss, nobj = [],[],[], []

-         for k in range(len(bin_edges)-1):
              edge_min = bin_edges[k]
-             edge_max = bin_edges[k+1]

-             df_bin = df[(df.zflag > edge_min)]
-
-             cmptnss.append(np.round(len(df_bin)/len(df),2)*100)
              scatter.append(nmad(df_bin.zwerr))
-             eta.append(len(df_bin[np.abs(df_bin.zwerr)>0.15])/len(df_bin)*100)
              nobj.append(len(df_bin))
-
-         dfcuts = pd.DataFrame(data=np.c_[np.round(bin_edges[:-1],5), np.round(nobj,1), np.round(cmptnss,1), np.round(scatter,3), np.round(eta,2)], columns=['flagcut', 'Nobj','completeness', 'nmad', 'eta'])
-
          if completenss_lim is not None:
-             print('Selecting cut based on completeness')
-             selected_cut = dfcuts[dfcuts['completeness'] <= completenss_lim].iloc[0]
-
          elif nmad_lim is not None:
-             print('Selecting cut based on nmad')
-             selected_cut = dfcuts[dfcuts['nmad'] <= nmad_lim].iloc[0]

-
          elif outliers_lim is not None:
-             print('Selecting cut based on outliers')
-             selected_cut = dfcuts[dfcuts['eta'] <= outliers_lim].iloc[0]

-         print(f"This cut provides completeness of {selected_cut['completeness']}, nmad={selected_cut['nmad']} and eta={selected_cut['eta']}")
-
-         df_cut = df[(df.zflag > selected_cut['flagcut'])]
-         if return_df==True:
-             return df_cut, selected_cut['flagcut'], dfcuts
          else:
-             return selected_cut['flagcut'], dfcuts
-
  import matplotlib.pyplot as plt
  from scipy import stats
  import torch
+ from loguru import logger

+ def caluclate_eta(df):
+     return len(df[np.abs(df.zwerr)>0.15])/len(df) *100

+ def nmad(data):
+     return 1.4826 * np.median(np.abs(data - np.median(data)))

+ def sigma68(data):
+     return 0.5 * (pd.Series(data).quantile(q=0.84) - pd.Series(data).quantile(q=0.16))

+ def maximum_mean_discrepancy(x, y, kernel_type="rbf", kernel_mul=2.0, kernel_num=5):
      """
      Compute the Maximum Mean Discrepancy (MMD) between two sets of samples.
@@ -130,7 +39,8 @@ def maximum_mean_discrepancy(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num
      mmd_loss = torch.mean(x_kernel) + torch.mean(y_kernel) - 2 * torch.mean(xy_kernel)
      return mmd_loss

+
+ def compute_kernel(x, y, kernel_type="rbf", kernel_mul=2.0, kernel_num=5):
      """
      Compute the kernel matrix based on the chosen kernel type.
@@ -151,73 +61,77 @@ def compute_kernel(x, y, kernel_type='rbf', kernel_mul=2.0, kernel_num=5):
      x = x.unsqueeze(1).expand(x_size, y_size, dim)
      y = y.unsqueeze(0).expand(x_size, y_size, dim)

+     kernel_input = (x - y).pow(2).mean(2)

+     if kernel_type == "linear":
          kernel_matrix = kernel_input
+     elif kernel_type == "poly":
          kernel_matrix = (1 + kernel_input / kernel_mul).pow(kernel_num)
+     elif kernel_type == "rbf":
          kernel_matrix = torch.exp(-kernel_input / (2 * kernel_mul**2))
+     elif kernel_type == "sigmoid":
          kernel_matrix = torch.tanh(kernel_mul * kernel_input)
      else:
+         raise ValueError(
+             "Invalid kernel type. Supported types are 'linear', 'poly', 'rbf', and 'sigmoid'."
+         )

      return kernel_matrix


+ def select_cut(
+     df, completenss_lim=None, nmad_lim=None, outliers_lim=None, return_df=False
+ ):
+
+     if (completenss_lim is None) & (nmad_lim is None) & (outliers_lim is None):
+         raise (ValueError("Select at least one cut"))
      elif sum(c is not None for c in [completenss_lim, nmad_lim, outliers_lim]) > 1:
          raise ValueError("Select only one cut at a time")
+
      else:
+         bin_edges = stats.mstats.mquantiles(df.odds, np.arange(0, 1.01, 0.1))
+         scatter, eta, cmptnss, nobj = [], [], [], []

+         for k in range(len(bin_edges) - 1):
              edge_min = bin_edges[k]
+             edge_max = bin_edges[k + 1]

+             df_bin = df[(df.odds > edge_min)]

+             cmptnss.append(np.round(len(df_bin) / len(df), 2) * 100)
              scatter.append(nmad(df_bin.zwerr))
+             eta.append(len(df_bin[np.abs(df_bin.zwerr) > 0.15]) / len(df_bin) * 100)
              nobj.append(len(df_bin))
+
+         dfcuts = pd.DataFrame(
+             data=np.c_[
+                 np.round(bin_edges[:-1], 5),
+                 np.round(nobj, 1),
+                 np.round(cmptnss, 1),
+                 np.round(scatter, 3),
+                 np.round(eta, 2),
+             ],
+             columns=["flagcut", "Nobj", "completeness", "nmad", "eta"],
+         )
+
          if completenss_lim is not None:
+             logger.info("Selecting cut based on completeness")
+             selected_cut = dfcuts[dfcuts["completeness"] <= completenss_lim].iloc[0]
+
          elif nmad_lim is not None:
+             logger.info("Selecting cut based on nmad")
+             selected_cut = dfcuts[dfcuts["nmad"] <= nmad_lim].iloc[0]

          elif outliers_lim is not None:
+             logger.info("Selecting cut based on outliers")
+             selected_cut = dfcuts[dfcuts["eta"] <= outliers_lim].iloc[0]

+         logger.info(
+             f"This cut provides completeness of {selected_cut['completeness']}, nmad={selected_cut['nmad']} and eta={selected_cut['eta']}"
+         )

+         df_cut = df[(df.odds > selected_cut["flagcut"])]
+         if return_df == True:
+             return df_cut, selected_cut["flagcut"], dfcuts
          else:
+             return selected_cut["flagcut"], dfcuts
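Finally, a minimal sketch of the odds-based `select_cut` above on a synthetic catalogue. The toy DataFrame and its values are assumptions for illustration; the only requirement taken from the diff is the new column convention, `odds` for the per-object quality flag and `zwerr` for the weighted redshift error:

import numpy as np
import pandas as pd
from temps.utils import select_cut      # assumed import path

rng = np.random.default_rng(1)
df = pd.DataFrame({
    "odds": rng.uniform(0, 1, size=5_000),      # per-object ODDS, e.g. from get_pz
    "zwerr": rng.normal(0, 0.05, size=5_000),   # (z - z_true) / (1 + z_true)
})

# Picks the first (loosest) ODDS threshold whose outlier fraction eta stays
# at or below 5%, then keeps only the objects above that threshold.
df_cut, flagcut, dfcuts = select_cut(df, outliers_lim=5, return_df=True)
print(dfcuts[["flagcut", "completeness", "nmad", "eta"]])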