Fucheng committed on
Commit
b60babb
1 Parent(s): 39adf11

Delete network.py

Browse files
Files changed (1) hide show
  1. network.py +0 -422
network.py DELETED
@@ -1,422 +0,0 @@
1
- import tensorflow as tf
2
- import os
3
- import numpy as np
4
-
5
- # the package needed when use Spec_Checker class
6
- try:
7
- from astropy.table import Table
8
- import matplotlib.pyplot as plt
9
- import numpy as np
10
- import pandas as pd
11
- import seaborn as sns
12
- found = True
13
- except ImportError:
14
- found = False
15
- print('error, pack not found####')
16
-
17
-
18
- # https://www.tensorflow.org/guide/keras/train_and_evaluate?hl=zh-cn
19
class Metric_Fun(tf.keras.metrics.Metric):
    """
    A customized metric.
    metric = accuracy - mae.
    The accuracy is a binary accuracy over output columns 0-3 and the mae is
    the mean absolute error over output column 4.
    The larger it is, the better.
    The ideal value is 1.0, where acc=1 and mae=0.
    """

    def __init__(self, name="Metric_Fun", **kwargs):
        super().__init__(name=name, **kwargs)
        # The name is passed by keyword because the first positional argument
        # of add_weight is not the name in every Keras version.
        self.evalue = self.add_weight(name='evalue', initializer='zeros')
        # https://www.tensorflow.org/api_docs/python/tf/keras/metrics/BinaryAccuracy
        self.acc = tf.keras.metrics.BinaryAccuracy()
        # https://www.tensorflow.org/api_docs/python/tf/keras/metrics/MeanAbsoluteError
        self.mae = tf.keras.metrics.MeanAbsoluteError()

    def update_state(self, y_true, y_pred, sample_weight=None):
        """
        Accumulate one batch into the two nested metrics and refresh `evalue`.

        y_true and y_pred are cast to float32; columns 0:4 feed the accuracy
        and column 4:5 feeds the mean absolute error. sample_weight is
        forwarded to both nested metrics (None means unweighted).
        """
        y_true = tf.cast(y_true, dtype=tf.float32)
        y_pred = tf.cast(y_pred, dtype=tf.float32)

        self.mae.update_state(y_true[:, 4:5], y_pred[:, 4:5],
                              sample_weight=sample_weight)
        self.acc.update_state(y_true[:, 0:4], y_pred[:, 0:4],
                              sample_weight=sample_weight)

        # Both results are running values over everything seen since the
        # last reset, so evalue is a running value as well.
        self.evalue.assign(self.acc.result() - self.mae.result())

    def result(self):
        """
        Return the current value of accuracy - mae.
        """
        return self.evalue

    def reset_state(self):
        """
        Reset the metric; the state is reset at the start of each epoch.
        """
        # The nested metrics hold the actual running sums. Resetting only
        # `evalue` would let them keep accumulating across epochs.
        self.acc.reset_state()
        self.mae.reset_state()
        self.evalue.assign(0.0)
55
-
56
-
57
-
58
-
59
class GasNet3:
    """
    ResNet-style 1D network that outputs a class and a redshift per spectrum.

    Initialize, setting the input wavelength grid (and so the input pixel
    number), the output channel and the network name.
    """

    def __init__(self, Network_name, Output_channel,
                 wavelength_file='./test_data/wavelengths.npy'):
        """
        Network_name: name of the Keras model; also the stem of the '.h5'
            and '.csv' files written during training.
        Output_channel: width of the final dense layer (the first
            `lable_dim` channels are class scores, the rest are regressed).
        wavelength_file: path of the .npy file holding the input wavelength
            grid; defaults to the path that used to be hard-coded.
        """
        self.Network_name = Network_name
        self.Input_wavelength = np.load(wavelength_file)
        self.Input_pixel = len(self.Input_wavelength)
        self.Inpt = tf.keras.layers.Input(shape=(self.Input_pixel, 1))  # shape of spectra
        self.Output_channel = Output_channel
        self.batch = 128  # training batch
        self.redshift_range = [0, 4]
        self.class_names = {b'AGN': 0, b'GALAXY': 1, b'QSO': 2, b'STAR': 3}
        self.lable_dim = len(self.class_names)

    def Wavelength_Grid(self):
        """
        Return the grid of input wavelength
        """
        return self.Input_wavelength

    def Interpolate_Flux(self, wavelength, flux):
        """
        Interpolate the spectrum flux onto the input wavelength grid.

        A 1-D flux is interpolated directly with `wavelength`. Otherwise
        `wavelength` and `flux` are walked row by row and the result is a
        stacked array with one interpolated row per input row.
        """
        if np.ndim(flux) != 1:
            return np.array([
                np.interp(self.Input_wavelength, row_wave, row_flux)
                for row_wave, row_flux in zip(wavelength, flux)
            ])
        return np.interp(self.Input_wavelength, wavelength, flux)

    def Append_Noise_Sample(self):
        """
        Placeholder: an extra blank noise sample to add during training
        (not implemented).
        """
        pass

    def Block_ResNet(self, x0, n):
        """
        one ResNet Block, to reduce feature dimension
        (strided convolutions followed by a strided max pooling).
        """
        core_size = 5
        layers = tf.keras.layers
        x = layers.Conv1D(n, kernel_size=core_size, strides=2, padding='same')(x0)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
        x = layers.Conv1D(2 * n, kernel_size=core_size, padding='same')(x)
        x = layers.BatchNormalization()(x)
        # The shortcut uses the same stride and channel count as the main
        # path so that both branches can be added.
        ShortCut = layers.Conv1D(2 * n, kernel_size=2, strides=2, padding='same')(x0)
        x = layers.Add()([x, ShortCut])
        x = layers.Activation('relu')(x)
        x = layers.MaxPooling1D(pool_size=core_size, strides=2)(x)
        return x

    def Block_ResNet_2(self, x0, n):
        """
        one ResNet Block, to not reduce feature dimension, but extend channels.
        """
        core_size = 3
        layers = tf.keras.layers
        x = layers.Conv1D(n, kernel_size=core_size, strides=1, padding='same')(x0)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
        x = layers.Conv1D(n, kernel_size=core_size, strides=1, padding='same')(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
        x = layers.Conv1D(2 * n, kernel_size=core_size, strides=1, padding='same')(x)
        x = layers.BatchNormalization()(x)
        # 1x1 convolution so the shortcut has the same channel count.
        ShortCut = layers.Conv1D(2 * n, kernel_size=1, strides=1, padding='same')(x0)
        x = layers.Add()([x, ShortCut])
        x = layers.Activation('relu')(x)
        return x

    def _Output_Head(self, x):
        """
        Shared head: flatten, two dense layers, softmax on the class channels.
        """
        x = tf.keras.layers.Flatten()(x)
        x = tf.keras.layers.Dense(1024, activation='relu')(x)
        x = tf.keras.layers.Dense(self.Output_channel, activation=None)(x)
        # Only the first lable_dim channels are class scores; the remaining
        # channels are passed through without activation.
        x0 = tf.keras.layers.Activation('softmax')(x[:, 0:self.lable_dim])
        x1 = x[:, self.lable_dim:self.Output_channel]
        return tf.keras.layers.Concatenate(axis=-1)([x0, x1])

    def ResNet(self, x):
        """
        Networks made by Blocks
        """
        for n in (16, 32, 64, 128, 256):
            x = self.Block_ResNet(x, n)
        return self._Output_Head(x)

    def ResNet_test(self, x):
        """
        Networks for testing
        """
        for n in (16, 32, 64, 128):
            x = self.Block_ResNet(x, n)
        for n in (256, 512, 1024):
            x = self.Block_ResNet_2(x, n)
        return self._Output_Head(x)

    def Built_Model(self, test=False):
        """
        Return the ResNet model (the testing variant when `test` is true).
        The model summary is printed as a side effect.
        """
        builder = self.ResNet_test if test else self.ResNet
        model = tf.keras.Model(inputs=self.Inpt, outputs=builder(self.Inpt),
                               name=self.Network_name)
        model.summary()
        return model

    def Plot_Model(self, test=False):
        """
        Plot the network architecture into '<model name>.pdf'
        """
        model = self.Built_Model(test)
        tf.keras.utils.plot_model(model, to_file=model.name + '.pdf',
                                  show_shapes=True, show_layer_names=False)

    def Data_Clip(self, label, redshift):
        """
        Convert the label to one-hot code.
        Redshifts are clipped to the redshift range widened by 1 on each side.
        Concatenate them into a vector.
        """
        # reshape the label and redshift array
        label = np.array(label)
        redshift = np.array(redshift)
        redshift = redshift.reshape(len(redshift), 1)
        # convert to one-hot coded
        value = np.vectorize(self.class_names.get)(label)
        label = tf.keras.utils.to_categorical(value, num_classes=self.lable_dim)
        redshift = np.clip(redshift, self.redshift_range[0] - 1, self.redshift_range[1] + 1)
        redshift = tf.convert_to_tensor(redshift)
        # a vector made by label and redshift concatenation
        return tf.concat([label, redshift], axis=-1)

    def Preprocess(self, flux):
        """
        Preprocess the input flux: normalize along the last axis.
        """
        # flux/sum(flux**2)**0.5
        return tf.keras.utils.normalize(flux, axis=-1)

    def Loss_Func(self, y_true, y_pred):
        """
        The loss function of this model.
        loss = Huber redshift error + label cross entropy
        """
        # redshift error on the channels after the class scores
        Huber = tf.keras.losses.Huber(0.01)
        error = Huber(y_true[:, self.lable_dim:self.Output_channel],
                      y_pred[:, self.lable_dim:self.Output_channel])
        # entropy on the class channels
        Cce = tf.keras.losses.CategoricalCrossentropy()
        crossentropy = Cce(y_true[:, 0:self.lable_dim], y_pred[:, 0:self.lable_dim])
        return error + crossentropy

    def _Load_Model(self):
        """
        Load '<Network_name>.h5' with the custom loss and metric registered.
        """
        # custom_objects maps names to classes or functions, so the metric
        # is registered as the class rather than as an instance.
        return tf.keras.models.load_model(
            self.Network_name + '.h5',
            custom_objects={'Loss_Func': self.Loss_Func, 'Metric_Fun': Metric_Fun},
        )

    def Train_Model(self, data, lr=1e-3, epo=40, test=False):
        """
        Training the model.

        data: mapping with 'train' and 'valid' entries, each providing
            'flux', 'label' and 'redshift'.
        lr: Adam learning rate, used only when a new model is built.
        epo: number of epochs.
        test: build the testing network instead of the default one.

        If '<Network_name>.h5' exists it is loaded and training continues
        from it. The best model (by 'val_Metric_Fun') is saved to
        '<model name>.h5' and the history is appended to '<model name>.csv'.
        """
        batch = self.batch
        if os.path.exists(self.Network_name + '.h5'):
            model = self._Load_Model()
            print('loading the existed model')
        else:
            model = self.Built_Model(test)
            optimizer = tf.keras.optimizers.Adam(learning_rate=lr)  # Adam
            model.compile(optimizer, loss=self.Loss_Func, metrics=[Metric_Fun()])
        # https://tensorflow.google.cn/api_docs/python/tf/keras/callbacks/ModelCheckpoint
        checkPoint = tf.keras.callbacks.ModelCheckpoint(
            model.name + '.h5', monitor='val_Metric_Fun', mode='max', verbose=1,
            save_best_only=True, save_weights_only=False)
        csvLogger = tf.keras.callbacks.CSVLogger(model.name + '.csv', append=True)
        train_x = self.Preprocess(data['train']['flux'])
        train_y = self.Data_Clip(data['train']['label'], data['train']['redshift'])
        valid_x = self.Preprocess(data['valid']['flux'])
        valid_y = self.Data_Clip(data['valid']['label'], data['valid']['redshift'])
        model.fit(train_x, train_y, epochs=epo, batch_size=batch,
                  validation_data=(valid_x, valid_y),
                  callbacks=[checkPoint, csvLogger], shuffle=True)

    def Prodiction(self, flux, lamb=None):
        """
        Prediction, classes and redshift.

        flux: spectra to classify.
        lamb: optional wavelengths of `flux`; when given and non-empty the
            flux is first interpolated onto the input wavelength grid.

        Returns (pred_label, pred_redshift): the class names (keys of
        class_names) and the remaining output channels.
        """
        # None replaces the former mutable default []; an empty sequence is
        # still accepted and still means "no interpolation".
        if lamb is not None and len(lamb) != 0:
            flux = self.Interpolate_Flux(lamb, flux)
        model = self._Load_Model()
        flux = self.Preprocess(flux)
        pred = model.predict(flux)  # give classes and redshift

        pred_label, pred_redshift = np.hsplit(pred, [self.lable_dim])
        # turn one-hot to integer value, get the max value index
        pred_label = np.argmax(pred_label, axis=-1)
        # reverse key and value of class_names: integer value -> name
        index_to_name = {v: k for k, v in self.class_names.items()}
        pred_label = np.vectorize(index_to_name.get)(pred_label)

        return pred_label, pred_redshift
280
-
281
-
282
-
283
class Spec_Checker():
    """
    Helpers to load spectra from several sources and to plot diagnostics.

    Requires the optional plotting/table packages imported at the top of
    the file (astropy Table, matplotlib, pandas, seaborn).
    """

    def __init__(self):
        self.gasnet = GasNet3('test_net', Output_channel=5)

    def Show_spec(self, lamb, flux, name=''):
        """
        show the detail of spectra after interpolated and preprocessed
        """
        plt.figure(figsize=(16, 6), dpi=160)
        int_flux = self.gasnet.Interpolate_Flux(lamb, flux)
        grid = self.gasnet.Input_wavelength

        plt.subplot(2, 1, 1)
        plt.title(name + '-After interpolated')
        plt.plot(lamb, flux, linewidth=0.5, label='original flux')
        plt.plot(grid, int_flux, linewidth=0.5, label='interpolate flux')
        plt.legend()

        plt.subplot(2, 1, 2)
        plt.title(name + '-After preprocessed')
        # Only the first row of the preprocessed result is plotted.
        plt.plot(lamb, self.gasnet.Preprocess(flux)[0], linewidth=0.5, label='original flux')
        plt.plot(grid, self.gasnet.Preprocess(int_flux)[0], linewidth=0.5, label='interpolate flux')
        plt.legend()

    def SDSS_spec(self, file, plot=True):
        """
        load the spectra from SDSS files

        Returns a dict with 'wavelength', 'flux', 'redshift' and 'label'.
        """
        data = Table.read(file)
        flux, lamb = data['flux'], 10**data['loglam']
        if plot:
            self.Show_spec(lamb, flux, name='SDSS:' + file.rsplit('/')[-1])
        # Second table of the same file (presumably HDU 2 - confirm);
        # only its first row is used.
        spec_info = Table.read(file, 2)
        redshift, classes = spec_info['Z'][0], spec_info['CLASS'][0]
        return {'wavelength': lamb, 'flux': flux, 'redshift': redshift, 'label': classes}

    def SDSS_spec_stack(self, num=0, plot=True):
        """
        load the spectra of validation ('train_data/val.fits');
        spectrum `num` is plotted when `plot` is true.
        """
        data = Table.read('train_data/val.fits')
        flux, label, redshift = data['int_flux'], data['train_label'], data['Z']
        # one copy of the input grid per spectrum
        wavelength = np.repeat([self.gasnet.Input_wavelength], len(flux), axis=0)
        if plot:
            self.Show_spec(wavelength[num], flux[num], name='validation:' + str(num))
        return {'wavelength': wavelength, 'flux': flux, 'redshift': redshift, 'label': label}

    def JK_spec(self, file):
        """
        load the spectrum files from JK mock and plot the first spectrum
        (nothing is returned).
        """
        data = Table.read(file)
        flux, lamb = data['FLUX'][0], data['WAVE'][0]
        self.Show_spec(lamb, flux, name='JK:' + file.rsplit('/')[-1])

    def npy_file(self, num=0, plot=True):
        """
        load the spectrum files from qcp test data

        The returned dict has 'redshift' set to None.
        """
        wavelength = np.load('./test_data/wavelengths.npy')
        flux = np.load('./test_data/data.npy')
        wavelength = np.repeat([wavelength], len(flux), axis=0)
        if plot:
            self.Show_spec(wavelength[num], flux[num], name='test npy:' + str(num))
        label = np.load('./test_data/labels.npy')
        # reverse key and value of class_names: integer value -> name
        index_to_name = {v: k for k, v in self.gasnet.class_names.items()}
        label = np.vectorize(index_to_name.get)(label)
        return {'wavelength': wavelength, 'flux': flux, 'redshift': None, 'label': label}

    def Luke_spec(self, num=0, plot=True):
        """
        load the spectrum files from Luke mock
        """
        spec_file = '../Luke_mock_spectra/Luke_spec.fits'  # flux need multiply 1e17
        data = Table.read(spec_file)
        wavelength = np.load('./test_data/wavelengths.npy')
        wavelength = np.repeat([wavelength], len(data), axis=0)
        flux = data['int_flux']
        if plot:
            self.Show_spec(wavelength[num], flux[num], name='Luck :' + str(num))
        return {'wavelength': wavelength, 'flux': flux,
                'redshift': data['Redshift'], 'label': data['train_label']}

    def JK_stack_spec(self, num=0, plot=True):
        """
        load the stacked spectra from the JK mock ('./JK_stack_mock.fits'),
        using the wavelengths of the first row of 'JK_mock_sample.fits'.
        """
        spec_file = './JK_stack_mock.fits'
        data = Table.read(spec_file)
        wavelength = Table.read('JK_mock_sample.fits')['WAVE'][0]
        wavelength = np.repeat([wavelength], len(data), axis=0)
        flux, label, redshift = data['FLUX'], data['train_type'], data['REDSHIFT']
        if plot:
            self.Show_spec(wavelength[num], flux[num],
                           name='JK--num--' + str(num) + '--label--' + str(label[num])
                           + '--redshift--' + str(redshift[num]))
        return {'wavelength': wavelength, 'flux': flux, 'redshift': redshift, 'label': label}

    @staticmethod
    def _Legend_Text(label, redshift=None):
        """
        Build the legend entry '<label> z=<redshift>' for one spectrum.

        Bytes labels are decoded first, and the ' z=' part is left out when
        no redshift is available.
        """
        if isinstance(label, bytes):
            label = label.decode()
        text = str(label)
        if redshift is None:
            return text
        return text + ' z=' + str(redshift)

    def Svae_Figure(self, data, name='test'):
        """
        plot a series of spectra in one pdf file, 'figure/<name>.pdf'

        data: dict with 'wavelength', 'flux', 'label' and 'redshift', as
            returned by the loaders of this class ('redshift' may be None).
        """
        figfile = 'figure'
        os.makedirs(figfile, exist_ok=True)
        n_spec = len(data['flux'])
        # The figure height follows the number of spectra, and
        # squeeze=False keeps `axes` two-dimensional for a single spectrum.
        fig, axes = plt.subplots(nrows=n_spec, ncols=1, sharex=True, squeeze=False,
                                 figsize=(8, 2 * n_spec), dpi=50)
        fig.suptitle(name)
        plt.xlabel('wavelength')
        plt.ylabel('flux')
        redshift = data['redshift']
        for i in range(n_spec):
            z = None if redshift is None else redshift[i]
            axe = axes[i, 0]
            axe.plot(data['wavelength'][i], data['flux'][i], linewidth=0.5,
                     label=self._Legend_Text(data['label'][i], z))
            axe.legend()
        fname = os.path.join(figfile, str(name) + '.pdf')
        fig.savefig(fname)
        plt.close(fig)

    def Confusion_Matrix(self, pred, real):
        """
        plot the confusion matrix of predicted vs. actual labels
        """
        data = {'Actual': np.array(real).flatten(), 'Predicted': np.array(pred).flatten()}
        df = pd.DataFrame(data)
        plt.figure(figsize=(8, 6), dpi=160)
        confusion_matrix = pd.crosstab(df['Actual'], df['Predicted'],
                                       rownames=['Actual'], colnames=['Predicted'])
        sns.heatmap(confusion_matrix, cmap="crest", annot=True)

    def One2One(self, pred, real, label):
        """
        plot the predicted redshift vs. real, one panel per label
        """
        data = {'pred_redshift': np.array(pred).flatten(),
                'real_redshift': np.array(real).flatten(),
                'label': np.array(label).flatten()}
        df = pd.DataFrame(data)
        df['real_redshift'] = df['real_redshift'].astype('float32')
        # https://seaborn.pydata.org/generated/seaborn.lmplot.html#seaborn.lmplot
        sns.lmplot(data=df, x='pred_redshift', y='real_redshift', hue='label', col='label',
                   col_wrap=2, height=6,  # plot size
                   line_kws={"alpha": 0.1},  # line style
                   scatter_kws={"s": 1, "alpha": 1}, sharex=False, sharey=False)