"""Rebuild a saved bagged Keras DNN ensemble (mastml EnsembleModel wrapping KerasRegressor)
and make predictions with error bars recalibrated from the per-estimator spread."""
import os
import numpy as np
import pandas as pd
import joblib
import tensorflow as tf
from mastml.models import EnsembleModel
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization
from keras.wrappers.scikit_learn import KerasRegressor
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist

def rebuild_model(model_folder):
    """Rebuild the bagged Keras ensemble from the sub-models saved under RPV_model/<model_folder>."""
    PATH = os.getcwd()

    # Define the build function for the network architecture; KerasRegressor needs this,
    # and it must match the architecture of the saved models
    def keras_model():
        model = Sequential()
        model.add(Dense(1024, input_dim=9, kernel_initializer='normal', activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(1024, kernel_initializer='normal', activation='relu'))
        model.add(Dropout(0.3))
        model.add(Dense(1, kernel_initializer='normal'))
        model.compile(loss='mean_squared_error', optimizer='adam')

        return model

    model_keras = KerasRegressor(build_fn=keras_model, epochs=250, batch_size=100, verbose=0)
    model_bagged_keras_rebuild = EnsembleModel(model=model_keras, n_estimators=10)

    # Load the trained sub-models and inject them into the bagging ensemble,
    # with each estimator using all 9 input features
    num_models = 10
    models = list()
    for i in range(num_models):
        models.append(tf.keras.models.load_model(os.path.join(PATH, 'RPV_model', model_folder, 'keras_model_' + str(i))))

    model_bagged_keras_rebuild.model.estimators_ = models
    model_bagged_keras_rebuild.model.estimators_features_ = [np.arange(0, 9) for i in models]

    return model_bagged_keras_rebuild

def get_preds_ebars(model, df_featurized, preprocessor, return_ebars=True):
    """Return (predictions, error bars) for the featurized data, optionally with recalibrated ensemble error bars."""
    preds_each = list()
    ebars_each = list()

    # Apply the same feature scaling that was used during training
    df_featurized_scaled = preprocessor.transform(pd.DataFrame(df_featurized))

    if return_ebars:
        # Predict each data point with every ensemble member: the mean across members is
        # the prediction and the standard deviation is the raw error bar
        for i, x in df_featurized_scaled.iterrows():
            preds_per_data = list()
            for m in model.model.estimators_:
                preds_per_data.append(m.predict(pd.DataFrame(x).T, verbose=0))
            preds_each.append(np.mean(preds_per_data))
            ebars_each.append(np.std(preds_per_data))
    else:
        preds_each = model.predict(df_featurized_scaled)
        ebars_each = [np.nan for i in range(preds_each.shape[0])]

    if return_ebars:
        # Recalibrate the raw ensemble standard deviations with a quadratic mapping
        a = -0.041
        b = 2.041
        c = 3.124
        ebars_each_recal = a*np.array(ebars_each)**2 + b*np.array(ebars_each) + c
    else:
        ebars_each_recal = ebars_each

    return np.array(preds_each).ravel(), np.array(ebars_each_recal).ravel()

def make_predictions_DNN(df_featurized, model_folder):
    """Rebuild the saved DNN ensemble and return a DataFrame of predictions and error bars."""
    PATH = os.getcwd()

    # Rebuild the saved model
    model = rebuild_model(model_folder)

    # Load the preprocessor used to normalize the input features during training
    preprocessor = joblib.load(os.path.join(PATH, 'RPV_model', model_folder, 'StandardScaler.pkl'))

    # Get predictions and error bars from the model
    preds, ebars = get_preds_ebars(model, df_featurized, preprocessor, return_ebars=True)

    pred_dict = {'preds': preds,
                 'ebars': ebars}

    return pd.DataFrame(pred_dict)

def test(df):
    """Smoke test: check that a saved sub-model loads and return placeholder output."""
    pred_arr = np.array([['here is some data'], ['here are some ebars']])
    model = keras.models.load_model('keras_model_0')
    return pred_arr
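
# Minimal usage sketch (an assumption, not part of the saved pipeline): it expects a
# directory layout of RPV_model/<model_folder>/ under the current working directory
# containing keras_model_0 ... keras_model_9 and StandardScaler.pkl, plus a featurized
# DataFrame with the 9 input features the network was trained on. The folder name
# 'final_model' and the feature column names below are hypothetical placeholders.
if __name__ == '__main__':
    # Hypothetical 9-feature input; replace with a real featurized DataFrame
    example_features = pd.DataFrame(np.random.rand(5, 9),
                                    columns=['feature_' + str(i) for i in range(9)])

    # 'final_model' stands in for whichever subfolder of RPV_model holds the saved ensemble
    df_preds = make_predictions_DNN(example_features, model_folder='final_model')
    print(df_preds)  # columns: 'preds' (ensemble mean) and 'ebars' (recalibrated spread)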