File size: 5,203 Bytes
9f4b4c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import keras
import pandas as pd
import gradio as gr
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from hampel import hampel
import pickle
import matplotlib.pyplot as plt

# Load the merged dataset. The first CSV column is read as the index, moved
# back into a column named 'index' by reset_index(), and then discarded
# (presumably a saved row index from an earlier export - verify).
df = (
    pd.read_csv('merge_data_final.csv', index_col=0)
    .reset_index()
    .drop(columns='index')
)

# Convert to mean sea level by adding 2.75 feet (0.8382 m).
df['sea surface height'] = df['sea surface height'] + 0.8382
# 'Date' is stored as "year/month" text and parsed into timestamps.
df['Date'] = pd.to_datetime(df['Date'], format='%Y/%m')


# Function to create a dataset
def create_dataset(df1, city):
    """Build the cleaned, feature-engineered and scaled frame for one city.

    Steps, in order: select the rows of ``city``; replace Hampel-detected
    outliers in the measurement columns; add a 4-row rolling mean and
    standard deviation of 'sea surface height'; min-max scale the model
    inputs and the target to the range [0, 1].

    Args:
        df1: Frame holding all cities. Must contain 'City', 'Date', 'month'
            and every measurement column named in this function.
        city: Value of the 'City' column to select.

    Returns:
        A tuple ``(df, sc_x, sc_y)``: the processed frame with 'Date'
        restored as a regular column, the scaler fitted on the input
        features, and the scaler fitted on 'sea surface height'.
    """
    # Work on an explicit copy: the column assignments below must neither
    # touch the caller's frame nor trigger pandas' SettingWithCopyWarning.
    df = df1[df1['City'] == city].copy()
    df = df.set_index('Date')

    # Outlier detection: replace outliers with the Hampel filter's imputed
    # values, column by column.
    # NOTE(review): the `n=` / `imputation=` keywords look like the pre-1.0
    # `hampel` package API - confirm against the pinned version.
    outlier_columns = ['sea surface temp', 'sea bottom temp', 'sea salinity',
       'sea surface height', 'CO2', 'seasonally adjust CO2',
       'greenland_avg_mass', 'antarctica_avg_mass', 'tavg_C', 'tmin_C',
       'tmax_C', 'prcp_m']
    for column in outlier_columns:
        df[column] = hampel(df[column], window_size=12, n=3, imputation=True)

    # Rolling mean and std of sea surface height with a sliding window of 4.
    # The first rows have no full window: fall back to a window of 2, then
    # to 0 where even that is undefined. The fillna results are assigned
    # back instead of using `inplace=True` on `df[col]`, because an in-place
    # call on that temporary Series does not update the frame when pandas
    # Copy-on-Write is active.
    height = df['sea surface height']
    df['mean_elevation4'] = (
        height.rolling(4).mean()
        .fillna(height.rolling(2).mean())
        .fillna(0)
    )
    df['std_elevation4'] = (
        height.rolling(4).std()
        .fillna(height.rolling(2).std())
        .fillna(0)
    )

    attribute = ['month', 'sea surface temp', 'sea bottom temp', 'sea salinity','CO2','tavg_C','tmin_C', 'tmax_C', 'prcp_m',
       'mean_elevation4', 'std_elevation4']
    target = ['sea surface height']

    # Separate scalers so predictions can be mapped back to the target's
    # original units with sc_y alone.
    sc_x = MinMaxScaler(feature_range=(0, 1))
    sc_y = MinMaxScaler(feature_range=(0, 1))
    df[attribute] = sc_x.fit_transform(df[attribute])
    df[target] = sc_y.fit_transform(df[target])

    # Turn the 'Date' index back into an ordinary column for the callers.
    df = df.reset_index(level=0)
    return df, sc_x, sc_y
  

def process(df):
    """Split a prepared frame into input and target arrays.

    Args:
        df: Frame containing the twelve input columns and the three
            'elevation_next*_month' target columns named below.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: the input columns and the
        target columns, each in the order listed here, one row per frame row.
    """
    feature_columns = ['month', 'sea surface temp', 'sea bottom temp', 'sea salinity','CO2','sea surface height','tavg_C','tmin_C', 'tmax_C', 'prcp_m',
       'mean_elevation4', 'std_elevation4']
    target_columns = ['elevation_next_month','elevation_next2_month','elevation_next3_month']

    inputs, outputs = (
        np.array(df[columns]) for columns in (feature_columns, target_columns)
    )
    return inputs, outputs
  
# Functions to transform data into 3D shape
def build_train_attribute(train, n_in):
    """Stack every run of ``n_in`` consecutive rows of ``train`` into one array.

    Args:
        train: Array whose first axis is the row (time step) axis.
        n_in: Number of consecutive rows per window.

    Returns:
        Array of ``train.shape[0] - n_in + 1`` overlapping windows, each
        made of ``n_in`` rows; window ``i`` holds rows ``i`` to
        ``i + n_in - 1``. An empty array when ``train`` has fewer than
        ``n_in`` rows.
    """
    window_count = train.shape[0] - n_in + 1
    windows = [train[start:start + n_in] for start in range(window_count)]
    return np.array(windows)



def get_prediction(city,lookback = 12, df = df):
    """Predict sea surface height for one city with the LSTM/XGBoost ensemble.

    Loads the two per-city model files from the working directory
    (``<name>_lstm.h5`` and ``<name>_xgboost.sav``, where ``<name>`` is the
    city with spaces replaced by underscores), prepares the city's data with
    ``create_dataset``, runs both models and averages their predictions
    after mapping them back to the target's original units.

    Args:
        city: City name as it appears in the 'City' column.
        lookback: Number of consecutive rows in each LSTM input window.
        df: Frame with all cities; defaults to the module-level data.

    Returns:
        A tuple ``(prediction_average, subdf, y_unscaled)``: the averaged
        predictions (one row per LSTM window), the processed city frame,
        and the observed 'sea surface height' as a single-column array in
        original units.
    """
    name = city.replace(' ','_')
    lstm_model = keras.models.load_model(f'{name}_lstm.h5')
    # NOTE: unpickling executes arbitrary code; these model files must come
    # from a trusted source. The context manager closes the file handle,
    # which a bare `pickle.load(open(...))` would leave open.
    with open(f'{name}_xgboost.sav', 'rb') as model_file:
        xgb_model = pickle.load(model_file)

    subdf, sc_x, sc_y = create_dataset(df, city)

    # Model inputs, in the column order used here for both models.
    attribute = ['month', 'sea surface temp', 'sea bottom temp', 'sea salinity','CO2','sea surface height','tavg_C','tmin_C', 'tmax_C', 'prcp_m',
       'mean_elevation4', 'std_elevation4']
    df_att = np.array(subdf[attribute])

    # The LSTM consumes sliding windows of `lookback` rows; XGBoost consumes
    # single rows.
    x = build_train_attribute(df_att, lookback)

    prediction_lstm = sc_y.inverse_transform(lstm_model.predict(x))
    prediction_xgb = sc_y.inverse_transform(xgb_model.predict(df_att))
    y_unscaled = sc_y.inverse_transform(np.array(subdf['sea surface height']).reshape(-1, 1))

    # Window i ends at row i + lookback - 1, so the XGBoost rows are
    # trimmed to start there before the two predictions are averaged.
    prediction_average = (prediction_lstm + prediction_xgb[lookback - 1:]) / 2

    return prediction_average, subdf, y_unscaled
  
def draw(city, df = df):
    """Plot the ensemble's past and upcoming predictions for one city.

    Args:
        city: City name as it appears in the 'City' column.
        df: Frame with all cities; forwarded to ``get_prediction``.

    Returns:
        A tuple ``(fig, pred_1st, pred_2nd, pred_3rd)``: the Matplotlib
        figure and the three values of the last prediction row, which the
        plot places at 2021-01, 2021-02 and 2021-03.
    """
    # Forward `df` so a caller-supplied frame is actually used instead of
    # being silently replaced by get_prediction's default.
    pred, subdf, y_unscaled = get_prediction(city, df=df)
    latest = pred[-1]
    pred_1st = latest[0]
    pred_2nd = latest[1]
    pred_3rd = latest[2]
    future_dates = pd.to_datetime(["2021-01-01","2021-02-01","2021-03-01"])

    # Draw on an explicit Axes rather than pyplot's global "current figure".
    fig, ax = plt.subplots()

    # First column of every prediction row, plotted on a fixed monthly axis.
    # NOTE(review): the hard-coded range assumes one prediction row per
    # month from 2001-01 through 2021-01 - confirm against the data.
    ax.plot(pd.period_range("2001-01", "2021-01",freq='M').to_timestamp(),pred[:,0],'r' , linewidth=4)
    ax.plot(subdf['Date'],y_unscaled,'green' , linewidth=2)
    ax.plot(future_dates,latest,'blue' , linewidth=4)
    # Label each upcoming point with its value. The loop variable is named
    # `value` so it does not rebind the `pred` array.
    for date, value in zip(future_dates, latest):
        ax.text(date,value,value, ha = 'left',va = 'top')
    ax.legend(('Past Predicted Value','Test','Future Predicted Value'))
    ax.set_xlabel('Time')
    ax.set_ylabel('Scaled Mean Sea Level (m)')
    ax.set_title('Average Ensemble Model Performance Compare to Historical data')

    return fig, pred_1st,pred_2nd,pred_3rd

# Gradio components: one plot plus one textbox per value returned by draw().
output = gr.outputs.Plot(type="auto")
output1 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-01(m)")
output2 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-02(m)")
# Third forecast month: draw() plots it at 2021-03, so the label must say
# 2021-03 (it previously repeated "2021-02").
output3 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-03(m)")
# The dropdown offers every city present in the loaded data.
dropdown = gr.inputs.Dropdown(choices=list(df.City.unique()), type="value", default=None, label=None, optional=False)

app = gr.Interface(fn=draw, inputs=[dropdown], outputs=[output,output1,output2,output3],description="This model is only capable of predicting mean sea level in next three month based on historical data from 2000 to 2020")
app.launch()