"""Gradio app: per-city mean sea level forecast from an LSTM + XGBoost ensemble.

At import time the script loads ``merge_data_final.csv``, builds the Gradio
interface and launches it. For the selected city it loads
``<City_Name>_lstm.h5`` and ``<City_Name>_xgboost.sav`` from the working
directory, averages both models' predictions and plots them against the
historical series.
"""

import pickle

import keras
import pandas as pd
import gradio as gr
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from hampel import hampel
import matplotlib.pyplot as plt

# Columns passed through the Hampel filter (outlier replacement).
_OUTLIER_COLUMNS = [
    'sea surface temp', 'sea bottom temp', 'sea salinity',
    'sea surface height', 'CO2', 'seasonally adjust CO2',
    'greenland_avg_mass', 'antarctica_avg_mass',
    'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
]

# Input columns scaled together; the target column is scaled separately so
# predictions can be inverted with their own scaler.
_SCALED_ATTRIBUTES = [
    'month', 'sea surface temp', 'sea bottom temp', 'sea salinity', 'CO2',
    'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
    'mean_elevation4', 'std_elevation4',
]

# Column order fed to the models (includes the scaled sea surface height).
_MODEL_ATTRIBUTES = [
    'month', 'sea surface temp', 'sea bottom temp', 'sea salinity', 'CO2',
    'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
    'mean_elevation4', 'std_elevation4',
]

_TARGET_COLUMN = 'sea surface height'

# Months labelled as the "future" forecast in the plot and the text outputs.
_FUTURE_MONTHS = ["2021-01-01", "2021-02-01", "2021-03-01"]

# The CSV's first column is a saved index; discard it and renumber rows.
df = pd.read_csv('merge_data_final.csv', index_col=0).reset_index(drop=True)

# Convert to mean sea level by adding 2.75 feet (= 0.8382 m).
df['sea surface height'] = df['sea surface height'] + 0.8382
df['Date'] = pd.to_datetime(df['Date'], format='%Y/%m')


def create_dataset(df1, city):
    """Build the cleaned, min-max scaled frame for a single city.

    Args:
        df1: Full dataset containing a ``City`` column, a ``Date`` column and
            the measurement columns used below.
        city: Value of ``City`` to select.

    Returns:
        Tuple ``(df, sc_x, sc_y)``: the processed frame (``Date`` restored as
        the first column), the scaler fitted on the input attributes and the
        scaler fitted on ``sea surface height``.
    """
    # Work on an explicit copy so the caller's frame is never modified and
    # pandas does not emit SettingWithCopy warnings.
    df = df1[df1['City'] == city].copy()
    df = df.set_index('Date')

    # Outlier detection: replace flagged points with the filter's imputation.
    for column in _OUTLIER_COLUMNS:
        df[column] = hampel(df[column], window_size=12, n=3, imputation=True)

    # Rolling mean/std of sea surface height over a window of 4. The leading
    # gaps fall back to a window of 2 and finally to 0.
    height = df[_TARGET_COLUMN]
    short_window = height.rolling(2)
    df['mean_elevation4'] = (
        height.rolling(4).mean().fillna(short_window.mean()).fillna(0)
    )
    df['std_elevation4'] = (
        height.rolling(4).std().fillna(short_window.std()).fillna(0)
    )

    sc_x = MinMaxScaler(feature_range=(0, 1))
    sc_y = MinMaxScaler(feature_range=(0, 1))
    df[_SCALED_ATTRIBUTES] = sc_x.fit_transform(df[_SCALED_ATTRIBUTES])
    df[[_TARGET_COLUMN]] = sc_y.fit_transform(df[[_TARGET_COLUMN]])

    df = df.reset_index()
    return df, sc_x, sc_y


def _feature_matrix(frame):
    """Return the model input columns of *frame* as a 2-D NumPy array."""
    return np.array(frame[_MODEL_ATTRIBUTES])


def process(df):
    """Split a training frame into model inputs and the three-month targets.

    Args:
        df: Frame containing the model attribute columns and the columns
            ``elevation_next_month``, ``elevation_next2_month`` and
            ``elevation_next3_month``.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays.
    """
    target = ['elevation_next_month', 'elevation_next2_month',
              'elevation_next3_month']
    return _feature_matrix(df), np.array(df[target])


def build_train_attribute(train, n_in):
    """Stack every length-``n_in`` sliding window of *train* into a 3-D array.

    Args:
        train: 2-D array of shape ``(rows, features)``.
        n_in: Window length (number of consecutive rows per sample).

    Returns:
        Array holding ``rows - n_in + 1`` windows (empty if ``rows < n_in``).
    """
    window_count = train.shape[0] - n_in + 1
    return np.array([train[start:start + n_in] for start in range(window_count)])


def get_prediction(city, lookback=12, df=df):
    """Run both saved models for *city* and average their predictions.

    Args:
        city: City name; spaces are replaced by underscores to form the model
            file names.
        lookback: Window length fed to the LSTM.
        df: Full dataset; defaults to the module-level frame.

    Returns:
        Tuple ``(prediction_average, subdf, y_unscaled)``: the averaged
        predictions in original units, the processed city frame, and the
        historical sea surface height in original units as a column vector.
    """
    name = city.replace(' ', '_')
    lstm_model = keras.models.load_model(f'{name}_lstm.h5')
    # NOTE: pickle can execute arbitrary code on load; only local, trusted
    # model files must ever be placed next to this script.
    with open(f'{name}_xgboost.sav', 'rb') as model_file:
        xgb_model = pickle.load(model_file)

    subdf, sc_x, sc_y = create_dataset(df, city)
    df_att = _feature_matrix(subdf)
    x = build_train_attribute(df_att, lookback)

    prediction_lstm = sc_y.inverse_transform(lstm_model.predict(x))
    prediction_xgb = sc_y.inverse_transform(xgb_model.predict(df_att))
    y_unscaled = sc_y.inverse_transform(
        np.array(subdf[_TARGET_COLUMN]).reshape(-1, 1)
    )

    # The LSTM yields one row per window, i.e. starting at row lookback - 1,
    # so the XGBoost rows are trimmed to line up before averaging.
    prediction_average = (prediction_lstm + prediction_xgb[lookback - 1:]) / 2
    return prediction_average, subdf, y_unscaled


def draw(city, df=df):
    """Plot the ensemble forecast for *city* and return the next three values.

    Args:
        city: City selected in the dropdown.
        df: Unused; kept for backward compatibility with existing callers.

    Returns:
        Tuple ``(fig, pred_1st, pred_2nd, pred_3rd)``: the Matplotlib figure
        and the three values of the last prediction row.
    """
    pred, subdf, y_unscaled = get_prediction(city)
    latest = pred[-1]
    pred_1st = latest[0]
    pred_2nd = latest[1]
    pred_3rd = latest[2]
    future_dates = pd.to_datetime(_FUTURE_MONTHS)

    # Draw on an explicit Axes instead of pyplot's implicit "current figure",
    # which is shared global state across concurrent requests.
    fig, ax = plt.subplots()
    ax.plot(pd.period_range("2001-01", "2021-01", freq='M').to_timestamp(),
            pred[:, 0], 'r', linewidth=4)
    ax.plot(subdf['Date'], y_unscaled, 'green', linewidth=2)
    ax.plot(future_dates, latest, 'blue', linewidth=4)
    # Annotate each forecast point with its value (distinct loop names so the
    # prediction array is not shadowed).
    for date, value in zip(future_dates, latest):
        ax.text(date, value, str(value), ha='left', va='top')
    ax.legend(('Past Predicted Value', 'Test', 'Future Predicted Value'))
    ax.set_xlabel('Time')
    ax.set_ylabel('Scaled Mean Sea Level (m)')
    ax.set_title('Average Ensemble Model Performance Compare to Historical data')
    return fig, pred_1st, pred_2nd, pred_3rd


output = gr.outputs.Plot(type="auto")
output1 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-01(m)")
output2 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-02(m)")
# Third textbox shows the third forecast month (label previously repeated 2021-02).
output3 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-03(m)")

dropdown = gr.inputs.Dropdown(choices=list(df.City.unique()), type="value",
                              default=None, label=None, optional=False)
app = gr.Interface(
    fn=draw,
    inputs=[dropdown],
    outputs=[output, output1, output2, output3],
    description="This model is only capable of predicting mean sea level in next three month based on historical data from 2000 to 2020",
)
app.launch()