Daliny's picture
Upload app.py
9f4b4c1
raw
history blame contribute delete
5.2 kB
import keras
import pandas as pd
import gradio as gr
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from hampel import hampel
import pickle
import matplotlib.pyplot as plt
# Load the merged dataset with the first CSV column as the index, then move
# that index back into a regular column and discard the column named 'index'.
df = (
    pd.read_csv('merge_data_final.csv', index_col=0)
    .reset_index()
    .drop(columns='index')
)
# Convert sea surface height to mean sea level by adding 2.75 feet (0.8382 m).
df['sea surface height'] = df['sea surface height'] + 0.8382
# Dates are stored as 'YYYY/MM' strings; parse them into timestamps.
df['Date'] = pd.to_datetime(df['Date'], format='%Y/%m')
# Function to create a dataset
def create_dataset(df1, city):
    """Build the cleaned, scaled feature frame for a single city.

    Selects the rows of ``df1`` whose ``City`` equals ``city``, replaces
    Hampel-flagged outliers in the measurement columns, adds rolling
    mean/std features of ``sea surface height`` and min-max scales the
    attribute and target columns to the range [0, 1].

    Args:
        df1: Frame holding ``City``, ``Date`` and the measurement columns
            referenced below. It is not modified.
        city: Value of the ``City`` column to select.

    Returns:
        A tuple ``(df, sc_x, sc_y)``: the processed frame with ``Date``
        restored as a regular column, the scaler fitted on the attribute
        columns, and the scaler fitted on ``sea surface height``.
    """
    # Take an explicit copy: the column assignments below must land in this
    # frame, not in a temporary slice of the caller's data (which triggers
    # SettingWithCopyWarning and is unreliable under pandas Copy-on-Write).
    df = df1[df1['City'] == city].copy()
    df = df.set_index('Date')

    # Outlier detection: impute values flagged by the Hampel filter.
    outlier_columns = [
        'sea surface temp', 'sea bottom temp', 'sea salinity',
        'sea surface height', 'CO2', 'seasonally adjust CO2',
        'greenland_avg_mass', 'antarctica_avg_mass', 'tavg_C', 'tmin_C',
        'tmax_C', 'prcp_m',
    ]
    for column in outlier_columns:
        df[column] = hampel(df[column], window_size=12, n=3, imputation=True)

    # Rolling mean and std of sea surface height with a window of 4.
    # The leading rows have no full window, so fall back to a window of 2 and
    # finally to 0. The fillna results are assigned back instead of using
    # chained ``inplace=True``, which can silently leave the NaNs in place.
    height = df['sea surface height']
    df['mean_elevation4'] = (
        height.rolling(4).mean()
        .fillna(height.rolling(2).mean())
        .fillna(0)
    )
    df['std_elevation4'] = (
        height.rolling(4).std()
        .fillna(height.rolling(2).std())
        .fillna(0)
    )

    attribute = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
        'CO2', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    target = ['sea surface height']

    # Separate scalers so that predictions can be mapped back to the
    # original units through sc_y alone.
    sc_x = MinMaxScaler(feature_range=(0, 1))
    sc_y = MinMaxScaler(feature_range=(0, 1))
    df[attribute] = sc_x.fit_transform(df[attribute])
    df[target] = sc_y.fit_transform(df[target])

    # Turn the 'Date' index back into a column for the callers.
    df = df.reset_index(level=0)
    return df, sc_x, sc_y
def process(df):
    """Split a prepared frame into input and target arrays.

    Args:
        df: Frame containing the twelve attribute columns and the three
            ``elevation_next*_month`` target columns named below.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays holding the attribute columns and
        the target columns respectively, in the order listed here.
    """
    feature_columns = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
        'CO2', 'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    target_columns = [
        'elevation_next_month',
        'elevation_next2_month',
        'elevation_next3_month',
    ]
    features = np.array(df[feature_columns])
    targets = np.array(df[target_columns])
    return features, targets
# Functions to transform data into 3D shape
def build_train_attribute(train, n_in):
    """Stack overlapping windows of ``train`` into one array.

    Args:
        train: Array whose first axis is sliced into windows.
        n_in: Number of consecutive rows in each window.

    Returns:
        An array built from every length-``n_in`` slice
        ``train[start:start + n_in]``, one per valid start position. When
        ``train`` has fewer than ``n_in`` rows the result is an empty array.
    """
    window_count = train.shape[0] - n_in + 1
    windows = [train[start:start + n_in] for start in range(window_count)]
    return np.array(windows)
def get_prediction(city, lookback=12, df=df):
    """Compute the averaged two-model prediction for one city.

    Loads the Keras model ``{name}_lstm.h5`` and the pickled model
    ``{name}_xgboost.sav`` (``name`` is ``city`` with spaces replaced by
    underscores), prepares the city's data with ``create_dataset`` and
    averages both models' predictions after mapping them back to the
    original units.

    Args:
        city: City name as it appears in the ``City`` column of ``df``.
        lookback: Window length used to build the 3D input of the Keras
            model.
        df: Source frame; defaults to the module-level dataset.

    Returns:
        A tuple ``(prediction_average, subdf, y_unscaled)``: the mean of the
        two models' unscaled predictions (one row per lookback window), the
        processed frame for the city, and the unscaled
        ``sea surface height`` values as a column vector.
    """
    name = city.replace(' ', '_')
    lstm_model = keras.models.load_model(f'{name}_lstm.h5')
    # NOTE: unpickling executes arbitrary code; only load trusted model files.
    # The context manager guarantees the file handle is closed after loading.
    with open(f'{name}_xgboost.sav', 'rb') as model_file:
        xgb_model = pickle.load(model_file)

    subdf, _, sc_y = create_dataset(df, city)

    attribute = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
        'CO2', 'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    df_att = np.array(subdf[attribute])

    # The Keras model consumes sliding windows; the pickled model is fed the
    # individual rows.
    x = build_train_attribute(df_att, lookback)
    prediction_lstm = sc_y.inverse_transform(lstm_model.predict(x))
    prediction_xgb = sc_y.inverse_transform(xgb_model.predict(df_att))
    y_unscaled = sc_y.inverse_transform(
        np.array(subdf['sea surface height']).reshape(-1, 1)
    )

    # Drop the first lookback-1 row-wise predictions so both arrays line up
    # with the windows (the first window ends at row lookback-1).
    prediction_average = (prediction_lstm + prediction_xgb[lookback - 1:]) / 2
    return prediction_average, subdf, y_unscaled
def draw(city, df=df):
    """Plot historical data against the ensemble prediction for one city.

    Args:
        city: City name forwarded to ``get_prediction``.
        df: Source frame forwarded to ``get_prediction``; defaults to the
            module-level dataset.

    Returns:
        A tuple ``(fig, pred_1st, pred_2nd, pred_3rd)``: the matplotlib
        figure and the three values of the last prediction row, which are
        plotted at 2021-01, 2021-02 and 2021-03.
    """
    # Forward ``df`` so the parameter is actually honoured.
    pred, subdf, y_unscaled = get_prediction(city, df=df)
    latest = pred[-1]
    pred_1st = latest[0]
    pred_2nd = latest[1]
    pred_3rd = latest[2]
    future_dates = pd.to_datetime(["2021-01-01", "2021-02-01", "2021-03-01"])

    fig = plt.figure()
    # First-step prediction of every window, plotted on a fixed monthly axis.
    plt.plot(
        pd.period_range("2001-01", "2021-01", freq='M').to_timestamp(),
        pred[:, 0],
        'r',
        linewidth=4,
    )
    plt.plot(subdf['Date'], y_unscaled, 'green', linewidth=2)
    plt.plot(future_dates, latest, 'blue', linewidth=4)
    # Annotate each future point with its value; a distinct loop name keeps
    # the ``pred`` array from being overwritten.
    for date, value in zip(future_dates, latest):
        plt.text(date, value, value, ha='left', va='top')
    plt.legend(('Past Predicted Value', 'Test', 'Future Predicted Value'))
    plt.xlabel('Time')
    plt.ylabel('Scaled Mean Sea Level (m)')
    plt.title('Average Ensemble Model Performance Compare to Historical data')

    # Unregister the figure from pyplot so repeated requests do not pile up
    # open figures; the returned Figure object can still be rendered.
    plt.close(fig)
    return fig, pred_1st, pred_2nd, pred_3rd
# Gradio UI: one city dropdown as input; the plot plus one textbox per
# predicted month as outputs, in the order returned by draw().
output = gr.outputs.Plot(type="auto")
output1 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-01(m)")
output2 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-02(m)")
# Third textbox shows the third-month value, so it is labelled 2021-03
# (it previously repeated the 2021-02 label).
output3 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-03(m)")
dropdown = gr.inputs.Dropdown(choices=list(df.City.unique()), type="value", default=None, label=None, optional=False)
app = gr.Interface(fn=draw, inputs=[dropdown], outputs=[output,output1,output2,output3],description="This model is only capable of predicting mean sea level in next three month based on historical data from 2000 to 2020")
app.launch()