Spaces:
Runtime error
Runtime error
import keras | |
import pandas as pd | |
import gradio as gr | |
from sklearn.preprocessing import MinMaxScaler | |
import numpy as np | |
from hampel import hampel | |
import pickle | |
import matplotlib.pyplot as plt | |
# Load the merged dataset. The first CSV column is read as the index and then
# thrown away again (reset_index() + dropping the resulting 'index' column),
# leaving a fresh 0..n-1 RangeIndex.
df = pd.read_csv('merge_data_final.csv', index_col=0)
df = df.reset_index().drop(columns='index')

# Convert to mean sea level by adding 2.75 ft, i.e. 0.8382 m.
df['sea surface height'] = df['sea surface height'] + 0.8382

# 'Date' is stored as "YYYY/MM" text; parse it into timestamps.
df['Date'] = pd.to_datetime(df['Date'], format='%Y/%m')
def create_dataset(df1, city):
    """Build the cleaned, min-max scaled feature table for one city.

    The rows of ``df1`` whose ``City`` equals ``city`` are copied, indexed by
    ``Date``, passed column by column through the Hampel filter, extended
    with 4-row rolling mean/std of ``sea surface height``, and finally scaled
    into the range [0, 1].

    Args:
        df1: DataFrame with all cities. Must contain ``City``, ``Date``,
            ``month`` and every measurement column named in the Hampel loop.
        city: Value of the ``City`` column to select.

    Returns:
        ``(df, sc_x, sc_y)`` where ``df`` is the processed frame (``Date``
        restored as a regular column), ``sc_x`` is the ``MinMaxScaler``
        fitted on the feature columns and ``sc_y`` the ``MinMaxScaler``
        fitted on ``sea surface height`` alone.
    """
    # Take an explicit copy: the filtered selection would otherwise be a
    # slice of the caller's frame, and every write below would either warn
    # (SettingWithCopy) or be lost under pandas Copy-on-Write.
    df = df1[df1['City'] == city].copy()
    df = df.set_index('Date')

    # Outlier detection. NOTE(review): this relies on hampel(...,
    # imputation=True) returning the imputed series directly -- confirm
    # against the installed `hampel` version.
    outlier_columns = [
        'sea surface temp', 'sea bottom temp', 'sea salinity',
        'sea surface height', 'CO2', 'seasonally adjust CO2',
        'greenland_avg_mass', 'antarctica_avg_mass', 'tavg_C', 'tmin_C',
        'tmax_C', 'prcp_m',
    ]
    for column in outlier_columns:
        df[column] = hampel(df[column], window_size=12, n=3, imputation=True)

    # Rolling mean and std of sea surface height with a sliding window of 4.
    # The first rows have no full window: fall back to a window of 2, then
    # to 0 for whatever is still undefined. The results are assigned rather
    # than filled with a chained `inplace=True`, which does not reach the
    # frame when Copy-on-Write is active.
    height = df['sea surface height']
    df['mean_elevation4'] = (
        height.rolling(4).mean().fillna(height.rolling(2).mean()).fillna(0)
    )
    df['std_elevation4'] = (
        height.rolling(4).std().fillna(height.rolling(2).std()).fillna(0)
    )

    attribute = ['month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
                 'CO2', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
                 'mean_elevation4', 'std_elevation4']
    target = ['sea surface height']

    # Features and target get separate scalers so the target scaler can be
    # used on its own to invert predictions.
    sc_x = MinMaxScaler(feature_range=(0, 1))
    sc_y = MinMaxScaler(feature_range=(0, 1))
    df[attribute] = sc_x.fit_transform(df[attribute])
    df[target] = sc_y.fit_transform(df[target])

    # Move 'Date' back from the index into a regular column.
    df = df.reset_index()
    return df, sc_x, sc_y
def process(df):
    """Split a prepared frame into feature and target arrays.

    Args:
        df: DataFrame containing the twelve feature columns and the three
            ``elevation_next*_month`` target columns selected below.

    Returns:
        ``(X, y)`` as NumPy arrays: the feature columns and the target
        columns, each in the order listed here.
    """
    feature_columns = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
        'CO2', 'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    target_columns = [
        'elevation_next_month',
        'elevation_next2_month',
        'elevation_next3_month',
    ]
    features = np.array(df.loc[:, feature_columns])
    targets = np.array(df.loc[:, target_columns])
    return features, targets
def build_train_attribute(train, n_in):
    """Stack overlapping windows of ``train`` into one array.

    Args:
        train: Array whose first axis is sliced into windows.
        n_in: Number of consecutive rows in each window.

    Returns:
        Array of the ``train.shape[0] - n_in + 1`` windows
        ``train[i:i + n_in]``, in order. For 2D input this is a 3D array.
        When ``n_in`` exceeds the number of rows no window is produced and
        the result is an empty array.
    """
    window_count = train.shape[0] - n_in + 1
    windows = [train[start:start + n_in] for start in range(window_count)]
    return np.array(windows)
def get_prediction(city, lookback = 12, df = df):
    """Average the LSTM and XGBoost sea-level predictions for one city.

    Args:
        city: City name. Spaces are replaced by underscores to form the
            model file names ``<name>_lstm.h5`` and ``<name>_xgboost.sav``.
        lookback: Number of consecutive rows in each LSTM input window.
        df: Frame with all cities, handed to ``create_dataset``.

    Returns:
        ``(prediction_average, subdf, y_unscaled)``: the element-wise mean of
        both models' predictions after inverting the target scaling, the
        processed per-city frame, and the observed ``sea surface height``
        in original units as a column vector.
    """
    name = city.replace(' ', '_')
    lstm_model = keras.models.load_model(f'{name}_lstm.h5')
    # NOTE: unpickling executes arbitrary code. This is acceptable only
    # because the model file ships with the app; never point it at
    # user-supplied files. The `with` block makes sure the handle is closed.
    with open(f'{name}_xgboost.sav', 'rb') as model_file:
        xgb_model = pickle.load(model_file)

    subdf, sc_x, sc_y = create_dataset(df, city)

    # Model inputs, in this exact column order.
    attribute = ['month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
                 'CO2', 'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C',
                 'prcp_m', 'mean_elevation4', 'std_elevation4']
    df_att = np.array(subdf[attribute])

    # The LSTM consumes sliding windows of `lookback` rows; XGBoost gets
    # the individual rows.
    x = build_train_attribute(df_att, lookback)
    prediction_lstm = sc_y.inverse_transform(lstm_model.predict(x))
    prediction_xgb = sc_y.inverse_transform(xgb_model.predict(df_att))
    y_unscaled = sc_y.inverse_transform(
        np.array(subdf['sea surface height']).reshape(-1, 1)
    )

    # There is one LSTM prediction per window, i.e. lookback - 1 fewer than
    # rows. Dropping the first lookback - 1 XGBoost rows pairs each window
    # with the XGBoost prediction for that window's last row.
    prediction_average = (prediction_lstm + prediction_xgb[lookback - 1:]) / 2
    return prediction_average, subdf, y_unscaled
def draw(city, df = df):
    """Plot past and future ensemble predictions for ``city``.

    Args:
        city: City name forwarded to ``get_prediction``.
        df: Frame with all cities, forwarded to ``get_prediction``.

    Returns:
        ``(fig, pred_1st, pred_2nd, pred_3rd)``: the matplotlib figure and
        the three values of the last prediction row, which are plotted at
        2021-01, 2021-02 and 2021-03.
    """
    # Forward `df` so the parameter is honoured rather than silently ignored.
    pred, subdf, y_unscaled = get_prediction(city, df=df)
    latest = pred[-1]
    pred_1st = latest[0]
    pred_2nd = latest[1]
    pred_3rd = latest[2]

    # The time axes are hard-coded: 241 monthly stamps for the first
    # prediction column and three stamps for the last prediction row.
    # NOTE(review): `pred` must therefore have exactly 241 rows -- confirm
    # against the dataset and the lookback in use.
    past_dates = pd.period_range("2001-01", "2021-01", freq='M').to_timestamp()
    future_dates = pd.to_datetime(["2021-01-01", "2021-02-01", "2021-03-01"])

    # Draw on this figure's own Axes instead of pyplot's implicit "current
    # figure", which is shared global state between concurrent requests.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(past_dates, pred[:, 0], 'r', linewidth=4)
    ax.plot(subdf['Date'], y_unscaled, 'green', linewidth=2)
    ax.plot(future_dates, latest, 'blue', linewidth=4)
    # Annotate each future point with its value. A separate loop name keeps
    # the `pred` array from being rebound.
    for date, value in zip(future_dates, latest):
        ax.text(date, value, str(value), ha='left', va='top')
    ax.legend(('Past Predicted Value', 'Test', 'Future Predicted Value'))
    ax.set_xlabel('Time')
    ax.set_ylabel('Scaled Mean Sea Level (m)')
    ax.set_title('Average Ensemble Model Performance Compare to Historical data')
    return fig, pred_1st, pred_2nd, pred_3rd
# Gradio front end: the selected city is passed to draw(), whose four return
# values fill the plot and the three forecast text boxes in order.
# The top-level components are used instead of the deprecated
# gr.inputs / gr.outputs namespaces, which Gradio 4 removed.
output = gr.Plot()
output1 = gr.Textbox(label="Mean Sea Level Prediction in 2021-01(m)")
output2 = gr.Textbox(label="Mean Sea Level Prediction in 2021-02(m)")
# Third forecast month: the label previously repeated "2021-02".
output3 = gr.Textbox(label="Mean Sea Level Prediction in 2021-03(m)")
dropdown = gr.Dropdown(choices=list(df.City.unique()), type="value", label=None)
app = gr.Interface(
    fn=draw,
    inputs=[dropdown],
    outputs=[output, output1, output2, output3],
    description="This model is only capable of predicting mean sea level in next three month based on historical data from 2000 to 2020",
)
app.launch()