Spaces:
Runtime error
Runtime error
File size: 5,203 Bytes
9f4b4c1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import keras
import pandas as pd
import gradio as gr
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from hampel import hampel
import pickle
import matplotlib.pyplot as plt
# Load the merged dataset. The CSV's first column is read as the index and
# then discarded, leaving a fresh 0..n-1 index.
df = (
    pd.read_csv('merge_data_final.csv', index_col=0)
    .reset_index()
    .drop(columns='index')
)
# Convert to mean sea level by adding 0.8382 m (= 2.75 feet).
df['sea surface height'] = df['sea surface height'] + 0.8382
# 'Date' is stored as 'YYYY/MM' text; parse it into timestamps.
df['Date'] = pd.to_datetime(df['Date'], format='%Y/%m')
# Function to create a dataset
def create_dataset(df1, city):
    """Build the cleaned, scaled per-city frame used by the models.

    Rows of ``df1`` whose ``'City'`` equals ``city`` are selected and indexed
    by ``'Date'``. Every measurement column is passed through
    ``hampel(..., window_size=12, n=3, imputation=True)`` (outlier
    handling), rolling mean/std features of ``'sea surface height'`` are
    added, and the feature and target columns are min-max scaled to
    ``[0, 1]`` with scalers fitted on this city's rows only.

    Args:
        df1: Frame containing ``'City'``, ``'Date'``, ``'month'`` and every
            measurement column named below.
        city: Value of ``'City'`` to keep.

    Returns:
        ``(df, sc_x, sc_y)``: the processed frame with ``'Date'`` restored
        as a regular column, the scaler fitted on the feature columns, and
        the scaler fitted on ``'sea surface height'``.
    """
    # Work on an explicit copy. Editing the filtered slice in place and
    # calling ``df[col].fillna(..., inplace=True)`` is chained assignment:
    # it warns on current pandas and silently does nothing under
    # Copy-on-Write, which would leave NaNs in the rolling features.
    city_df = df1[df1['City'] == city].copy()
    city_df = city_df.set_index('Date')

    ## outlier detection
    measurement_cols = [
        'sea surface temp', 'sea bottom temp', 'sea salinity',
        'sea surface height', 'CO2', 'seasonally adjust CO2',
        'greenland_avg_mass', 'antarctica_avg_mass', 'tavg_C', 'tmin_C',
        'tmax_C', 'prcp_m',
    ]
    for col in measurement_cols:
        city_df[col] = hampel(city_df[col], window_size=12, n=3, imputation=True)

    ## rolling mean and std of sea surface height with a sliding window of 4;
    ## rows without a full window fall back to a window of 2, then to 0.
    height = city_df['sea surface height']
    city_df['mean_elevation4'] = (
        height.rolling(4).mean().fillna(height.rolling(2).mean()).fillna(0)
    )
    city_df['std_elevation4'] = (
        height.rolling(4).std().fillna(height.rolling(2).std()).fillna(0)
    )

    # 'sea surface height' is deliberately absent from `attribute`: it is
    # scaled by its own scaler so predictions can be inverse-transformed.
    attribute = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
        'CO2', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    target = ['sea surface height']
    sc_x = MinMaxScaler(feature_range=(0, 1))
    sc_y = MinMaxScaler(feature_range=(0, 1))
    city_df[attribute] = sc_x.fit_transform(city_df[attribute])
    city_df[target] = sc_y.fit_transform(city_df[target])

    # Turn 'Date' back into an ordinary column for the callers.
    return city_df.reset_index(), sc_x, sc_y
def process(df):
    """Split a prepared frame into feature and target arrays.

    Args:
        df: Frame that already contains the twelve feature columns and the
            three ``elevation_next*_month`` target columns named below.

    Returns:
        ``(X, y)`` as NumPy arrays, with columns in the order listed here.
    """
    feature_cols = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
        'CO2', 'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    label_cols = [
        'elevation_next_month',
        'elevation_next2_month',
        'elevation_next3_month',
    ]
    return np.array(df[feature_cols]), np.array(df[label_cols])
# Functions to transform data into 3D shape
def build_train_attribute(train, n_in):
    """Stack every run of ``n_in`` consecutive rows of ``train``.

    Args:
        train: Array whose first axis is the row axis.
        n_in: Number of rows per window.

    Returns:
        Array of overlapping windows, one per start row, i.e.
        ``train.shape[0] - n_in + 1`` windows; an empty array when ``train``
        has fewer than ``n_in`` rows.
    """
    window_count = train.shape[0] - n_in + 1
    return np.array(
        [train[start:start + n_in] for start in range(window_count)]
    )
def get_prediction(city, lookback=12, df=df):
    """Return the averaged LSTM/XGBoost sea-level predictions for ``city``.

    The two models are loaded from the relative paths ``<name>_lstm.h5`` and
    ``<name>_xgboost.sav``, where ``<name>`` is ``city`` with spaces replaced
    by underscores. Both predictions are mapped back to the original units
    with the target scaler before being averaged.

    Args:
        city: City name as stored in ``df['City']``.
        lookback: Number of consecutive rows in each LSTM input window.
        df: Source frame; defaults to the module-level dataset.

    Returns:
        ``(prediction_average, subdf, y_unscaled)``: the averaged
        predictions, the processed per-city frame from ``create_dataset``,
        and the unscaled ``'sea surface height'`` as a column vector.
    """
    name = city.replace(' ', '_')
    lstm_model = keras.models.load_model(f'{name}_lstm.h5')
    # NOTE(review): unpickling executes arbitrary code; only load model files
    # that ship with the app. The `with` block closes the handle promptly.
    with open(f'{name}_xgboost.sav', 'rb') as model_file:
        xgb_model = pickle.load(model_file)

    subdf, _, sc_y = create_dataset(df, city)

    attribute = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
        'CO2', 'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]  # list(selected_feat)
    df_att = np.array(subdf[attribute])
    x = build_train_attribute(df_att, lookback)

    prediction_lstm = sc_y.inverse_transform(lstm_model.predict(x))
    prediction_xgb = sc_y.inverse_transform(xgb_model.predict(df_att))
    y_unscaled = sc_y.inverse_transform(
        np.array(subdf['sea surface height']).reshape(-1, 1)
    )

    # The first LSTM window ends at row `lookback - 1`, so the XGBoost rows
    # before it are dropped to line the two outputs up.
    # NOTE(review): assumes both models emit one row per window/row with the
    # same number of columns -- confirm against the trained models.
    prediction_average = (prediction_lstm + prediction_xgb[lookback - 1:]) / 2
    return prediction_average, subdf, y_unscaled
def draw(city, df=df):
    """Plot historical and predicted sea level for ``city``.

    Args:
        city: City name forwarded to ``get_prediction``.
        df: Source frame forwarded to ``get_prediction``; defaults to the
            module-level dataset.

    Returns:
        ``(fig, pred_1st, pred_2nd, pred_3rd)``: the matplotlib figure and
        the three values of the last prediction row, which are drawn at
        2021-01, 2021-02 and 2021-03 respectively.
    """
    # Forward `df` so that a caller-supplied frame is actually used.
    pred, subdf, y_unscaled = get_prediction(city, df=df)
    latest = pred[-1]
    pred_1st, pred_2nd, pred_3rd = latest[0], latest[1], latest[2]
    future_dates = pd.to_datetime(["2021-01-01", "2021-02-01", "2021-03-01"])

    fig, ax = plt.subplots()
    # NOTE(review): the x-axis is hard-coded to 2001-01..2021-01, so `pred`
    # must have exactly that many monthly rows -- confirm against the data.
    ax.plot(
        pd.period_range("2001-01", "2021-01", freq='M').to_timestamp(),
        pred[:, 0],
        'r',
        linewidth=4,
    )
    ax.plot(subdf['Date'], y_unscaled, 'green', linewidth=2)
    ax.plot(future_dates, latest, 'blue', linewidth=4)
    # Label each future point with its value; the loop variable is kept
    # distinct from `pred` so the prediction array is not shadowed.
    for date, value in zip(future_dates, latest):
        ax.text(date, value, value, ha='left', va='top')
    ax.legend(('Past Predicted Value', 'Test', 'Future Predicted Value'))
    ax.set_xlabel('Time')
    ax.set_ylabel('Scaled Mean Sea Level (m)')
    ax.set_title('Average Ensemble Model Performance Compare to Historical data')
    return fig, pred_1st, pred_2nd, pred_3rd
# Gradio UI: a city dropdown as input; the plot and three prediction values
# as outputs, in the order returned by draw().
output = gr.outputs.Plot(type="auto")
output1 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-01(m)")
output2 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-02(m)")
# draw() plots its third returned value at 2021-03, so the label says so.
output3 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-03(m)")
dropdown = gr.inputs.Dropdown(choices=list(df.City.unique()), type="value", default=None, label=None, optional=False)
app = gr.Interface(fn=draw, inputs=[dropdown], outputs=[output,output1,output2,output3],description="This model is only capable of predicting mean sea level in next three month based on historical data from 2000 to 2020")
app.launch()