Spaces:

Daliny
/

sea_level_prediction_model

Runtime error

File size: 5,203 Bytes

9f4b4c1

import keras
import pandas as pd
import gradio as gr
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from hampel import hampel
import pickle
import matplotlib.pyplot as plt

# Load the merged observations. The first CSV column is read as the index,
# turned back into a column by reset_index() (named 'index', presumably
# because the saved index was unnamed) and then discarded.
df = (
    pd.read_csv('merge_data_final.csv', index_col=0)
    .reset_index()
    .drop(columns='index')
)

# Convert to mean sea level by adding 0.8382 m (2.75 feet).
df['sea surface height'] += 0.8382
# 'Date' is stored as year/month text, e.g. "2000/01".
df['Date'] = pd.to_datetime(df['Date'], format='%Y/%m')


# Function to create a dataset
def create_dataset(df1, city):
    """Build the cleaned, min-max scaled feature table for a single city.

    Steps, in order:
      1. Keep only the rows of ``df1`` whose 'City' equals ``city``.
      2. Run ``hampel`` (window_size=12, n=3, imputation enabled) over every
         measured column and store its output back in the column.
      3. Add 'mean_elevation4' / 'std_elevation4': the 4-row rolling mean and
         standard deviation of 'sea surface height'. Leading gaps are filled
         with the 2-row rolling value, and whatever is still missing with 0.
      4. Scale the model attributes and the target to [0, 1] with two
         separate ``MinMaxScaler`` instances.

    Parameters:
        df1: DataFrame holding at least 'City', 'Date', 'month' and the
            measurement columns listed below.
        city: value of the 'City' column to select.

    Returns:
        (df, sc_x, sc_y) where ``df`` is the processed frame with 'Date'
        restored as a regular column, ``sc_x`` is the scaler fitted on the
        attribute columns and ``sc_y`` the scaler fitted on
        'sea surface height'.
    """
    # Work on an explicit copy: the original edited a filtered slice in place
    # and relied on chained ``fillna(..., inplace=True)``, which does not
    # update the frame under pandas Copy-on-Write and left NaNs behind.
    df = df1[df1['City'] == city].copy()
    df = df.set_index('Date')

    # Outlier detection on every measured series.
    measured = ['sea surface temp', 'sea bottom temp', 'sea salinity',
                'sea surface height', 'CO2', 'seasonally adjust CO2',
                'greenland_avg_mass', 'antarctica_avg_mass', 'tavg_C',
                'tmin_C', 'tmax_C', 'prcp_m']
    for col in measured:
        df[col] = hampel(df[col], window_size=12, n=3, imputation=True)

    # Rolling mean and std of sea surface height with a sliding window of 4.
    # The first rows have no full window, so fall back to a window of 2 and
    # finally to 0 for anything that is still undefined.
    height = df['sea surface height']
    df['mean_elevation4'] = (
        height.rolling(4).mean()
        .fillna(height.rolling(2).mean())
        .fillna(0)
    )
    df['std_elevation4'] = (
        height.rolling(4).std()
        .fillna(height.rolling(2).std())
        .fillna(0)
    )

    attribute = ['month', 'sea surface temp', 'sea bottom temp',
                 'sea salinity', 'CO2', 'tavg_C', 'tmin_C', 'tmax_C',
                 'prcp_m', 'mean_elevation4', 'std_elevation4']
    target = ['sea surface height']

    # Separate scalers so the target can be inverse-transformed on its own.
    # Columns outside ``attribute`` and ``target`` are left unscaled.
    sc_x = MinMaxScaler(feature_range=(0, 1))
    sc_y = MinMaxScaler(feature_range=(0, 1))
    df[attribute] = sc_x.fit_transform(df[attribute])
    df[target] = sc_y.fit_transform(df[target])

    df = df.reset_index(level=0)
    return df, sc_x, sc_y
  

def process(df):
    """Split a prepared frame into model inputs and three-step targets.

    Parameters:
        df: DataFrame containing the twelve input columns and the three
            'elevation_next*_month' target columns named below.

    Returns:
        A pair of NumPy arrays ``(inputs, targets)`` with one row per row of
        ``df``; columns follow the order of the lists below.
    """
    input_columns = [
        'month', 'sea surface temp', 'sea bottom temp', 'sea salinity',
        'CO2', 'sea surface height', 'tavg_C', 'tmin_C', 'tmax_C', 'prcp_m',
        'mean_elevation4', 'std_elevation4',
    ]
    target_columns = [
        'elevation_next_month',
        'elevation_next2_month',
        'elevation_next3_month',
    ]
    return np.array(df[input_columns]), np.array(df[target_columns])
  
# Functions to transform data into 3D shape
def build_train_attribute(train, n_in):
    """Stack every run of ``n_in`` consecutive rows of ``train``.

    Parameters:
        train: array whose first axis is sliced into overlapping windows.
        n_in: number of consecutive rows per window.

    Returns:
        ``np.array`` of the windows, one per start row, in order. When
        ``train`` has fewer than ``n_in`` rows there are no windows and the
        result is an empty array.
    """
    window_count = train.shape[0] - n_in + 1
    windows = [train[start:start + n_in] for start in range(window_count)]
    return np.array(windows)



def get_prediction(city, lookback=12, df=df):
    """Average the LSTM and XGBoost predictions for one city.

    Loads ``<city>_lstm.h5`` and ``<city>_xgboost.sav`` (spaces in the city
    name replaced by underscores) from the working directory, prepares the
    city's data with ``create_dataset`` and runs both models on it.

    Parameters:
        city: city name as it appears in the 'City' column.
        lookback: number of consecutive rows in each LSTM input window.
        df: full multi-city DataFrame; defaults to the module-level frame.

    Returns:
        (prediction_average, subdf, y_unscaled) where ``prediction_average``
        is the element-wise mean of the two models' unscaled predictions,
        ``subdf`` is the processed per-city frame and ``y_unscaled`` is the
        'sea surface height' column mapped back to its original scale as a
        column vector.
    """
    name = city.replace(' ', '_')
    lstm_model = keras.models.load_model(f'{name}_lstm.h5')
    # NOTE: unpickling runs arbitrary code from the file; only load model
    # files that ship with the app. The context manager closes the handle,
    # which the bare open() call used to leak.
    with open(f'{name}_xgboost.sav', 'rb') as model_file:
        xgb_model = pickle.load(model_file)

    subdf, _sc_x, sc_y = create_dataset(df, city)

    # Inference inputs only: the 'elevation_next*' target columns are not
    # needed here, so the module-level process() is not used.
    attribute = ['month', 'sea surface temp', 'sea bottom temp',
                 'sea salinity', 'CO2', 'sea surface height', 'tavg_C',
                 'tmin_C', 'tmax_C', 'prcp_m', 'mean_elevation4',
                 'std_elevation4']
    df_att = np.array(subdf[attribute])

    # The LSTM consumes sliding windows of ``lookback`` rows; XGBoost is fed
    # the rows one at a time.
    x = build_train_attribute(df_att, lookback)

    # sc_y was fitted on the single target column.
    # NOTE(review): applying it to multi-column model output presumably
    # relies on broadcasting inside the scaler - confirm with the installed
    # scikit-learn version.
    prediction_lstm = sc_y.inverse_transform(lstm_model.predict(x))
    prediction_xgb = sc_y.inverse_transform(xgb_model.predict(df_att))
    y_unscaled = sc_y.inverse_transform(
        np.array(subdf['sea surface height']).reshape(-1, 1)
    )

    # The first LSTM window ends at row ``lookback - 1``, so drop the earlier
    # XGBoost rows to line the two prediction arrays up before averaging.
    prediction_average = (prediction_lstm + prediction_xgb[lookback - 1:]) / 2

    return prediction_average, subdf, y_unscaled
  
def draw(city, df=df):
    """Plot history versus ensemble predictions for ``city``.

    Parameters:
        city: city name as it appears in the 'City' column.
        df: full multi-city DataFrame, forwarded to ``get_prediction``
            (previously this argument was accepted but silently ignored).

    Returns:
        (fig, pred_1st, pred_2nd, pred_3rd): the matplotlib figure and the
        three values of the last prediction row, which are plotted at
        2021-01, 2021-02 and 2021-03.
    """
    pred, subdf, y_unscaled = get_prediction(city, df=df)
    latest = pred[-1]
    pred_1st = latest[0]
    pred_2nd = latest[1]
    pred_3rd = latest[2]
    future_dates = pd.to_datetime(["2021-01-01", "2021-02-01", "2021-03-01"])

    fig = plt.figure()

    # The x-axis for past predictions is hard-coded to 2001-01 .. 2021-01,
    # so ``pred`` needs one row per month of that range for the plot call
    # to accept it.
    past_dates = pd.period_range("2001-01", "2021-01", freq='M').to_timestamp()
    plt.plot(past_dates, pred[:, 0], 'r', linewidth=4)
    plt.plot(subdf['Date'], y_unscaled, 'green', linewidth=2)
    plt.plot(future_dates, latest, 'blue', linewidth=4)
    # Annotate each future point with its value. A separate loop name keeps
    # ``pred`` from being rebound to a scalar, as the original loop did.
    for date, value in zip(future_dates, latest):
        plt.text(date, value, value, ha='left', va='top')
    plt.legend(('Past Predicted Value', 'Test', 'Future Predicted Value'))
    plt.xlabel('Time')
    plt.ylabel('Scaled Mean Sea Level (m)')
    plt.title('Average Ensemble Model Performance Compare to Historical data')

    return fig, pred_1st, pred_2nd, pred_3rd

# Output widgets, in the order draw() returns its values: the figure, then
# the predictions for the three months after the historical data ends.
output = gr.outputs.Plot(type="auto")
output1 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-01(m)")
output2 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-02(m)")
# Label fixed: this box shows the third-month value, not a second 2021-02.
output3 = gr.outputs.Textbox(type="auto", label="Mean Sea Level Prediction in 2021-03(m)")
# One dropdown entry per distinct city present in the loaded data.
dropdown = gr.inputs.Dropdown(choices=list(df.City.unique()), type="value", default=None, label=None, optional=False)

app = gr.Interface(fn=draw, inputs=[dropdown], outputs=[output,output1,output2,output3],description="This model is only capable of predicting mean sea level in next three month based on historical data from 2000 to 2020")
app.launch()