# Streamlit app: filter the city temperature dataset by region / country /
# state / city, then fit a linear regression and a random forest on the
# calendar features (Month, Day, Year) and report their mean absolute error.
import os

# pdpbox is installed before it is imported just below.
# NOTE(review): Streamlit re-executes the script on every interaction, so this
# shell call runs each time; presumably a workaround for the hosting
# environment -- confirm before moving it to a requirements file.
os.system('pip3 install pdpbox==0.2.1')

from pdpbox.pdp import pdp_isolate, pdp_plot
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
from numpy import mean
import streamlit as st


def _keep_only(frame, column, value):
    """Drop, in place, every row of *frame* whose *column* differs from *value*."""
    frame.drop(frame.index[frame[column] != value], inplace=True)


def _encode_as_codes(frame, columns):
    """Replace each listed column of *frame* with integer codes, in place.

    Codes are handed out in order of first appearance (0, 1, 2, ...).
    Missing values receive the sentinel code -1.  One ``pd.factorize`` pass
    per column replaces the former one-``replace``-per-category loop, whose
    cost grew with (number of categories) x (number of rows).
    """
    for column in columns:
        codes, _ = pd.factorize(frame[column])
        frame[column] = codes


def _report_mae(label, y_true, y_pred, ndigits=None):
    """Write ``"<label> MAE: <value>"`` to the page.

    The value is rounded to *ndigits* first when *ndigits* is given, and is
    always rendered with the fixed six decimals of the ``f`` format.
    """
    mae = mean_absolute_error(y_true, y_pred)
    if ndigits is not None:
        mae = round(mae, ndigits)
    st.write(f"{label} MAE: {mae:f}")


# Bare string literal: rendered as the page title by Streamlit "magic".
"""
# IOT temperature
"""

# --- Filter widgets (rendered in call order) --------------------------------
merge = st.checkbox("Merge US country with State ?")
all_region = st.checkbox("Take all Region ?", value=True)
all_country = st.checkbox("Take all Country ?", value=True)
all_city = st.checkbox("Take all City ?", value=True)

df = pd.read_csv("city_temperature.csv")

# Default used when no single country is picked, so the "US" check below
# is False in that case.
country = "all"

if merge:
    # Rows that carry a state use the state name as their country, after
    # which every column whose name contains 'State' is dropped.
    df.loc[df['State'].notna(), 'Country'] = df['State']
    df = df.loc[:, ~df.columns.str.contains('State')]

# Each selectbox lists only the values still present after the filters
# applied before it.
if not all_region:
    region = st.selectbox(
        'Which region do you want to predict temparature ?',
        (df["Region"].unique()),
        index=0)
    _keep_only(df, "Region", region)

if not all_country:
    country = st.selectbox(
        'Which country do you want to predict temparature ?',
        (df["Country"].unique()),
        index=0)
    _keep_only(df, "Country", country)

# The state filter is offered only when states were not merged into the
# country column and the selected country is the US.
if not merge and country == "US":
    all_state = st.checkbox("Take all State ?", value=True)
    if not all_state:
        state = st.selectbox(
            'Which state do you want to predict temparature ?',
            (df["State"].unique()),
            index=0)
        _keep_only(df, "State", state)

if not all_city:
    city = st.selectbox(
        'Which city do you want to predict temparature ?',
        (df["City"].unique()),
        index=0)
    _keep_only(df, "City", city)

# --- Label-encode the categorical columns -----------------------------------
# 'State' no longer exists when it was merged into 'Country' above.
categorical_columns = ["Region", "Country"]
if not merge:
    categorical_columns.append("State")
categorical_columns.append("City")
_encode_as_codes(df, categorical_columns)

# --- Train / evaluate -------------------------------------------------------
target = 'AvgTemperature'
# Here Y would be our target
Y = df[target]
# Here X would contain the other column
#X = df.loc[:, df.columns != target]
# Only the calendar columns are used as features; the encoded categorical
# columns above are not part of X.
X = df[['Month', 'Day', 'Year']]

X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y, test_size=0.25, random_state=42)

# Baseline: always predict the mean training temperature.
y_pred = [Y_train.mean()] * len(Y_train)
_report_mae('Baseline', Y_train, y_pred, ndigits=5)

lm = make_pipeline(StandardScaler(), LinearRegression(),)
lm.fit(X_train, Y_train)
_report_mae('Linear Regression Training',
            Y_train, lm.predict(X_train), ndigits=5)
_report_mae('Linear Regression Test',
            Y_val, lm.predict(X_val), ndigits=5)

forestModel = make_pipeline(
    SelectKBest(k="all"),
    StandardScaler(),
    RandomForestRegressor(
        n_estimators=100,
        max_depth=50,
        random_state=77,
        n_jobs=-1))
forestModel.fit(X_train, Y_train)
# The random-forest scores are reported without the extra rounding step.
_report_mae('Random Forest Regressor Model Training',
            Y_train, forestModel.predict(X_train))
_report_mae('Random Forest Regressor Model Test',
            Y_val, forestModel.predict(X_val))