File size: 3,761 Bytes
3730bdf
f6a56da
3730bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
e4d5cfc
3730bdf
 
55f7cb4
3730bdf
55f7cb4
 
 
3730bdf
d7b041a
3730bdf
0039660
 
4e0afd5
 
 
 
 
55f7cb4
 
 
 
a575d01
 
 
0039660
a575d01
 
0039660
a575d01
0039660
 
 
 
 
 
 
 
a575d01
 
0039660
a575d01
 
0039660
a575d01
 
b6e3518
3730bdf
 
 
 
 
ff74936
 
3730bdf
 
 
 
 
 
 
ff74936
 
 
 
 
 
 
 
 
 
3730bdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import subprocess
import sys

# Install the pinned pdpbox release before it is imported further down.
# pip is run as a module of the interpreter executing this script, so the
# package lands in the same environment that performs the import, and the
# argument list is handed over directly instead of going through a shell.
# check=False keeps the step best-effort: a failed install is not raised
# here and only shows up at the pdpbox import.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "pdpbox==0.2.1"],
    check=False,
)

from pdpbox.pdp import pdp_isolate, pdp_plot
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.ensemble import RandomForestRegressor
import pandas as pd
from numpy import mean
import streamlit as st

"""
# IOT temperature
"""

# --- Scope controls and row filtering ---------------------------------------
# Checkbox: fold the State values into the Country column instead of keeping
# a separate State column.
merge = st.checkbox("Merge US country with State ?")

# Every "take all" box starts ticked; unticking one reveals the matching
# selectbox further down.
all_region = st.checkbox("Take all Region ?", value=True)
all_country = st.checkbox("Take all Country ?", value=True)
all_city = st.checkbox("Take all City ?", value=True)

df = pd.read_csv("city_temperature.csv")

# Placeholder meaning "no single country picked"; only an explicit pick of
# "US" unlocks the state filter below.
country = "all"

if merge:
    # Rows that carry a State get it copied over their Country value, then
    # every column whose name contains "State" is removed.
    has_state = df['State'].notna()
    df.loc[has_state, 'Country'] = df['State']
    keep_column = ~df.columns.str.contains('State')
    df = df.loc[:, keep_column]

# Filters run from coarse to fine, so each selectbox only offers values that
# survived the previous filter.
if not all_region:
    region_options = df["Region"].unique()
    region = st.selectbox(
        'Which region do you want to predict temparature ?',
        region_options,
        index=0,
    )
    outside_region = df['Region'] != region
    df.drop(df[outside_region].index, inplace=True)

if not all_country:
    country_options = df["Country"].unique()
    country = st.selectbox(
        'Which country do you want to predict temparature ?',
        country_options,
        index=0,
    )
    outside_country = df['Country'] != country
    df.drop(df[outside_country].index, inplace=True)

# The State column only exists when it was not merged away above.
if not merge and country == "US":
    all_state = st.checkbox("Take all State ?", value=True)

    if not all_state:
        state_options = df["State"].unique()
        state = st.selectbox(
            'Which state do you want to predict temparature ?',
            state_options,
            index=0,
        )
        outside_state = df['State'] != state
        df.drop(df[outside_state].index, inplace=True)

if not all_city:
    city_options = df["City"].unique()
    city = st.selectbox(
        'Which city do you want to predict temparature ?',
        city_options,
        index=0,
    )
    outside_city = df['City'] != city
    df.drop(df[outside_city].index, inplace=True)
    
    

def _encode_by_first_appearance(values):
    """Return *values* as integer codes numbered by order of first appearance.

    The first distinct value seen becomes 0, the next one 1, and so on. The
    code table is built once from ``values.unique()`` and applied in a single
    pass, rather than rewriting the whole column once per distinct value.
    Because the lookup is done on the original values, a raw value that
    happens to look like an already assigned code cannot be re-mapped.

    A missing value is one of the entries returned by ``unique()`` and so
    receives a code of its own; this relies on ``Series.map`` matching a NaN
    dictionary key.
    """
    code_of = {value: code for code, value in enumerate(values.unique())}
    return values.map(code_of).astype("int")


# Replace every location column with integer codes. State is only present
# when it has not been merged into Country.
columns_to_encode = ["Region", "Country"]
if not merge:
    columns_to_encode.append("State")
columns_to_encode.append("City")

for column_name in columns_to_encode:
    df[column_name] = _encode_by_first_appearance(df[column_name])

# --- Model training and MAE report ------------------------------------------
# SelectKBest falls back to a classification scorer when none is given; the
# target here is continuous, so the regression scorer is passed explicitly.
# Imported at this point so the section stays self-contained.
from sklearn.feature_selection import f_regression

target = 'AvgTemperature'
# Y is the value to predict.
Y = df[target]
# Only the calendar columns are used as features; the encoded location
# columns are not part of X.
X = df[['Month', 'Day', 'Year']]

# Fixed random_state keeps the 75/25 split reproducible between reruns.
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=0.25, random_state=42)

# Baseline: predict the training mean for every training row.
y_pred = [Y_train.mean()] * len(Y_train)

st.write('Baseline MAE: %f' % (round(mean_absolute_error(Y_train, y_pred), 5)))

# Linear regression on standardised features.
lm = make_pipeline(StandardScaler(), LinearRegression())

lm.fit(X_train, Y_train)

st.write('Linear Regression Training MAE: %f' % (round(mean_absolute_error(Y_train, lm.predict(X_train)), 5)))
st.write('Linear Regression Test MAE: %f' % (round(mean_absolute_error(Y_val, lm.predict(X_val)), 5)))

# Random forest behind a feature selector. k="all" keeps every feature, so
# the choice of scorer affects only how the scores are computed, not which
# columns reach the forest.
forestModel = make_pipeline(
    SelectKBest(score_func=f_regression, k="all"),
    StandardScaler(),
    RandomForestRegressor(
        n_estimators=100,
        max_depth=50,
        random_state=77,
        n_jobs=-1))

forestModel.fit(X_train, Y_train)

st.write('Random Forest Regressor Model Training MAE: %f' % (mean_absolute_error(Y_train, forestModel.predict(X_train))))
st.write('Random Forest Regressor Model Test MAE: %f' % (mean_absolute_error(Y_val, forestModel.predict(X_val))))