File size: 9,266 Bytes
a6c8c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9685626
a6c8c66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import streamlit as st
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import sklearn
from sklearn.preprocessing import LabelEncoder

from PIL import Image

# Page-wide Streamlit settings: page title, wide layout, and a sidebar that
# starts in the expanded state.
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
    page_title="Predicting Forest Fire in Algeria Using Supervised Learning",
)

def _render_countplot(data, x, title, xlabel, ylabel, hue=None, font_scale=1):
    """Draw a seaborn count plot and render it on the Streamlit page.

    Args:
        data: DataFrame holding the columns to plot.
        x: Name of the column whose values are counted along the x axis.
        title: Title drawn above the chart.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        hue: Optional name of the column used to split each bar by colour.
        font_scale: Font scaling factor handed to ``sns.set``.
    """
    # The theme must be set before the figure is created so that the font
    # scale applies to this figure.
    sns.set(font_scale=font_scale)
    fig, ax = plt.subplots(figsize=(20, 8))

    # Draw on the axes created above instead of relying on pyplot's implicit
    # "current axes" state.
    sns.countplot(data=data, x=x, hue=hue, palette='YlOrBr', ax=ax)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)

    st.pyplot(fig)
    # The script is executed again on every interaction; close the figure so
    # pyplot does not keep accumulating open figures.
    plt.close(fig)


def run():
    """Render the exploratory-data-analysis page of the forest fire app.

    Loads ``Algerian_forest_fires_dataset_UPDATE.csv`` and the images
    ``forest fire.jpg`` and ``fwi.png`` by relative path, then writes the
    page top to bottom: introduction, raw data table with column
    descriptions, a pie chart of the ``Classes`` distribution, count plots
    for month, temperature, relative humidity, rain and FWI category, and a
    correlation heatmap against the label-encoded ``Classes`` column.

    Figures are rendered only through ``st.plotly_chart`` / ``st.pyplot``;
    no ``show()`` calls are made because those try to open a browser tab or
    GUI window on the machine running the server.
    """
    # title
    st.title('Predicting Forest Fire in Algeria Using Supervised Learning')

    # sub header
    st.subheader ('Exploratory Data Analysis of the dataset.')

    # Add Image (context manager closes the underlying file handle)
    with Image.open('forest fire.jpg') as image:
        st.image(image, caption='Forest fire illustration')

    # Description
    st.write('Forest fires are a serious issue in Algeria, particularly during the summer months when hot, dry weather and strong winds increase the risk of fires spreading rapidly. The country has a significant amount of forested land, particularly in the northern coastal region, which is particularly vulnerable to fires. Predicting forest fires in Algeria is important in order to prevent or mitigate their impact.')
    st.write('# Dataset')
    st.write('This section explains the process of data loading. Dataset used in this analysis is Algerian Forest Fires Dataset on UCI Machine Learning Repository.')

    # show dataframe
    df = pd.read_csv('Algerian_forest_fires_dataset_UPDATE.csv')
    st.dataframe(df)

    # add description of Dataset
    st.write('Following are the variables and definitions of each column in the dataset.')
    column_notes = (
        "`Date` : \t(DD/MM/YYYY) Day, month (june to september), year (2012)",
        "`Temp` : temperature noon (temperature max) in Celsius degrees: 22 to 42",
        "`RH` : \tRelative Humidity in %: 21 to 90",
        "`Ws` : \tWind speed in km/h: 6 to 29",
        "`Rain` : total day in mm: 0 to 16.8",
        "`FFMC` : Fine Fuel Moisture Code (FFMC) index from the FWI system: 28.6 to 92.5",
        "`DMC` : Duff Moisture Code (DMC) index from the FWI system: 1.1 to 65.9",
        "`DC` : Drought Code (DC) index from the FWI system: 7 to 220.4",
        "`ISI` : Initial Spread Index (ISI) index from the FWI system: 0 to 18.5",
        "`BUI` : Buildup Index (BUI) index from the FWI system: 1.1 to 68",
        "`FWI` : Fire Weather Index (FWI) Index: 0 to 31.1",
        "`Classes` : two classes, namely fire and not fire",
    )
    for note in column_notes:
        st.write(note)

    # Forest Fire

    st.write('# Exploratory Data Analysis ')
    st.write('## Forest Fire')
    st.write('This section describes data exploration to determine the number of forest fires that ocurred in 2012 in Algeria.')

    # Work on a copy so the helper columns added below never leak into `df`.
    df_eda = df.copy()

    # number of forest fire
    # Name both output columns explicitly: the default column names produced
    # by value_counts().reset_index() differ between pandas 1.x and 2.x.
    fire = (
        df_eda['Classes']
        .value_counts()
        .rename_axis('Classes')
        .reset_index(name='count')
    )

    # Plot PieChart with Plotly
    fig = px.pie(fire, values='count', names='Classes', color_discrete_sequence=['brown', 'orange'])
    fig.update_layout(title_text="Number of Forest Fire", title_x=0.5)
    st.plotly_chart(fig)
    st.write('From the visualization above, the number of forest fires that occurred is balanced with the number of forest fires that did not occur. After knowing the number of forest fires, further exploration is carried out to find out when forest fires occur most frequently.')

    # The month with the most occurrence of forest fires

    st.write('### The month with the most occurrence of forest fires')
    st.write('This section describes data exploration for finding the `month` in which forest fires occur most frequently.')
    _render_countplot(
        df_eda, 'month',
        title='Number of forest fires by month',
        xlabel='Month',
        ylabel='Forest Fire Count',
        hue='Classes',
        font_scale=1,
    )
    st.write('From this dataset it is known that forest fires most often occur in the summer, to be precise in **August**. **August** has the highest average temperature compared to other months, the lowest average relative humidity compared to other months and the lowest average rain intensity compared to other months.')

    # Temperature

    st.write('## Temperature')
    st.write('This section describes data exploration to find the `Temperature` when a forest fire occurs.')
    _render_countplot(
        df_eda, 'Temperature',
        title='Temperature and Forest fires',
        xlabel='Temperature',
        ylabel='Count',
        hue='Classes',
        font_scale=1,
    )
    st.write('From the visualization above, the average temperature when a forest fire occurs is 33.82 degree Celsius.')

    # Relative Humidity

    st.write('## Relative Humidity')
    st.write('This section describes data exploration to find the level of `relative humidity` when a forest fire occurs.')
    # create a new column based on humidity group
    # right=False makes each bin left-closed ([21, 26), [26, 31), ...), which
    # is what the labels describe. With the default right-closed bins a value
    # of 21 falls outside every bin and 26 is counted under '21-25'.
    df_eda['RH_bins'] = pd.cut(
        x=df_eda['RH'],
        bins=[21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86, 91],
        labels=['21-25', '26-30', '31-35', '36-40', '41-45', '46-50', '51-55',
                '56-60', '61-65', '66-70', '71-75', '76-80', '81-85', '86-90'],
        right=False,
    )
    _render_countplot(
        df_eda, 'RH_bins',
        title='Relative Humidity and Forest Fires',
        xlabel='Relative Humidity',
        ylabel='Count',
        hue='Classes',
        font_scale=1.5,
    )
    st.write('From the visualization above, forest fires most often occur when the relative humidity is 55%. The more the relative humidity increases, the more likely it is that a forest fire will not occur.')

    # Rain

    st.write('## Rain')
    st.write('This section describes data exploration to find the intensity of `rain` when a forest fire occurs.')
    # forest fire when raining: only rows labelled 'fire' are counted
    fire_days = df_eda[df_eda['Classes'] == 'fire']
    _render_countplot(
        fire_days, 'Rain',
        title='Rain and Forest Fires',
        xlabel='Rain',
        ylabel='Forest Fire Count',
        font_scale=1.5,
    )
    st.write('From the visualization above, most forest fire incidents occur when it is not raining. But there are also forest fires that occur when it is raining with low intensity.')

    # Fire Weather Index (FWI) System

    st.write('## Fire Weather Index (FWI) System')
    st.write('This section explains the process of data exploration to find the `FWI` value that can cause forest fire. The Fire Weather Index (FWI) is a numeric rating of fire intensity. It is based on the ISI and the BUI, and __is used as a general index of fire danger throughout the forested areas__.')
    st.write('**Structure of the FWI System**')
    st.write('The diagram below illustrates the components of the FWI System. Calculation of the components is based on consecutive daily observations of `temperature`, `relative humidity`,`wind speed` and `24-hour precipitation`. The six standard components provide numeric ratings of relative potential for wildland fire.')
    # Add Image
    with Image.open('fwi.png') as fwi:
        st.image(fwi)
    for fwi_range in (
        "FWI Range",
        "`0 - 1` : Low ",
        "`2 - 6` : Moderate",
        "`7 - 13` : High",
        "`> 13` : Very High",
    ):
        st.write(fwi_range)

    # FWI: right-closed bins (-1, 1], (1, 6], (6, 13], (13, inf); the -1
    # lower edge is there so that an FWI of exactly 0 lands in 'Low'.
    df_eda['FWI_cat'] = pd.cut(
        x=df_eda['FWI'],
        bins=[-1, 1, 6, 13, np.inf],
        labels=['Low', 'Moderate', 'High', 'Very High'],
    )
    _render_countplot(
        df_eda, 'FWI_cat',
        title='FWI and Forest Fires',
        xlabel='FWI',
        ylabel='Count',
        hue='Classes',
        font_scale=1.5,
    )
    st.write('From the visualization above, the potential for forest fires increases when the FWI value is in the range of 2-6 (moderate).')

    # Correlation Matrix Analysis

    st.write('## Correlation Matrix Analysis')
    st.write('This section explains about correlation matrix analysis to find out the correlation between features and target (`Classes`). The cell below explains the process of performing a correlation matrix analysis to identify the features that are most strongly correlated with the target (`Classes`). To accomplish this, categorical data will be converted into numerical data using the `LabelEncoder` library.')
    df_copy = df.copy()

    # Using LabelEncoder to convert categorical into numerical data
    df_copy['Classes'] = LabelEncoder().fit_transform(df_copy['Classes'])
    df_copy = df_copy.drop(['year'], axis=1)

    # Plotting Correlation Matrix
    # Restrict to numeric/bool columns explicitly: pandas >= 2.0 no longer
    # drops non-numeric columns inside corr() and raises instead.
    numeric_cols = df_copy.select_dtypes(include=['number', 'bool'])
    sns.set(font_scale=1)
    fig, ax = plt.subplots(figsize=(15, 15))
    sns.heatmap(numeric_cols.corr(), annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
    st.pyplot(fig)
    plt.close(fig)
    st.write('From the visualization above, `month` and `Ws` has a low correlation to the target (`Classes`).')

# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    run()