ahmadluay's picture
edit eda
9685626
raw
history blame
9.27 kB
import streamlit as st
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import sklearn
from sklearn.preprocessing import LabelEncoder
from PIL import Image
st.set_page_config(
page_title='Predicting Forest Fire in Algeria Using Supervised Learning',
layout = 'wide',
initial_sidebar_state='expanded'
)
def run():
# title
st.title('Predicting Forest Fire in Algeria Using Supervised Learning')
# sub header
st.subheader ('Exploratory Data Analysis of the dataset.')
# Add Image
image = Image.open('forest fire.jpg')
st.image(image,caption = 'Forest fire illustration')
# Description
st.write('Forest fires are a serious issue in Algeria, particularly during the summer months when hot, dry weather and strong winds increase the risk of fires spreading rapidly. The country has a significant amount of forested land, particularly in the northern coastal region, which is particularly vulnerable to fires. Predicting forest fires in Algeria is important in order to prevent or mitigate their impact.')
st.write('# Dataset')
st.write('This section explains the process of data loading. Dataset used in this analysis is Algerian Forest Fires Dataset on UCI Machine Learning Repository.')
# show dataframe
df = pd.read_csv('Algerian_forest_fires_dataset_UPDATE.csv')
st.dataframe(df)
# add description of Dataset
st.write('Following are the variables and definitions of each column in the dataset.')
st.write("`Date` : (DD/MM/YYYY) Day, month (june to september), year (2012)")
st.write("`Temp` : temperature noon (temperature max) in Celsius degrees: 22 to 42")
st.write("`RH` : Relative Humidity in %: 21 to 90")
st.write("`Ws` : Wind speed in km/h: 6 to 29")
st.write("`Rain` : total day in mm: 0 to 16.8")
st.write("`FFMC` : Fine Fuel Moisture Code (FFMC) index from the FWI system: 28.6 to 92.5")
st.write("`DMC` : Duff Moisture Code (DMC) index from the FWI system: 1.1 to 65.9")
st.write("`DC` : Drought Code (DC) index from the FWI system: 7 to 220.4")
st.write("`ISI` : Initial Spread Index (ISI) index from the FWI system: 0 to 18.5")
st.write("`BUI` : Buildup Index (BUI) index from the FWI system: 1.1 to 68")
st.write("`FWI` : Fire Weather Index (FWI) Index: 0 to 31.1")
st.write("`Classes` : two classes, namely fire and not fire")
# Forest Fire
st.write('# Exploratory Data Analysis ')
st.write('## Forest Fire')
st.write('This section describes data exploration to determine the number of forest fires that ocurred in 2012 in Algeria.')
# number of forest fire
df_eda = df.copy()
fire = df_eda.Classes.value_counts().to_frame().reset_index()
# Plot PieChart with Plotly
fig = px.pie(fire,values='Classes', names='index',color_discrete_sequence=['brown','orange'])
fig.update_layout(title_text = "Number of Forest Fire", title_x = 0.5)
fig.show()
st.plotly_chart(fig)
st.write('From the visualization above, the number of forest fires that occurred is balanced with the number of forest fires that did not occur. After knowing the number of forest fires, further exploration is carried out to find out when forest fires occur most frequently.')
# The month with the most occurrence of forest fires
st.write('### The month with the most occurrence of forest fires')
st.write('This section describes data exploration for finding the `month` in which forest fires occur most frequently.')
# create bar chart
sns.set(font_scale=1)
fig, ax = plt.subplots(figsize=(20, 8))
sns.countplot(x=df_eda.month, hue=df_eda.Classes,palette='YlOrBr')
plt.title('Number of forest fires by month')
plt.xlabel('Month')
plt.ylabel('Forest Fire Count')
plt.show()
st.pyplot(fig)
st.write('From this dataset it is known that forest fires most often occur in the summer, to be precise in **August**. **August** has the highest average temperature compared to other months, the lowest average relative humidity compared to other months and the lowest average rain intensity compared to other months.')
# Temperature
st.write('## Temperature')
st.write('This section describes data exploration to find the `Temperature` when a forest fire occurs.')
# create bar chart
sns.set(font_scale=1)
fig, ax = plt.subplots(figsize=(20, 8))
sns.countplot(x=df_eda.Temperature, hue=df_eda.Classes,palette='YlOrBr')
plt.title('Temperature and Forest fires')
plt.xlabel('Temperature')
plt.ylabel('Count')
plt.show()
st.pyplot(fig)
st.write('From the visualization above, the average temperature when a forest fire occurs is 33.82 degree Celsius.')
# Relative Humidity
st.write('## Relative Humidity')
st.write('This section describes data exploration to find the level of `relative humidity` when a forest fire occurs.')
# create a new column based on humidity group
df_eda['RH_bins']=pd.cut(
x=df_eda['RH'],
bins=[21,26,31,36,41,46,51,56,61,66,71,76,81,86,91],
labels=['21-25','26-30','31-35','36-40','41-45','46-50','51-55','56-60','61-65','66-70','71-75','76-80','81-85','86-90'])
# create bar chart
sns.set(font_scale=1.5)
fig, ax = plt.subplots(figsize=(20, 8))
sns.countplot(x=df_eda.RH_bins, hue=df_eda.Classes,palette='YlOrBr')
plt.title('Relative Humidity and Forest Fires')
plt.xlabel('Relative Humidity')
plt.ylabel('Count')
plt.show()
st.pyplot(fig)
st.write('From the visualization above, forest fires most often occur when the relative humidity is 55%. The more the relative humidity increases, the more likely it is that a forest fire will not occur.')
# Rain
st.write('## Rain')
st.write('This section describes data exploration to find the intensity of `rain` when a forest fire occurs.')
# forest fire when raining
rain = df_eda[df_eda['Classes']=='fire'].Rain.value_counts().reset_index()
rain = rain.rename(columns={'index':'rain_intensity'})
# create bar chart
sns.set(font_scale=1.5)
fig, ax = plt.subplots(figsize=(20, 8))
sns.countplot(x=df_eda[df_eda['Classes']=='fire'].Rain,palette='YlOrBr')
plt.title('Rain and Forest Fires')
plt.xlabel('Rain')
plt.ylabel('Forest Fire Count')
plt.show()
st.pyplot(fig)
st.write('From the visualization above, most forest fire incidents occur when it is not raining. But there are also forest fires that occur when it is raining with low intensity.')
# Fire Weather Index (FWI) System
st.write('## Fire Weather Index (FWI) System')
st.write('This section explains the process of data exploration to find the `FWI` value that can cause forest fire. The Fire Weather Index (FWI) is a numeric rating of fire intensity. It is based on the ISI and the BUI, and __is used as a general index of fire danger throughout the forested areas__.')
st.write('**Structure of the FWI System**')
st.write('The diagram below illustrates the components of the FWI System. Calculation of the components is based on consecutive daily observations of `temperature`, `relative humidity`,`wind speed` and `24-hour precipitation`. The six standard components provide numeric ratings of relative potential for wildland fire.')
# Add Image
fwi = Image.open('fwi.png')
st.image(fwi)
st.write("FWI Range")
st.write("`0 - 1` : Low ")
st.write("`2 - 6` : Moderate")
st.write("`7 - 13` : High")
st.write("`> 13` : Very High")
# FWI
df_eda['FWI_cat']=pd.cut(
x=df_eda['FWI'],
bins=[-1,1,6,13,np.inf],
labels=['Low','Moderate','High','Very High'])
# create bar chart
sns.set(font_scale=1.5)
fig, ax = plt.subplots(figsize=(20, 8))
sns.countplot(x=df_eda.FWI_cat,hue=df_eda.Classes,palette='YlOrBr')
plt.title('FWI and Forest Fires')
plt.xlabel('FWI')
plt.ylabel('Count')
plt.show()
st.pyplot(fig)
st.write('From the visualization above, the potential for forest fires increases when the FWI value is in the range of 2-6 (moderate).')
# Correlation Matrix Analysis
st.write('## Correlation Matrix Analysis')
st.write('This section explains about correlation matrix analysis to find out the correlation between features and target (`Classes`). The cell below explains the process of performing a correlation matrix analysis to identify the features that are most strongly correlated with the target (`Classes`). To accomplish this, categorical data will be converted into numerical data using the `LabelEncoder` library.')
df_copy = df.copy()
# Using LabelEncoder to convert categorical into numerical data
m_LabelEncoder = LabelEncoder()
df_copy['Classes']=m_LabelEncoder.fit_transform(df_copy['Classes'])
df_copy = df_copy.drop(['year'],axis=1)
# Plotting Correlation Matrix
sns.set(font_scale=1)
fig = plt.figure(figsize=(15,15))
sns.heatmap(df_copy.corr(),annot=True,cmap='coolwarm', fmt='.2f')
st.pyplot(fig)
st.write('From the visualization above, `month` and `Ws` has a low correlation to the target (`Classes`).')
if __name__ == '__main__':
run()