Spaces:

ahmadluay
/

forest_fire_prediction

Runtime error

App Files Files Community

forest_fire_prediction / eda.py

ahmadluay

edit eda

9685626 over 1 year ago

raw history blame contribute delete

No virus

9.27 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import plotly.express as px
	import sklearn
	from sklearn.preprocessing import LabelEncoder

	from PIL import Image

	# Page-level settings: browser-tab title, full-width layout, and a sidebar
	# that starts expanded.
	st.set_page_config(
	    layout='wide',
	    initial_sidebar_state='expanded',
	    page_title='Predicting Forest Fire in Algeria Using Supervised Learning',
	)

	def _show_countplot(x, title, xlabel, ylabel, hue=None, font_scale=1):
	    """Draw a seaborn count plot on a fresh figure and render it in Streamlit."""
	    sns.set(font_scale=font_scale)
	    fig, ax = plt.subplots(figsize=(20, 8))

	    sns.countplot(x=x, hue=hue, palette='YlOrBr', ax=ax)

	    ax.set_title(title)
	    ax.set_xlabel(xlabel)
	    ax.set_ylabel(ylabel)
	    st.pyplot(fig)
	    # pyplot keeps a reference to every figure it creates; close it so
	    # figures do not pile up each time the script is re-run.
	    plt.close(fig)


	def run():
	    """Render the exploratory-data-analysis page.

	    Reads 'Algerian_forest_fires_dataset_UPDATE.csv', 'forest fire.jpg' and
	    'fwi.png' from the working directory and writes headings, explanatory
	    text, the raw table and several charts to the Streamlit page.
	    Takes no arguments and returns None.
	    """
	    # title
	    st.title('Predicting Forest Fire in Algeria Using Supervised Learning')

	    # sub header
	    st.subheader('Exploratory Data Analysis of the dataset.')

	    # Add Image (context manager releases the file handle after rendering)
	    with Image.open('forest fire.jpg') as image:
	        st.image(image, caption='Forest fire illustration')

	    # Description
	    st.write('Forest fires are a serious issue in Algeria, particularly during the summer months when hot, dry weather and strong winds increase the risk of fires spreading rapidly. The country has a significant amount of forested land, particularly in the northern coastal region, which is particularly vulnerable to fires. Predicting forest fires in Algeria is important in order to prevent or mitigate their impact.')
	    st.write('# Dataset')
	    st.write('This section explains the process of data loading. Dataset used in this analysis is Algerian Forest Fires Dataset on UCI Machine Learning Repository.')

	    # show dataframe
	    df = pd.read_csv('Algerian_forest_fires_dataset_UPDATE.csv')
	    st.dataframe(df)
	    # add description of Dataset
	    st.write('Following are the variables and definitions of each column in the dataset.')
	    st.write("`Date` : (DD/MM/YYYY) Day, month (june to september), year (2012)")
	    st.write("`Temp` : temperature noon (temperature max) in Celsius degrees: 22 to 42")
	    st.write("`RH` : Relative Humidity in %: 21 to 90")
	    st.write("`Ws` : Wind speed in km/h: 6 to 29")
	    st.write("`Rain` : total day in mm: 0 to 16.8")
	    st.write("`FFMC` : Fine Fuel Moisture Code (FFMC) index from the FWI system: 28.6 to 92.5")
	    st.write("`DMC` : Duff Moisture Code (DMC) index from the FWI system: 1.1 to 65.9")
	    st.write("`DC` : Drought Code (DC) index from the FWI system: 7 to 220.4")
	    st.write("`ISI` : Initial Spread Index (ISI) index from the FWI system: 0 to 18.5")
	    st.write("`BUI` : Buildup Index (BUI) index from the FWI system: 1.1 to 68")
	    st.write("`FWI` : Fire Weather Index (FWI) Index: 0 to 31.1")
	    st.write("`Classes` : two classes, namely fire and not fire")

	    # Forest Fire

	    st.write('# Exploratory Data Analysis ')
	    st.write('## Forest Fire')
	    st.write('This section describes data exploration to determine the number of forest fires that ocurred in 2012 in Algeria.')
	    # number of forest fire
	    df_eda = df.copy()
	    # Name both output columns explicitly. The names produced by a bare
	    # value_counts().reset_index() differ between pandas 1.x
	    # ('index'/'Classes') and pandas 2.x ('Classes'/'count'), and the pie
	    # chart below looks the columns up by name.
	    fire = (
	        df_eda['Classes']
	        .value_counts()
	        .rename_axis('index')
	        .reset_index(name='Classes')
	    )

	    # Plot PieChart with Plotly
	    pie_fig = px.pie(fire, values='Classes', names='index', color_discrete_sequence=['brown', 'orange'])
	    pie_fig.update_layout(title_text="Number of Forest Fire", title_x=0.5)
	    # No pie_fig.show(): st.plotly_chart embeds the figure in the page.
	    st.plotly_chart(pie_fig)
	    st.write('From the visualization above, the number of forest fires that occurred is balanced with the number of forest fires that did not occur. After knowing the number of forest fires, further exploration is carried out to find out when forest fires occur most frequently.')

	    # The month with the most occurrence of forest fires

	    st.write('### The month with the most occurrence of forest fires')
	    st.write('This section describes data exploration for finding the `month` in which forest fires occur most frequently.')
	    # create bar chart
	    _show_countplot(
	        x=df_eda.month,
	        hue=df_eda.Classes,
	        title='Number of forest fires by month',
	        xlabel='Month',
	        ylabel='Forest Fire Count',
	        font_scale=1,
	    )
	    st.write('From this dataset it is known that forest fires most often occur in the summer, to be precise in August. August has the highest average temperature compared to other months, the lowest average relative humidity compared to other months and the lowest average rain intensity compared to other months.')

	    # Temperature

	    st.write('## Temperature')
	    st.write('This section describes data exploration to find the `Temperature` when a forest fire occurs.')
	    # create bar chart
	    _show_countplot(
	        x=df_eda.Temperature,
	        hue=df_eda.Classes,
	        title='Temperature and Forest fires',
	        xlabel='Temperature',
	        ylabel='Count',
	        font_scale=1,
	    )
	    st.write('From the visualization above, the average temperature when a forest fire occurs is 33.82 degree Celsius.')

	    # Relative Humidity

	    st.write('## Relative Humidity')
	    st.write('This section describes data exploration to find the level of `relative humidity` when a forest fire occurs.')
	    # create a new column based on humidity group
	    # right=False makes each bin [low, high), which is what the labels say:
	    # 21 falls in '21-25' and 26 in '26-30'. With the default right-closed
	    # bins, 21 is left unbinned (NaN) and every edge value lands one bin low.
	    df_eda['RH_bins'] = pd.cut(
	        x=df_eda['RH'],
	        bins=[21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86, 91],
	        labels=['21-25', '26-30', '31-35', '36-40', '41-45', '46-50', '51-55', '56-60', '61-65', '66-70', '71-75', '76-80', '81-85', '86-90'],
	        right=False,
	    )

	    # create bar chart
	    _show_countplot(
	        x=df_eda.RH_bins,
	        hue=df_eda.Classes,
	        title='Relative Humidity and Forest Fires',
	        xlabel='Relative Humidity',
	        ylabel='Count',
	        font_scale=1.5,
	    )
	    st.write('From the visualization above, forest fires most often occur when the relative humidity is 55%. The more the relative humidity increases, the more likely it is that a forest fire will not occur.')

	    # Rain

	    st.write('## Rain')
	    st.write('This section describes data exploration to find the intensity of `rain` when a forest fire occurs.')
	    # forest fire when raining: only rows labelled 'fire' are counted
	    # create bar chart
	    _show_countplot(
	        x=df_eda[df_eda['Classes'] == 'fire'].Rain,
	        title='Rain and Forest Fires',
	        xlabel='Rain',
	        ylabel='Forest Fire Count',
	        font_scale=1.5,
	    )
	    st.write('From the visualization above, most forest fire incidents occur when it is not raining. But there are also forest fires that occur when it is raining with low intensity.')

	    # Fire Weather Index (FWI) System

	    st.write('## Fire Weather Index (FWI) System')
	    st.write('This section explains the process of data exploration to find the `FWI` value that can cause forest fire. The Fire Weather Index (FWI) is a numeric rating of fire intensity. It is based on the ISI and the BUI, and __is used as a general index of fire danger throughout the forested areas__.')
	    st.write('Structure of the FWI System')
	    st.write('The diagram below illustrates the components of the FWI System. Calculation of the components is based on consecutive daily observations of `temperature`, `relative humidity`,`wind speed` and `24-hour precipitation`. The six standard components provide numeric ratings of relative potential for wildland fire.')
	    # Add Image
	    with Image.open('fwi.png') as fwi:
	        st.image(fwi)
	    st.write("FWI Range")
	    st.write("`0 - 1` : Low ")
	    st.write("`2 - 6` : Moderate")
	    st.write("`7 - 13` : High")
	    st.write("`> 13` : Very High")
	    # FWI: right-closed bins (-1, 1], (1, 6], (6, 13], (13, inf)
	    df_eda['FWI_cat'] = pd.cut(
	        x=df_eda['FWI'],
	        bins=[-1, 1, 6, 13, np.inf],
	        labels=['Low', 'Moderate', 'High', 'Very High'],
	    )

	    # create bar chart
	    _show_countplot(
	        x=df_eda.FWI_cat,
	        hue=df_eda.Classes,
	        title='FWI and Forest Fires',
	        xlabel='FWI',
	        ylabel='Count',
	        font_scale=1.5,
	    )
	    st.write('From the visualization above, the potential for forest fires increases when the FWI value is in the range of 2-6 (moderate).')

	    # Correlation Matrix Analysis

	    st.write('## Correlation Matrix Analysis')
	    st.write('This section explains about correlation matrix analysis to find out the correlation between features and target (`Classes`). The cell below explains the process of performing a correlation matrix analysis to identify the features that are most strongly correlated with the target (`Classes`). To accomplish this, categorical data will be converted into numerical data using the `LabelEncoder` library.')
	    df_copy = df.copy()

	    # Using LabelEncoder to convert categorical into numerical data
	    m_LabelEncoder = LabelEncoder()
	    df_copy['Classes'] = m_LabelEncoder.fit_transform(df_copy['Classes'])
	    df_copy = df_copy.drop(['year'], axis=1)
	    # Plotting Correlation Matrix
	    # NOTE(review): corr() assumes every remaining column is numeric; confirm
	    # against the CSV, since newer pandas raises on non-numeric columns.
	    sns.set(font_scale=1)
	    heatmap_fig = plt.figure(figsize=(15, 15))
	    sns.heatmap(df_copy.corr(), annot=True, cmap='coolwarm', fmt='.2f')
	    st.pyplot(heatmap_fig)
	    plt.close(heatmap_fig)
	    st.write('From the visualization above, `month` and `Ws` has a low correlation to the target (`Classes`).')

	# Render the page only when this file is executed as the entry script,
	# not when it is imported.
	if __name__ == "__main__":
	    run()