Milestone2 / eda.py
Bitha's picture
Upload 7 files
3fda8bc verified
# import library
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import json
# Load the names of the columns to chart from a JSON-encoded text file.
# The encoding is given explicitly so the file is decoded the same way on
# every platform instead of depending on the locale's default.
with open('feature.txt', 'r', encoding='utf-8') as file:
    feature = json.load(file)

# Page configuration (browser tab title) for this Streamlit page.
st.set_page_config(
    page_title="Exploratory Data Analysis (EDA)",
)
def run():
    """Render the Exploratory Data Analysis (EDA) page.

    Reads ``Mobile_Price_Classification.csv`` from the working directory and
    displays, in order:

    1. the raw dataframe,
    2. the row count per ``price_range`` value, as a table and a pie chart,
    3. for every entry of the module-level ``feature`` object, a horizontal
       bar chart of that column's mean per ``price_range``, sorted by mean.

    Each Matplotlib figure is closed right after it is handed to Streamlit so
    that figures do not pile up in pyplot's global registry across reruns.
    """
    # Colors shared by the pie chart and every bar chart.
    colors = ['blue', 'green', 'orange', 'red']

    # Set title
    st.title("Exploratory Data Analysis (EDA)")

    # Load data
    df = pd.read_csv("Mobile_Price_Classification.csv")

    # Show the full dataset
    st.subheader("Dataset Mobile Price Classification")
    st.dataframe(df)
    st.markdown('---')

    # Data distribution for each price range
    st.subheader("Data Distribution for Each Price Range")
    price_range_counts = df['price_range'].value_counts()

    # Show the amount of data in each price category
    st.write("Amount of Data in Each Price Category :")
    st.write(price_range_counts)

    # Pie chart of the price categories
    st.write("Plot of the price category :")
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.pie(
        price_range_counts,
        labels=price_range_counts.index,
        autopct='%1.1f%%',
        colors=colors,
    )
    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive until closed.
    plt.close(fig)
    st.write('The amount of data in each price range is same, as 500 data for each price range.')
    st.markdown('---')

    # Average of each selected feature per price range
    for selected_column in feature:
        # Mean of the column within each price_range, smallest mean first
        mean_col = df.groupby('price_range')[selected_column].mean().sort_values()

        # Title for this plot
        st.subheader(f'Average of {selected_column} column per Price Range')

        # Horizontal bar chart of the per-price-range averages
        fig, ax = plt.subplots(figsize=(8, 6))
        mean_col.plot.barh(color=colors, ax=ax)
        ax.set_xlabel(f"Average of '{selected_column}' column")
        ax.set_ylabel('Price Range')

        # Display the plot, then free the figure
        st.pyplot(fig)
        plt.close(fig)

        # NOTE(review): assumed to separate each chart (one rule per
        # feature) rather than appear once after the loop -- confirm.
        st.markdown('---')
# Render the page only when this file is executed as a script.
if __name__ == "__main__":
    run()