Spaces:

Bitha
/

Milestone_2

Runtime error

File size: 2,447 Bytes

9ff6f28

# import library
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import json

# Load the names of the columns to analyse. The file is parsed as JSON, so it
# is read explicitly as UTF-8 rather than with the locale's default encoding.
with open('feature.txt', 'r', encoding='utf-8') as file:
    feature = json.load(file)

# Configure the Streamlit page (sets the page title)
st.set_page_config(page_title="Exploratory Data Analysis (EDA)")

def _show_figure(fig) -> None:
    """Render *fig* in the Streamlit app, then close it.

    Figures created through ``plt.subplots`` stay registered with pyplot
    until they are closed explicitly; closing each one right after it has
    been rendered keeps them from accumulating across script runs.
    """
    st.pyplot(fig)
    plt.close(fig)


def run(data_path: str = "Mobile_Price_Classification.csv") -> None:
    """Render the Exploratory Data Analysis (EDA) page.

    The page shows, in order: the raw dataset, the number of rows per
    ``price_range`` (as a table and as a pie chart), and, for every entry of
    the module-level ``feature`` collection, a horizontal bar chart of that
    column's mean per price range.

    Args:
        data_path: Location of the dataset CSV. The default is a path
            relative to the working directory (the same way ``feature.txt``
            is located) instead of an absolute path that only exists on one
            developer's machine.
    """
    # Palette shared by every chart; colours are applied by position.
    colors = ['blue', 'green', 'orange', 'red']

    # Page title
    st.title("Exploratory Data Analysis (EDA)")

    # Load data
    df = pd.read_csv(data_path)

    # Dataset section: header followed by the full dataframe
    st.subheader("Dataset Mobile Price Classification")
    st.dataframe(df)
    st.markdown('---')

    # Distribution of rows across the price ranges
    st.subheader("Data Distribution for Each Price Range")
    price_range_counts = df['price_range'].value_counts()

    # Table with the number of rows in each price category
    st.write("Amount of Data in Each Price Category :")
    st.write(price_range_counts)

    # Pie chart of the same counts
    st.write("Plot of the price category :")
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.pie(
        price_range_counts,
        labels=price_range_counts.index,
        autopct='%1.1f%%',
        colors=colors,
    )
    _show_figure(fig)

    # NOTE(review): this sentence is static text, not derived from
    # `price_range_counts`; it must be revisited if the dataset changes.
    st.write('The amount of data in each price range is same, as 500 data for each price range.')
    st.markdown('---')

    # One section per selected feature: mean of the feature per price range
    for selected_column in feature:

        # Mean per price range, sorted ascending by the mean value
        mean_col = df.groupby('price_range')[selected_column].mean().sort_values()

        # Section title
        st.subheader(f'Average of {selected_column} column per Price Range')

        # Horizontal bar chart of the means.
        # NOTE(review): because the series is sorted by value, the colours
        # follow bar position rather than a fixed price range.
        fig, ax = plt.subplots(figsize=(8, 6))
        mean_col.plot.barh(color=colors, ax=ax)

        # Axis labels
        ax.set_xlabel(f"Average of '{selected_column}' column")
        ax.set_ylabel('Price Range')

        # Display the plot
        _show_figure(fig)
        st.markdown('---')

# Script entry point: render the page only when this file is executed directly.
if __name__ == "__main__":
    run()