File size: 2,447 Bytes
9ff6f28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# import library
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import json

# Load the feature names that run() iterates over (used there as DataFrame
# column names). The file holds JSON, so it is decoded explicitly as UTF-8
# instead of relying on the platform's default text encoding.
with open('feature.txt', 'r', encoding='utf-8') as file:
    feature = json.load(file)

# Configure the Streamlit page (browser tab title)
st.set_page_config(
    page_title="Exploratory Data Analysis (EDA)",
)

def run(data_path=None):
    """Render the Exploratory Data Analysis (EDA) page in Streamlit.

    The page shows, in order:
      1. the raw dataset as a table,
      2. the number of rows per ``price_range`` value, as a table and a pie
         chart,
      3. for every column name in the module-level ``feature`` collection, a
         horizontal bar chart of that column's mean per ``price_range``.

    Args:
        data_path: Path (or any source accepted by ``pandas.read_csv``) of the
            CSV to analyse. When omitted, the original hard-coded location is
            used so existing ``run()`` callers behave exactly as before.

    Raises:
        FileNotFoundError: propagated from ``pandas.read_csv`` when the CSV
            does not exist at ``data_path``.
        KeyError: if the data has no ``price_range`` column, or a name in
            ``feature`` is not a column of the data.
    """
    if data_path is None:
        # NOTE: machine-specific absolute path kept only for backward
        # compatibility; pass ``data_path`` explicitly on any other machine.
        data_path = "/Users/salsasabithah/Documents/FTDS/1-Phase/Milestone 2/p1-ftds014-hck-m2-Salsasbth/Mobile_Price_Classification.csv"

    # One palette shared by every chart (defined once instead of per loop pass)
    palette = ['blue', 'green', 'orange', 'red']

    # Set the page title
    st.title("Exploratory Data Analysis (EDA)")

    # Load data
    df = pd.read_csv(data_path)

    # Dataset header followed by the dataframe itself
    st.subheader("Dataset Mobile Price Classification")
    st.dataframe(df)
    st.markdown('---')

    # Data distribution for each price range
    st.subheader("Data Distribution for Each Price Range")

    # Number of rows in each price range
    price_range_counts = df['price_range'].value_counts()

    # Show the amount of data in each price category
    st.write("Amount of Data in Each Price Category :")
    st.write(price_range_counts)

    # Pie chart of the price categories
    st.write("Plot of the price category :")
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.pie(
        price_range_counts,
        labels=price_range_counts.index,
        autopct='%1.1f%%',
        colors=palette,
    )
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; without closing,
    # pyplot keeps each figure alive and memory grows with every rerun.
    plt.close(fig)

    # Static caption: this text is a literal, it is not computed from `df`.
    st.write('The amount of data in each price range is same, as 500 data for each price range.')
    st.markdown('---')

    # Average of each selected feature per price range
    for selected_column in feature:

        # Mean of the column within each price_range, ordered by mean value
        mean_col = df.groupby('price_range')[selected_column].mean().sort_values()

        # Title for this plot
        plot_title = 'Average of {} column per Price Range'.format(selected_column)
        st.subheader(plot_title)

        # Colours are applied by bar position after sorting, so a given colour
        # does not always denote the same price range across charts.
        fig, ax = plt.subplots(figsize=(8, 6))
        mean_col.plot.barh(color=palette, ax=ax)

        # Axis labels
        ax.set_xlabel("Average of '{}' column".format(selected_column))
        ax.set_ylabel('Price Range')

        # Display the plot, then release the figure (see note above)
        st.pyplot(fig)
        plt.close(fig)
        st.markdown('---')

# Render the EDA page only when this file is executed as a script,
# not when it is imported by another module.
if __name__ == '__main__':
    run()