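# NOTE(review): "Spaces: Sleeping" appears to be page-status text captured along with this script, not program text - confirm and remove.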
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Load the raw car listings. The path is relative, so it is resolved against
# the process's current working directory.
df = pd.read_csv("mycardata.csv")

# Main content
st.title('Interactive Car Data Clustering :blue_car:')
st.write('**Data Preview:**')
st.dataframe(df)  # untouched data, exactly as read from the CSV

st.write('**Data Processed:**')

# Rescale both price columns by the same constant factor.
# NOTE(review): the unit this converts from/to is not visible here - confirm.
for price_col in ('Selling_Price', 'Present_Price'):
    df[price_col] = df[price_col] * 10000

# Replace each categorical column with one indicator column per category.
categorical_cols = ['Transmission', 'Seller_Type', 'Fuel_Type']
df = pd.get_dummies(df, columns=categorical_cols)

# Outlier handling: cap each numeric column at its own 97th percentile.
# Values above the cap are lowered to it; no rows are dropped.
capped_cols = ['Selling_Price', 'Present_Price', 'Kms_Driven']
clip_limits = df[capped_cols].quantile(0.97)
df[capped_cols] = df[capped_cols].clip(upper=clip_limits, axis=1)

st.dataframe(df)  # processed data that the clustering step works on
# Let the user choose the number of clusters (2-10, starting at 4).
n_clusters = st.slider(
    'Pick Number Of Clusters',
    min_value=2,
    max_value=10,
    value=4,
)

# Perform K-means clustering on selling price and distance driven only.
# random_state is fixed so repeated runs on the same data and cluster count
# produce the same assignment.
cluster_features = ['Selling_Price', 'Kms_Driven']
X = df[cluster_features]
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(X)

# Store each row's cluster index (0 .. n_clusters - 1) next to its data.
df['cluster'] = kmeans.labels_
# Render four scatter views of the clustered data in a 2x2 grid. Every panel
# uses the same colour for the same cluster, so a cluster can be followed
# from one view to the next.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))

# One colour per cluster index. Ten entries are enough because n_clusters is
# capped at 10 by its slider.
colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan']

# Split the rows by cluster once instead of re-filtering df for every panel.
cluster_frames = [df[df['cluster'] == i] for i in range(n_clusters)]

# (axes, x column, y column, x label, y label, title) for each panel.
panels = [
    (axs[0, 0], 'Selling_Price', 'Kms_Driven', 'Price', 'Kilometers Driven', 'KM to Price'),
    (axs[0, 1], 'Year', 'Kms_Driven', 'Year', 'Kilometers Driven', 'KM to Years'),
    (axs[1, 0], 'Year', 'Selling_Price', 'Year', 'Price', 'Price to Year'),
    (axs[1, 1], 'Present_Price', 'Selling_Price', 'Retail', 'Price', 'Price to Retail'),
]

for ax, x_col, y_col, x_label, y_label, title in panels:
    # One scatter series per cluster so each gets its own colour and label.
    for i, cluster_data in enumerate(cluster_frames):
        ax.scatter(cluster_data[x_col], cluster_data[y_col], c=colors[i], label=f'Cluster {i+1}')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)

# The series labels are only visible if a legend is drawn. Colours are shared
# across panels, so a single legend on the first panel is enough.
axs[0, 0].legend(fontsize='small')

# Adjust the spacing between subplots
fig.tight_layout()

# Display the plot
st.pyplot(fig)

# pyplot keeps every figure it creates alive until it is explicitly closed;
# release this one now that it has been handed to Streamlit.
plt.close(fig)