"""Interactive Streamlit app that clusters car listings with K-means.

The script reads ``mycardata.csv``, multiplies the two price columns by
10000, one-hot encodes the categorical columns, clips the upper tail of the
numeric columns at their 97th percentile, clusters the rows on selling price
and kilometres driven, and renders a 2x2 grid of scatter plots coloured by
cluster.
"""

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Columns multiplied by 10000 before any further processing.
PRICE_COLUMNS = ['Selling_Price', 'Present_Price']
# Columns whose upper tail is clipped at the 97th percentile.
CLIPPED_COLUMNS = ['Selling_Price', 'Present_Price', 'Kms_Driven']
# Columns replaced by one-hot indicator columns.
CATEGORICAL_COLUMNS = ['Transmission', 'Seller_Type', 'Fuel_Type']
# Features fed to K-means.
CLUSTER_FEATURES = ['Selling_Price', 'Kms_Driven']

# One colour per cluster; the slider maximum is derived from this list so a
# cluster index can never run past the end of the palette.
CLUSTER_COLORS = ['red', 'blue', 'green', 'orange', 'purple',
                  'brown', 'pink', 'gray', 'olive', 'cyan']

# (grid position, x column, y column, x label, y label, title) per subplot.
PANELS = [
    ((0, 0), 'Selling_Price', 'Kms_Driven',
     'Price', 'Kilometers Driven', 'KM to Price'),
    ((0, 1), 'Year', 'Kms_Driven',
     'Year', 'Kilometers Driven', 'KM to Years'),
    ((1, 0), 'Year', 'Selling_Price',
     'Year', 'Price', 'Price to Year'),
    ((1, 1), 'Present_Price', 'Selling_Price',
     'Retail', 'Price', 'Price to Retail'),
]


def _plot_clusters(ax, data, x_col, y_col, n_clusters, xlabel, ylabel, title):
    """Draw one scatter series per cluster on *ax* and label the panel.

    Args:
        ax: Matplotlib axes to draw on.
        data: DataFrame holding *x_col*, *y_col* and a ``cluster`` column.
        x_col: Name of the column plotted on the x axis.
        y_col: Name of the column plotted on the y axis.
        n_clusters: Number of cluster ids (``0 .. n_clusters - 1``) to draw.
        xlabel: Text for the x-axis label.
        ylabel: Text for the y-axis label.
        title: Text for the panel title.
    """
    for cluster_id in range(n_clusters):
        members = data[data['cluster'] == cluster_id]
        ax.scatter(
            members[x_col],
            members[y_col],
            color=CLUSTER_COLORS[cluster_id],
            label=f'Cluster {cluster_id + 1}',
        )
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    # The series labels are only visible once a legend is actually drawn.
    ax.legend(fontsize='small')


df = pd.read_csv("mycardata.csv")

# Main content
st.title('Interactive Car Data Clustering :blue_car:')

st.write('**Data Preview:**')
st.dataframe(df)

st.write('**Data Processed:**')
# NOTE(review): the factor 10000 presumably converts the price unit used in
# the CSV -- confirm against the dataset description.
df[PRICE_COLUMNS] = df[PRICE_COLUMNS] * 10000
df = pd.get_dummies(df, columns=CATEGORICAL_COLUMNS)

# Outliers: cap each numeric column at its own 97th percentile.
clip_limits = df[CLIPPED_COLUMNS].quantile(0.97)
df[CLIPPED_COLUMNS] = df[CLIPPED_COLUMNS].clip(upper=clip_limits, axis=1)
st.dataframe(df)

n_clusters = st.slider(
    'Pick Number Of Clusters',
    min_value=2,
    max_value=len(CLUSTER_COLORS),
    value=4,
)

# Perform K-means clustering.
# n_init is pinned because its default differs between scikit-learn releases;
# fixing it (together with random_state) keeps the clustering reproducible.
# NOTE(review): the two features are clustered unscaled, as in the original;
# consider standardising them if one column should not dominate the distance.
X = df[CLUSTER_FEATURES]
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
df['cluster'] = kmeans.fit_predict(X)

# Create a 2x2 grid of subplots and fill each panel from the PANELS table.
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
for position, x_col, y_col, xlabel, ylabel, title in PANELS:
    _plot_clusters(
        axs[position], df, x_col, y_col, n_clusters, xlabel, ylabel, title
    )

# Adjust the spacing between subplots
fig.tight_layout()

# Display the plot, then release it: Streamlit re-runs this script on every
# widget interaction and pyplot would otherwise keep every figure alive.
st.pyplot(fig)
plt.close(fig)