Spaces:

ibrahimnomad
/

Car_Data_Clustering

Sleeping

App Files Files Community

Car_Data_Clustering / app.py

ibrahimnomad

Update app.py

47737f6 about 2 months ago

raw history blame contribute delete

No virus

2.55 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.cluster import KMeans
	import matplotlib.pyplot as plt

	# Load the raw car listings from the CSV that ships next to the app.
	df = pd.read_csv("mycardata.csv")

	# Page header, followed by the untouched data so it can be compared with
	# the processed table rendered further down.
	st.title('Interactive Car Data Clustering :blue_car:')

	st.write('Data Preview:')
	st.dataframe(df)

	st.write('Data Processed:')

	# Rescale both price columns by 10000.
	# NOTE(review): presumably a currency-unit conversion -- confirm the unit
	# used in the CSV.
	for price_col in ('Selling_Price', 'Present_Price'):
	    df[price_col] = df[price_col] * 10000

	# Replace the categorical columns with one indicator column per category.
	df = pd.get_dummies(df, columns=['Transmission', 'Seller_Type', 'Fuel_Type'])

	# Tame outliers: cap each listed column at its own 97th percentile
	# (only the upper tail is clipped).
	outlier_cols = ['Selling_Price', 'Present_Price', 'Kms_Driven']
	clip_limits = df[outlier_cols].quantile(0.97)
	df[outlier_cols] = df[outlier_cols].clip(upper=clip_limits, axis=1)

	st.dataframe(df)

	# Let the user choose how many clusters to fit (2 to 10, starting at 4).
	n_clusters = st.slider(
	    'Pick Number Of Clusters',
	    min_value=2,
	    max_value=10,
	    value=4,
	)

	# K-means on selling price and kilometres driven only; random_state is
	# pinned to 42 so reruns use the same seed.
	cluster_features = ['Selling_Price', 'Kms_Driven']
	X = df[cluster_features]
	kmeans = KMeans(random_state=42, n_clusters=n_clusters)

	# Store each row's cluster index (0-based) alongside the data.
	cluster_labels = kmeans.fit_predict(X)
	df['cluster'] = cluster_labels


	# Render four scatter views of the clustered data in a 2x2 grid. A given
	# cluster uses the same colour in every panel.
	fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))

	# One colour per cluster index; ten entries cover the slider's maximum of 10.
	colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown', 'pink', 'gray', 'olive', 'cyan']

	# Each panel: (axes, x column, y column, x label, y label, title).
	panels = [
	    (axs[0, 0], 'Selling_Price', 'Kms_Driven', 'Price', 'Kilometers Driven', 'KM to Price'),
	    (axs[0, 1], 'Year', 'Kms_Driven', 'Year', 'Kilometers Driven', 'KM to Years'),
	    (axs[1, 0], 'Year', 'Selling_Price', 'Year', 'Price', 'Price to Year'),
	    (axs[1, 1], 'Present_Price', 'Selling_Price', 'Retail', 'Price', 'Price to Retail'),
	]

	# Split the frame by cluster once instead of re-filtering it for every panel.
	cluster_frames = [df[df['cluster'] == i] for i in range(n_clusters)]

	for ax, x_col, y_col, x_label, y_label, title in panels:
	    for i, cluster_data in enumerate(cluster_frames):
	        ax.scatter(cluster_data[x_col], cluster_data[y_col], c=colors[i], label=f'Cluster {i+1}')
	    ax.set_xlabel(x_label)
	    ax.set_ylabel(y_label)
	    ax.set_title(title)

	# The scatter labels are only visible through a legend; colours are shared
	# across panels, so a single legend on the first panel is enough.
	axs[0, 0].legend()

	# Adjust the spacing between subplots
	fig.tight_layout()

	# Display the plot
	st.pyplot(fig)

	# Streamlit re-executes this script on every interaction; close the figure
	# so pyplot does not keep one more open figure per rerun.
	plt.close(fig)