Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import seaborn as sns | |
| import streamlit as st | |
| import matplotlib.pyplot as plt | |
| import plotly.graph_objects as go | |
| from sklearn.cluster import KMeans | |
# Load the track table used by every section of the page.
df = pd.read_csv("Spotify-2000.csv")

# Main content
st.title('Music Genres Clustering :saxophone:')
st.image('https://www.cnet.com/a/img/resize/034ecacea4dbf0ae76529f37c2d0309b17b557ec/hub/2021/11/10/ab5e2d3b-9a4a-41f0-b2cd-6cee804ce823/genre-charts-covers.png?auto=webp&fit=crop&height=675&width=1200')

st.write('**Data Preview:**')
# Remove the "Index" column before the table is displayed.
df = df.drop(columns="Index")
st.dataframe(df)

# GENRE CLUSTER
st.write('**Visualizing Top 5 Genres:**')
# Labels of the five most frequent values in the "Top Genre" column.
genre_counts = df["Top Genre"].value_counts()
top_genres = genre_counts.nlargest(5).index
def plot_3d_scatter(df):
    """Build a 3D BPM / Energy / Danceability scatter, one trace per top genre.

    Iterates over the module-level ``top_genres`` and, for each genre, adds
    a marker trace made from the rows of ``df`` whose "Top Genre" equals it.

    Args:
        df: DataFrame providing the "Top Genre", "Beats Per Minute (BPM)",
            "Energy" and "Danceability" columns.

    Returns:
        The populated ``plotly.graph_objects.Figure``.
    """
    figure = go.Figure()

    for genre in top_genres:
        # Rows belonging to the current genre, selected once per trace.
        genre_rows = df[df["Top Genre"] == genre]
        trace = go.Scatter3d(
            x=genre_rows['Beats Per Minute (BPM)'],
            y=genre_rows['Energy'],
            z=genre_rows['Danceability'],
            mode='markers',
            marker=dict(size=6, line=dict(width=1)),
            name=str(genre),
        )
        figure.add_trace(trace)

    # Same hover text and axis titles for every trace.
    figure.update_traces(
        hovertemplate='BPM: %{x} <br>Energy: %{y} <br>Danceability: %{z}'
    )
    axis_titles = dict(
        xaxis_title='BPM', yaxis_title='Energy', zaxis_title='Danceability'
    )
    figure.update_layout(autosize=True, scene=axis_titles)
    return figure
# Render the per-genre 3D scatter built by plot_3d_scatter.
st.plotly_chart(plot_3d_scatter(df))

# KMEANS CLUSTER
st.write('**Clustering by "Beats Per Minute (BPM)", "Loudness (dB)", "Liveness", "Valence", "Acousticness","Speechiness" using Kmeans.:**')

# Feature columns fed to the clustering model.
df2 = df[["Beats Per Minute (BPM)", "Loudness (dB)", "Liveness", "Valence", "Acousticness", "Speechiness"]]

# Single source of truth for the number of clusters.
n_clusters = 10

# Streamlit re-executes this script on every interaction. A fixed
# random_state keeps the cluster assignment (and therefore the labels shown
# to the user) stable between reruns. n_init is set explicitly so the result
# does not depend on the installed scikit-learn version's default.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
clusters = kmeans.fit_predict(df2)

# fit_predict returns integer labels 0..n_clusters-1; expose them as
# 1-based "Cluster N" strings. Every label is covered, so no fill-in step
# for missing values is needed.
df["Music Segments"] = [f"Cluster {label + 1}" for label in clusters]
st.dataframe(df)

st.write('**Visualizing:**')
def plot_3d_scatter(df):
    """Build a 3D BPM / Energy / Danceability scatter, one trace per segment.

    Adds one marker trace for each distinct value found in the
    "Music Segments" column of ``df``.

    Args:
        df: DataFrame providing the "Music Segments",
            "Beats Per Minute (BPM)", "Energy" and "Danceability" columns.

    Returns:
        The populated ``plotly.graph_objects.Figure``.
    """
    figure = go.Figure()

    for segment in df["Music Segments"].unique():
        # Rows assigned to the current segment, selected once per trace.
        segment_rows = df[df["Music Segments"] == segment]
        trace = go.Scatter3d(
            x=segment_rows['Beats Per Minute (BPM)'],
            y=segment_rows['Energy'],
            z=segment_rows['Danceability'],
            mode='markers',
            marker=dict(size=6, line=dict(width=1)),
            name=str(segment),
        )
        figure.add_trace(trace)

    # Same hover text and axis titles for every trace.
    figure.update_traces(
        hovertemplate='BPM: %{x} <br>Energy: %{y} <br>Danceability: %{z}'
    )
    axis_titles = dict(
        xaxis_title='BPM', yaxis_title='Energy', zaxis_title='Danceability'
    )
    figure.update_layout(autosize=True, scene=axis_titles)
    return figure
# Render the per-segment 3D scatter built by plot_3d_scatter.
st.plotly_chart(plot_3d_scatter(df))

# Scatter plot
st.write('**Similarity between Top 5 Genres and Clusters:**')

# Contingency table: one row per genre, one column per segment label,
# each cell holding the number of tracks in that combination.
genre_cluster_df = pd.crosstab(df["Top Genre"], df["Music Segments"])

# Filter for top 5 genres.
# NOTE(review): depending on the pandas version, the row index may take its
# name from the key passed to .loc rather than from the crosstab; naming it
# explicitly guarantees reset_index() below yields a "Top Genre" column.
genre_cluster_df = genre_cluster_df.loc[top_genres].rename_axis("Top Genre")

# Prepare data for scatter plot: long format with one row per
# (genre, cluster) pair.
scatter_data = genre_cluster_df.reset_index().melt(
    id_vars="Top Genre", var_name="Cluster", value_name="Frequency"
)
# Segment labels look like "Cluster 7"; keep the trailing number as an int
# so the x axis is numeric. This handles "Cluster 10" like any other label.
scatter_data["Cluster"] = (
    scatter_data["Cluster"].str.split().str[-1].astype(int)
)

# Plot on an explicit Figure/Axes instead of pyplot's implicit global
# figure, so Streamlit receives a real Figure object.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=scatter_data,
    x="Cluster",
    y="Frequency",
    hue="Top Genre",
    palette="Set1",
    s=100,
    ax=ax,
)
ax.set_xlabel("Cluster")
ax.set_ylabel("Frequency")
# Place the legend outside the plotting area, to the right.
ax.legend(title="Top Genre", bbox_to_anchor=(1.05, 1), loc='upper left')

# Save the plot with tight bounding box
fig.savefig("scatter_plot.png", bbox_inches='tight')
st.pyplot(fig)

# The script is re-executed on every interaction; close the figure so
# pyplot does not accumulate open figures across reruns.
plt.close(fig)