Commit
•
bde1bc4
1
Parent(s):
14503c5
Upload 3 files
Browse files
- Spotify-2000.csv +0 -0
- app.py +75 -0
- requirements.txt +5 -0
Spotify-2000.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
app.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import seaborn as sns
|
3 |
+
import streamlit as st
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.graph_objects as go
|
6 |
+
from sklearn.cluster import KMeans
|
7 |
+
# Read the track table from the CSV file next to the app.
df = pd.read_csv("Spotify-2000.csv")


# Main content: page title, banner image and a preview of the table.
st.title('Music Genres Clustering :saxophone:')
st.image('https://www.cnet.com/a/img/resize/034ecacea4dbf0ae76529f37c2d0309b17b557ec/hub/2021/11/10/ab5e2d3b-9a4a-41f0-b2cd-6cee804ce823/genre-charts-covers.png?auto=webp&fit=crop&height=675&width=1200')
st.write('**Data Preview:**')
# The "Index" column is removed before the table is shown and reused below.
df = df.drop(columns="Index")
st.dataframe(df)
|
16 |
+
|
17 |
+
# GENRE CLUSTER
st.write('**Visualizing Top 5 Genres:**')
# The five most frequent values of the "Top Genre" column.
top_genres = df["Top Genre"].value_counts().nlargest(5).index


def plot_3d_scatter(df):
    """Build a 3D scatter of BPM, Energy and Danceability for the top genres.

    Args:
        df: DataFrame containing the "Top Genre", "Beats Per Minute (BPM)",
            "Energy" and "Danceability" columns.

    Returns:
        A plotly ``go.Figure`` holding one ``Scatter3d`` trace per entry of
        the module-level ``top_genres``.
    """
    fig = go.Figure()
    for genre in top_genres:
        # Select the genre's rows once and reuse them for all three axes.
        genre_rows = df[df["Top Genre"] == genre]
        fig.add_trace(
            go.Scatter3d(
                x=genre_rows['Beats Per Minute (BPM)'],
                y=genre_rows['Energy'],
                z=genre_rows['Danceability'],
                mode='markers',
                marker=dict(size=6, line=dict(width=1)),
                name=str(genre),
            )
        )
    fig.update_traces(hovertemplate='BPM: %{x} <br>Energy: %{y} <br>Danceability: %{z}')
    fig.update_layout(
        autosize=True,
        scene=dict(xaxis_title='BPM', yaxis_title='Energy', zaxis_title='Danceability'),
    )
    return fig


st.plotly_chart(plot_3d_scatter(df))
|
31 |
+
|
32 |
+
|
33 |
+
# KMEANS CLUSTER
st.write('**Clustering by "Beats Per Minute (BPM)", "Loudness (dB)", "Liveness", "Valence", "Acousticness","Speechiness" using Kmeans.:**')
# Audio features used as the clustering input.
df2 = df[["Beats Per Minute (BPM)", "Loudness (dB)", "Liveness", "Valence", "Acousticness", "Speechiness"]]
# A fixed seed keeps cluster numbering stable between script reruns, and
# n_init is set explicitly because its default differs between scikit-learn
# releases.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=42)
clusters = kmeans.fit_predict(df2)
# Turn the 0-based labels into "Cluster 1" .. "Cluster 10". Assigning the
# finished column directly avoids an in-place fillna on a column selection,
# which pandas deprecates and which does nothing under Copy-on-Write.
df["Music Segments"] = [f"Cluster {label + 1}" for label in clusters]
st.dataframe(df)
|
42 |
+
|
43 |
+
st.write('**Visualizing:**')


# NOTE: this redefines the plot_3d_scatter declared earlier in the script;
# the name is kept so the call below (and any other caller) keeps working.
def plot_3d_scatter(df):
    """Build a 3D scatter of BPM, Energy and Danceability per music segment.

    Args:
        df: DataFrame containing the "Music Segments",
            "Beats Per Minute (BPM)", "Energy" and "Danceability" columns.

    Returns:
        A plotly ``go.Figure`` holding one ``Scatter3d`` trace for each
        distinct value of "Music Segments", in order of first appearance.
    """
    fig = go.Figure()
    for segment in df["Music Segments"].unique():
        # Select the segment's rows once and reuse them for all three axes.
        segment_rows = df[df["Music Segments"] == segment]
        fig.add_trace(
            go.Scatter3d(
                x=segment_rows['Beats Per Minute (BPM)'],
                y=segment_rows['Energy'],
                z=segment_rows['Danceability'],
                mode='markers',
                marker=dict(size=6, line=dict(width=1)),
                name=str(segment),
            )
        )
    fig.update_traces(hovertemplate='BPM: %{x} <br>Energy: %{y} <br>Danceability: %{z}')
    fig.update_layout(
        autosize=True,
        scene=dict(xaxis_title='BPM', yaxis_title='Energy', zaxis_title='Danceability'),
    )
    return fig


st.plotly_chart(plot_3d_scatter(df))
|
55 |
+
|
56 |
+
|
57 |
+
# Scatter plot
st.write('**Similarity between Top 5 Genres and Clusters:**')
# Count how many tracks of each genre fall into each cluster.
genre_cluster_df = pd.crosstab(df["Top Genre"], df["Music Segments"])

# Filter for top 5 genres
genre_cluster_df = genre_cluster_df.loc[top_genres]

# Prepare data for scatter plot: one row per (genre, cluster) pair.
scatter_data = genre_cluster_df.reset_index().melt(
    id_vars="Top Genre", var_name="Cluster", value_name="Frequency"
)
# "Cluster N" -> N. No special case is needed for "Cluster 10", since the
# numeric suffix parses the same way for every label.
scatter_data["Cluster"] = scatter_data["Cluster"].str.split().str[1].astype("int64")
|
67 |
+
|
68 |
+
# Plot
# Work on an explicit figure/axes pair rather than pyplot's implicit global
# figure, so the object handed to Streamlit is exactly the one drawn here.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=scatter_data,
    x="Cluster",
    y="Frequency",
    hue="Top Genre",
    palette="Set1",
    s=100,
    ax=ax,
)
ax.set_xlabel("Cluster")
ax.set_ylabel("Frequency")
ax.legend(title="Top Genre", bbox_to_anchor=(1.05, 1), loc='upper left')
fig.savefig("scatter_plot.png", bbox_inches='tight')  # Save the plot with tight bounding box
st.pyplot(fig)
# Release the figure so open figures do not pile up across script reruns.
plt.close(fig)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
seaborn
|
3 |
+
matplotlib
|
4 |
+
plotly
|
5 |
+
scikit-learn
|