ibrahimnomad committed on
Commit
bde1bc4
1 Parent(s): 14503c5

Upload 3 files

Browse files
Files changed (3) hide show
  1. Spotify-2000.csv +0 -0
  2. app.py +75 -0
  3. requirements.txt +5 -0
Spotify-2000.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import seaborn as sns
3
+ import streamlit as st
4
+ import matplotlib.pyplot as plt
5
+ import plotly.graph_objects as go
6
+ from sklearn.cluster import KMeans
7
# Load the track dataset from the CSV that sits next to this script.
df = pd.read_csv("Spotify-2000.csv")

# Header image shown under the page title.
BANNER_URL = 'https://www.cnet.com/a/img/resize/034ecacea4dbf0ae76529f37c2d0309b17b557ec/hub/2021/11/10/ab5e2d3b-9a4a-41f0-b2cd-6cee804ce823/genre-charts-covers.png?auto=webp&fit=crop&height=675&width=1200'


# Main content
st.title('Music Genres Clustering :saxophone:')
st.image(BANNER_URL)
st.write('**Data Preview:**')
# Remove the CSV's "Index" column before the frame is displayed and analysed.
df = df.drop(columns="Index")
st.dataframe(df)

# GENRE CLUSTER
st.write('**Visualizing Top 5 Genres:**')
# Count tracks per genre, then keep the labels of the five most frequent ones.
genre_counts = df["Top Genre"].value_counts()
top_genres = genre_counts.nlargest(5).index
20
def plot_3d_scatter(df):
    """Build a 3D scatter figure with one trace per top genre.

    Each trace plots 'Beats Per Minute (BPM)' (x), 'Energy' (y) and
    'Danceability' (z) for the rows of ``df`` whose "Top Genre" equals that
    genre. The genres drawn come from the module-level ``top_genres``.

    Args:
        df: DataFrame with "Top Genre", 'Beats Per Minute (BPM)', 'Energy'
            and 'Danceability' columns.

    Returns:
        The populated ``plotly.graph_objects.Figure``.
    """
    figure = go.Figure()
    for genre in top_genres:
        # Select this genre's rows once and reuse them for all three axes.
        genre_rows = df[df["Top Genre"] == genre]
        trace = go.Scatter3d(
            x=genre_rows['Beats Per Minute (BPM)'],
            y=genre_rows['Energy'],
            z=genre_rows['Danceability'],
            mode='markers',
            marker=dict(size=6, line=dict(width=1)),
            name=str(genre),
        )
        figure.add_trace(trace)
    # Same hover text for every trace, then label the three axes.
    figure.update_traces(hovertemplate='BPM: %{x} <br>Energy: %{y} <br>Danceability: %{z}')
    figure.update_layout(
        autosize=True,
        scene=dict(xaxis_title='BPM', yaxis_title='Energy', zaxis_title='Danceability'),
    )
    return figure
30
st.plotly_chart(plot_3d_scatter(df))


# KMEANS CLUSTER
st.write('**Clustering by "Beats Per Minute (BPM)", "Loudness (dB)", "Liveness", "Valence", "Acousticness","Speechiness" using Kmeans.:**')
# Audio features fed to KMeans.
CLUSTER_FEATURES = [
    "Beats Per Minute (BPM)",
    "Loudness (dB)",
    "Liveness",
    "Valence",
    "Acousticness",
    "Speechiness",
]
N_CLUSTERS = 10
df2 = df[CLUSTER_FEATURES]
# Streamlit re-executes this script on every interaction. Fixing random_state
# keeps the cluster assignment (and therefore the "Cluster N" labels) stable
# between reruns; n_init is pinned so the result does not depend on which
# scikit-learn default happens to be installed.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=42)
clusters = kmeans.fit_predict(df2)
# fit_predict returns integer labels 0..N_CLUSTERS-1, so every row gets a
# 1-based "Cluster N" name directly; no NaN back-fill is required.
df["Music Segments"] = [f"Cluster {label + 1}" for label in clusters]
st.dataframe(df)

st.write('**Visualizing:**')
44
def plot_3d_scatter(df):
    """Build a 3D scatter figure with one trace per music segment.

    Each trace plots 'Beats Per Minute (BPM)' (x), 'Energy' (y) and
    'Danceability' (z) for the rows of ``df`` sharing one value of the
    "Music Segments" column. Defining this function replaces the earlier
    function of the same name.

    Args:
        df: DataFrame with "Music Segments", 'Beats Per Minute (BPM)',
            'Energy' and 'Danceability' columns.

    Returns:
        The populated ``plotly.graph_objects.Figure``.
    """
    figure = go.Figure()
    for segment in df["Music Segments"].unique():
        # Select this segment's rows once and reuse them for all three axes.
        segment_rows = df[df["Music Segments"] == segment]
        trace = go.Scatter3d(
            x=segment_rows['Beats Per Minute (BPM)'],
            y=segment_rows['Energy'],
            z=segment_rows['Danceability'],
            mode='markers',
            marker=dict(size=6, line=dict(width=1)),
            name=str(segment),
        )
        figure.add_trace(trace)
    # Same hover text for every trace, then label the three axes.
    figure.update_traces(hovertemplate='BPM: %{x} <br>Energy: %{y} <br>Danceability: %{z}')
    figure.update_layout(
        autosize=True,
        scene=dict(xaxis_title='BPM', yaxis_title='Energy', zaxis_title='Danceability'),
    )
    return figure
54
st.plotly_chart(plot_3d_scatter(df))


# Scatter plot
st.write('**Similarity between Top 5 Genres and Clusters:**')
# Rows: genres, columns: "Cluster N" labels, cells: number of tracks.
genre_cluster_df = pd.crosstab(df["Top Genre"], df["Music Segments"])

# Filter for top 5 genres
genre_cluster_df = genre_cluster_df.loc[top_genres]

# Prepare data for scatter plot: one row per (genre, cluster) pair.
scatter_data = genre_cluster_df.reset_index().melt(id_vars="Top Genre", var_name="Cluster", value_name="Frequency")
# Labels look like "Cluster 7"; keep only the number so the x axis is numeric.
scatter_data["Cluster"] = scatter_data["Cluster"].apply(lambda label: int(label.split()[1]))

# Plot on an explicit Figure/Axes instead of pyplot's implicit global figure,
# so st.pyplot receives a real Figure object.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(data=scatter_data, x="Cluster", y="Frequency", hue="Top Genre", palette="Set1", s=100, ax=ax)
ax.set_xlabel("Cluster")
ax.set_ylabel("Frequency")
ax.legend(title="Top Genre", bbox_to_anchor=(1.05, 1), loc='upper left')
fig.savefig("scatter_plot.png", bbox_inches='tight')  # Save the plot with tight bounding box
st.pyplot(fig)
# The script is re-run on every interaction; close the figure so open
# matplotlib figures do not accumulate in the server process.
plt.close(fig)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ pandas
2
+ seaborn
3
+ matplotlib
4
+ plotly
5
+ scikit-learn