huathedev committed on
Commit
48760b2
1 Parent(s): 8f88050

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ data/clean_data.csv filter=lfs diff=lfs merge=lfs -text
Introduction.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit import session_state as session
3
+
4
+ # Configure Streamlit page
5
+ st.set_page_config(page_title="Song Recommender🎶", page_icon="🎶")
6
+
7
+ st.title("Song Recommender🎶")
8
+ st.markdown("Click on '**Recommender**' from the side panel to get started.")
9
+ st.markdown("**How does it work?**")
10
+ st.markdown(
11
+ "The songs come from the [Spotify and Genius Track Dataset](https://www.kaggle.com/datasets/saurabhshahane/spotgen-music-dataset) on Kaggle. The [k-Nearest Neighbor algorithm](https://scikit-learn.org/stable/modules/neighbors.html) is used to obtain recommendations, i.e., the top songs which are closest in distance to the set of parameter inputs specified by you."
12
+ )
13
+
14
+ st.markdown("This app will recommend you songs based on the characteristics below.")
15
+ st.markdown(
16
+ """
17
+ **Acousticness**: A metric describing the 'acousticness' of a song. 1.0 represents high confidence the song is acoustic.<br>
18
+
19
+ **Danceability**: Describes a song's suitability for dancing based on combination of elements including tempo, rhythm stability, beat strength, and overall regularity.
20
+ 0.0 is least danceable and 1.0 is most danceable.<br>
21
+
22
+ **Energy**: Measure of intensity and activity. Often, energetic songs feel fast, loud, and noisy.<br>
23
+
24
+ **Liveness**: A metric describing the likelihood that a track is a recording of a live performance.<br>
25
+
26
+ **Speechiness**: A metric describing how much spoken-word content the track contains.<br>
27
+
28
+ **Valence**: A metric ranging from 0.0 to 1.0 describing the positivity conveyed by a track.<br>
29
+
30
+ Source: [Spotify Web API](https://developer.spotify.com/documentation/web-api/reference)
31
+ """,
32
+ unsafe_allow_html=True,
33
+ )
data/clean_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:000dd9f72226c7d5839a3f9a22d4ada8a32124b3753ef9d55bfb29c94c4c85cc
3
+ size 23382638
data/preprocess_data.ipynb ADDED
@@ -0,0 +1,1301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "## Import Dependencies"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "metadata": {},
15
+ "outputs": [],
16
+ "source": [
17
+ "import pandas as pd"
18
+ ]
19
+ },
20
+ {
21
+ "attachments": {},
22
+ "cell_type": "markdown",
23
+ "metadata": {},
24
+ "source": [
25
+ "## Load Data Files"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 2,
31
+ "metadata": {},
32
+ "outputs": [],
33
+ "source": [
34
+ "dir = \"SpotGenTrack/Data Sources/\"\n",
35
+ "albums = pd.read_csv(dir + \"spotify_albums.csv\")\n",
36
+ "artists = pd.read_csv(dir + \"spotify_artists.csv\")\n",
37
+ "tracks = pd.read_csv(dir + \"spotify_tracks.csv\")"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 3,
43
+ "metadata": {},
44
+ "outputs": [
45
+ {
46
+ "data": {
47
+ "text/html": [
48
+ "<div>\n",
49
+ "<style scoped>\n",
50
+ " .dataframe tbody tr th:only-of-type {\n",
51
+ " vertical-align: middle;\n",
52
+ " }\n",
53
+ "\n",
54
+ " .dataframe tbody tr th {\n",
55
+ " vertical-align: top;\n",
56
+ " }\n",
57
+ "\n",
58
+ " .dataframe thead th {\n",
59
+ " text-align: right;\n",
60
+ " }\n",
61
+ "</style>\n",
62
+ "<table border=\"1\" class=\"dataframe\">\n",
63
+ " <thead>\n",
64
+ " <tr style=\"text-align: right;\">\n",
65
+ " <th></th>\n",
66
+ " <th>Unnamed: 0</th>\n",
67
+ " <th>album_type</th>\n",
68
+ " <th>artist_id</th>\n",
69
+ " <th>available_markets</th>\n",
70
+ " <th>external_urls</th>\n",
71
+ " <th>href</th>\n",
72
+ " <th>id</th>\n",
73
+ " <th>images</th>\n",
74
+ " <th>name</th>\n",
75
+ " <th>release_date</th>\n",
76
+ " <th>release_date_precision</th>\n",
77
+ " <th>total_tracks</th>\n",
78
+ " <th>track_id</th>\n",
79
+ " <th>track_name_prev</th>\n",
80
+ " <th>uri</th>\n",
81
+ " <th>type</th>\n",
82
+ " </tr>\n",
83
+ " </thead>\n",
84
+ " <tbody>\n",
85
+ " <tr>\n",
86
+ " <th>0</th>\n",
87
+ " <td>0</td>\n",
88
+ " <td>single</td>\n",
89
+ " <td>3DiDSECUqqY1AuBP8qtaIa</td>\n",
90
+ " <td>['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...</td>\n",
91
+ " <td>{'spotify': 'https://open.spotify.com/album/1g...</td>\n",
92
+ " <td>https://api.spotify.com/v1/albums/1gAM7M4rBwEb...</td>\n",
93
+ " <td>1gAM7M4rBwEbSPeAQR2nx1</td>\n",
94
+ " <td>[{'height': 640, 'url': 'https://i.scdn.co/ima...</td>\n",
95
+ " <td>If I Ain't Got You EP</td>\n",
96
+ " <td>2019-02-08</td>\n",
97
+ " <td>day</td>\n",
98
+ " <td>6</td>\n",
99
+ " <td>2iejTMy9XZ8Gaae0aQ2yl0</td>\n",
100
+ " <td>track_32</td>\n",
101
+ " <td>spotify:album:1gAM7M4rBwEbSPeAQR2nx1</td>\n",
102
+ " <td>album</td>\n",
103
+ " </tr>\n",
104
+ " <tr>\n",
105
+ " <th>1</th>\n",
106
+ " <td>1</td>\n",
107
+ " <td>album</td>\n",
108
+ " <td>6s1pCNXcbdtQJlsnM1hRIA</td>\n",
109
+ " <td>['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...</td>\n",
110
+ " <td>{'spotify': 'https://open.spotify.com/album/4K...</td>\n",
111
+ " <td>https://api.spotify.com/v1/albums/4KfJZV7WfolY...</td>\n",
112
+ " <td>4KfJZV7WfolYlxBzOTo66s</td>\n",
113
+ " <td>[{'height': 640, 'url': 'https://i.scdn.co/ima...</td>\n",
114
+ " <td>Shostakovich Symphony No.5 - Four Romances on ...</td>\n",
115
+ " <td>2019-03-01</td>\n",
116
+ " <td>day</td>\n",
117
+ " <td>8</td>\n",
118
+ " <td>1WQfghEjszJJ4H8MAWrQ2C</td>\n",
119
+ " <td>track_11</td>\n",
120
+ " <td>spotify:album:4KfJZV7WfolYlxBzOTo66s</td>\n",
121
+ " <td>album</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>2</th>\n",
125
+ " <td>2</td>\n",
126
+ " <td>single</td>\n",
127
+ " <td>5YjfNaHq05WrwldRe1QSBc</td>\n",
128
+ " <td>['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...</td>\n",
129
+ " <td>{'spotify': 'https://open.spotify.com/album/7n...</td>\n",
130
+ " <td>https://api.spotify.com/v1/albums/7nLYY7uAVUb5...</td>\n",
131
+ " <td>7nLYY7uAVUb57kpd7tZxnS</td>\n",
132
+ " <td>[{'height': 640, 'url': 'https://i.scdn.co/ima...</td>\n",
133
+ " <td>Take My Bass</td>\n",
134
+ " <td>2019-03-14</td>\n",
135
+ " <td>day</td>\n",
136
+ " <td>1</td>\n",
137
+ " <td>3jJKj4QTK3v18ZSwpk7AcV</td>\n",
138
+ " <td>track_15</td>\n",
139
+ " <td>spotify:album:7nLYY7uAVUb57kpd7tZxnS</td>\n",
140
+ " <td>album</td>\n",
141
+ " </tr>\n",
142
+ " <tr>\n",
143
+ " <th>3</th>\n",
144
+ " <td>3</td>\n",
145
+ " <td>single</td>\n",
146
+ " <td>2G9Vc16JCpnZmK4uGH46Fa</td>\n",
147
+ " <td>['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...</td>\n",
148
+ " <td>{'spotify': 'https://open.spotify.com/album/6p...</td>\n",
149
+ " <td>https://api.spotify.com/v1/albums/6p20Rt4x2Qn5...</td>\n",
150
+ " <td>6p20Rt4x2Qn5mUMRi1s6pj</td>\n",
151
+ " <td>[{'height': 640, 'url': 'https://i.scdn.co/ima...</td>\n",
152
+ " <td>Hypnotizing (Are U)</td>\n",
153
+ " <td>2016-11-16</td>\n",
154
+ " <td>day</td>\n",
155
+ " <td>1</td>\n",
156
+ " <td>1xGtDafUZbHyYC3Xarcbrj</td>\n",
157
+ " <td>track_46</td>\n",
158
+ " <td>spotify:album:6p20Rt4x2Qn5mUMRi1s6pj</td>\n",
159
+ " <td>album</td>\n",
160
+ " </tr>\n",
161
+ " <tr>\n",
162
+ " <th>4</th>\n",
163
+ " <td>4</td>\n",
164
+ " <td>single</td>\n",
165
+ " <td>2dwM9OcE4c3Ph1UBINSodx</td>\n",
166
+ " <td>['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...</td>\n",
167
+ " <td>{'spotify': 'https://open.spotify.com/album/1X...</td>\n",
168
+ " <td>https://api.spotify.com/v1/albums/1XeoOqC1q7U2...</td>\n",
169
+ " <td>1XeoOqC1q7U2iyLEQJ64cu</td>\n",
170
+ " <td>[{'height': 640, 'url': 'https://i.scdn.co/ima...</td>\n",
171
+ " <td>Sunshine</td>\n",
172
+ " <td>2018-07-20</td>\n",
173
+ " <td>day</td>\n",
174
+ " <td>1</td>\n",
175
+ " <td>0gWtsXvXOzAT6FtM3ur8in</td>\n",
176
+ " <td>track_10</td>\n",
177
+ " <td>spotify:album:1XeoOqC1q7U2iyLEQJ64cu</td>\n",
178
+ " <td>album</td>\n",
179
+ " </tr>\n",
180
+ " </tbody>\n",
181
+ "</table>\n",
182
+ "</div>"
183
+ ],
184
+ "text/plain": [
185
+ " Unnamed: 0 album_type artist_id \\\n",
186
+ "0 0 single 3DiDSECUqqY1AuBP8qtaIa \n",
187
+ "1 1 album 6s1pCNXcbdtQJlsnM1hRIA \n",
188
+ "2 2 single 5YjfNaHq05WrwldRe1QSBc \n",
189
+ "3 3 single 2G9Vc16JCpnZmK4uGH46Fa \n",
190
+ "4 4 single 2dwM9OcE4c3Ph1UBINSodx \n",
191
+ "\n",
192
+ " available_markets \\\n",
193
+ "0 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n",
194
+ "1 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n",
195
+ "2 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n",
196
+ "3 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n",
197
+ "4 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... \n",
198
+ "\n",
199
+ " external_urls \\\n",
200
+ "0 {'spotify': 'https://open.spotify.com/album/1g... \n",
201
+ "1 {'spotify': 'https://open.spotify.com/album/4K... \n",
202
+ "2 {'spotify': 'https://open.spotify.com/album/7n... \n",
203
+ "3 {'spotify': 'https://open.spotify.com/album/6p... \n",
204
+ "4 {'spotify': 'https://open.spotify.com/album/1X... \n",
205
+ "\n",
206
+ " href id \\\n",
207
+ "0 https://api.spotify.com/v1/albums/1gAM7M4rBwEb... 1gAM7M4rBwEbSPeAQR2nx1 \n",
208
+ "1 https://api.spotify.com/v1/albums/4KfJZV7WfolY... 4KfJZV7WfolYlxBzOTo66s \n",
209
+ "2 https://api.spotify.com/v1/albums/7nLYY7uAVUb5... 7nLYY7uAVUb57kpd7tZxnS \n",
210
+ "3 https://api.spotify.com/v1/albums/6p20Rt4x2Qn5... 6p20Rt4x2Qn5mUMRi1s6pj \n",
211
+ "4 https://api.spotify.com/v1/albums/1XeoOqC1q7U2... 1XeoOqC1q7U2iyLEQJ64cu \n",
212
+ "\n",
213
+ " images \\\n",
214
+ "0 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n",
215
+ "1 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n",
216
+ "2 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n",
217
+ "3 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n",
218
+ "4 [{'height': 640, 'url': 'https://i.scdn.co/ima... \n",
219
+ "\n",
220
+ " name release_date \\\n",
221
+ "0 If I Ain't Got You EP 2019-02-08 \n",
222
+ "1 Shostakovich Symphony No.5 - Four Romances on ... 2019-03-01 \n",
223
+ "2 Take My Bass 2019-03-14 \n",
224
+ "3 Hypnotizing (Are U) 2016-11-16 \n",
225
+ "4 Sunshine 2018-07-20 \n",
226
+ "\n",
227
+ " release_date_precision total_tracks track_id \\\n",
228
+ "0 day 6 2iejTMy9XZ8Gaae0aQ2yl0 \n",
229
+ "1 day 8 1WQfghEjszJJ4H8MAWrQ2C \n",
230
+ "2 day 1 3jJKj4QTK3v18ZSwpk7AcV \n",
231
+ "3 day 1 1xGtDafUZbHyYC3Xarcbrj \n",
232
+ "4 day 1 0gWtsXvXOzAT6FtM3ur8in \n",
233
+ "\n",
234
+ " track_name_prev uri type \n",
235
+ "0 track_32 spotify:album:1gAM7M4rBwEbSPeAQR2nx1 album \n",
236
+ "1 track_11 spotify:album:4KfJZV7WfolYlxBzOTo66s album \n",
237
+ "2 track_15 spotify:album:7nLYY7uAVUb57kpd7tZxnS album \n",
238
+ "3 track_46 spotify:album:6p20Rt4x2Qn5mUMRi1s6pj album \n",
239
+ "4 track_10 spotify:album:1XeoOqC1q7U2iyLEQJ64cu album "
240
+ ]
241
+ },
242
+ "metadata": {},
243
+ "output_type": "display_data"
244
+ },
245
+ {
246
+ "data": {
247
+ "text/plain": [
248
+ "Index(['Unnamed: 0', 'album_type', 'artist_id', 'available_markets',\n",
249
+ " 'external_urls', 'href', 'id', 'images', 'name', 'release_date',\n",
250
+ " 'release_date_precision', 'total_tracks', 'track_id', 'track_name_prev',\n",
251
+ " 'uri', 'type'],\n",
252
+ " dtype='object')"
253
+ ]
254
+ },
255
+ "execution_count": 3,
256
+ "metadata": {},
257
+ "output_type": "execute_result"
258
+ }
259
+ ],
260
+ "source": [
261
+ "# Inspect albums\n",
262
+ "display(albums.head())\n",
263
+ "\n",
264
+ "# Show columns\n",
265
+ "albums.columns"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": 4,
271
+ "metadata": {},
272
+ "outputs": [
273
+ {
274
+ "data": {
275
+ "text/html": [
276
+ "<div>\n",
277
+ "<style scoped>\n",
278
+ " .dataframe tbody tr th:only-of-type {\n",
279
+ " vertical-align: middle;\n",
280
+ " }\n",
281
+ "\n",
282
+ " .dataframe tbody tr th {\n",
283
+ " vertical-align: top;\n",
284
+ " }\n",
285
+ "\n",
286
+ " .dataframe thead th {\n",
287
+ " text-align: right;\n",
288
+ " }\n",
289
+ "</style>\n",
290
+ "<table border=\"1\" class=\"dataframe\">\n",
291
+ " <thead>\n",
292
+ " <tr style=\"text-align: right;\">\n",
293
+ " <th></th>\n",
294
+ " <th>Unnamed: 0</th>\n",
295
+ " <th>artist_popularity</th>\n",
296
+ " <th>followers</th>\n",
297
+ " <th>genres</th>\n",
298
+ " <th>id</th>\n",
299
+ " <th>name</th>\n",
300
+ " <th>track_id</th>\n",
301
+ " <th>track_name_prev</th>\n",
302
+ " <th>type</th>\n",
303
+ " </tr>\n",
304
+ " </thead>\n",
305
+ " <tbody>\n",
306
+ " <tr>\n",
307
+ " <th>0</th>\n",
308
+ " <td>0</td>\n",
309
+ " <td>44</td>\n",
310
+ " <td>23230</td>\n",
311
+ " <td>['sertanejo', 'sertanejo pop', 'sertanejo trad...</td>\n",
312
+ " <td>4mGnpjhqgx4RUdsIJiURdo</td>\n",
313
+ " <td>Juliano Cezar</td>\n",
314
+ " <td>0wmDmAILuW9e2aRttkl4aC</td>\n",
315
+ " <td>track_9</td>\n",
316
+ " <td>artist</td>\n",
317
+ " </tr>\n",
318
+ " <tr>\n",
319
+ " <th>1</th>\n",
320
+ " <td>1</td>\n",
321
+ " <td>22</td>\n",
322
+ " <td>313</td>\n",
323
+ " <td>[]</td>\n",
324
+ " <td>1dLnVku4VQUOLswwDFvRc9</td>\n",
325
+ " <td>The Grenadines</td>\n",
326
+ " <td>4wqwj0gA8qPZKLl5WVqXml</td>\n",
327
+ " <td>track_30</td>\n",
328
+ " <td>artist</td>\n",
329
+ " </tr>\n",
330
+ " <tr>\n",
331
+ " <th>2</th>\n",
332
+ " <td>2</td>\n",
333
+ " <td>26</td>\n",
334
+ " <td>1596</td>\n",
335
+ " <td>['danish pop rock']</td>\n",
336
+ " <td>6YVY310fjfUzKi8hiqR7iK</td>\n",
337
+ " <td>Gangway</td>\n",
338
+ " <td>1bFqWDbvHmZe2f4Nf9qaD8</td>\n",
339
+ " <td>track_38</td>\n",
340
+ " <td>artist</td>\n",
341
+ " </tr>\n",
342
+ " <tr>\n",
343
+ " <th>3</th>\n",
344
+ " <td>3</td>\n",
345
+ " <td>31</td>\n",
346
+ " <td>149</td>\n",
347
+ " <td>['uk alternative pop']</td>\n",
348
+ " <td>2VElyouiCfoYPDJluzwJwK</td>\n",
349
+ " <td>FADES</td>\n",
350
+ " <td>3MFSUBAidPzRBbIS7BDj1S</td>\n",
351
+ " <td>track_34</td>\n",
352
+ " <td>artist</td>\n",
353
+ " </tr>\n",
354
+ " <tr>\n",
355
+ " <th>4</th>\n",
356
+ " <td>4</td>\n",
357
+ " <td>21</td>\n",
358
+ " <td>11</td>\n",
359
+ " <td>['french baroque']</td>\n",
360
+ " <td>4agVy03qW8juSysCTUOuDI</td>\n",
361
+ " <td>Jean-Pierre Guignon</td>\n",
362
+ " <td>2r3q57FhxdsCyYr0kuDq4b</td>\n",
363
+ " <td>track_26</td>\n",
364
+ " <td>artist</td>\n",
365
+ " </tr>\n",
366
+ " </tbody>\n",
367
+ "</table>\n",
368
+ "</div>"
369
+ ],
370
+ "text/plain": [
371
+ " Unnamed: 0 artist_popularity followers \\\n",
372
+ "0 0 44 23230 \n",
373
+ "1 1 22 313 \n",
374
+ "2 2 26 1596 \n",
375
+ "3 3 31 149 \n",
376
+ "4 4 21 11 \n",
377
+ "\n",
378
+ " genres id \\\n",
379
+ "0 ['sertanejo', 'sertanejo pop', 'sertanejo trad... 4mGnpjhqgx4RUdsIJiURdo \n",
380
+ "1 [] 1dLnVku4VQUOLswwDFvRc9 \n",
381
+ "2 ['danish pop rock'] 6YVY310fjfUzKi8hiqR7iK \n",
382
+ "3 ['uk alternative pop'] 2VElyouiCfoYPDJluzwJwK \n",
383
+ "4 ['french baroque'] 4agVy03qW8juSysCTUOuDI \n",
384
+ "\n",
385
+ " name track_id track_name_prev type \n",
386
+ "0 Juliano Cezar 0wmDmAILuW9e2aRttkl4aC track_9 artist \n",
387
+ "1 The Grenadines 4wqwj0gA8qPZKLl5WVqXml track_30 artist \n",
388
+ "2 Gangway 1bFqWDbvHmZe2f4Nf9qaD8 track_38 artist \n",
389
+ "3 FADES 3MFSUBAidPzRBbIS7BDj1S track_34 artist \n",
390
+ "4 Jean-Pierre Guignon 2r3q57FhxdsCyYr0kuDq4b track_26 artist "
391
+ ]
392
+ },
393
+ "metadata": {},
394
+ "output_type": "display_data"
395
+ },
396
+ {
397
+ "data": {
398
+ "text/plain": [
399
+ "Index(['Unnamed: 0', 'artist_popularity', 'followers', 'genres', 'id', 'name',\n",
400
+ " 'track_id', 'track_name_prev', 'type'],\n",
401
+ " dtype='object')"
402
+ ]
403
+ },
404
+ "execution_count": 4,
405
+ "metadata": {},
406
+ "output_type": "execute_result"
407
+ }
408
+ ],
409
+ "source": [
410
+ "# Inspect artists\n",
411
+ "display(artists.head())\n",
412
+ "\n",
413
+ "# Show columns\n",
414
+ "artists.columns"
415
+ ]
416
+ },
417
+ {
418
+ "cell_type": "code",
419
+ "execution_count": 5,
420
+ "metadata": {},
421
+ "outputs": [
422
+ {
423
+ "data": {
424
+ "text/html": [
425
+ "<div>\n",
426
+ "<style scoped>\n",
427
+ " .dataframe tbody tr th:only-of-type {\n",
428
+ " vertical-align: middle;\n",
429
+ " }\n",
430
+ "\n",
431
+ " .dataframe tbody tr th {\n",
432
+ " vertical-align: top;\n",
433
+ " }\n",
434
+ "\n",
435
+ " .dataframe thead th {\n",
436
+ " text-align: right;\n",
437
+ " }\n",
438
+ "</style>\n",
439
+ "<table border=\"1\" class=\"dataframe\">\n",
440
+ " <thead>\n",
441
+ " <tr style=\"text-align: right;\">\n",
442
+ " <th></th>\n",
443
+ " <th>Unnamed: 0</th>\n",
444
+ " <th>acousticness</th>\n",
445
+ " <th>album_id</th>\n",
446
+ " <th>analysis_url</th>\n",
447
+ " <th>artists_id</th>\n",
448
+ " <th>available_markets</th>\n",
449
+ " <th>country</th>\n",
450
+ " <th>danceability</th>\n",
451
+ " <th>disc_number</th>\n",
452
+ " <th>duration_ms</th>\n",
453
+ " <th>...</th>\n",
454
+ " <th>preview_url</th>\n",
455
+ " <th>speechiness</th>\n",
456
+ " <th>tempo</th>\n",
457
+ " <th>time_signature</th>\n",
458
+ " <th>track_href</th>\n",
459
+ " <th>track_name_prev</th>\n",
460
+ " <th>track_number</th>\n",
461
+ " <th>uri</th>\n",
462
+ " <th>valence</th>\n",
463
+ " <th>type</th>\n",
464
+ " </tr>\n",
465
+ " </thead>\n",
466
+ " <tbody>\n",
467
+ " <tr>\n",
468
+ " <th>0</th>\n",
469
+ " <td>0</td>\n",
470
+ " <td>0.294</td>\n",
471
+ " <td>0D3QufeCudpQANOR7luqdr</td>\n",
472
+ " <td>https://api.spotify.com/v1/audio-analysis/5qlj...</td>\n",
473
+ " <td>['3mxJuHRn2ZWD5OofvJtDZY']</td>\n",
474
+ " <td>['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...</td>\n",
475
+ " <td>BE</td>\n",
476
+ " <td>0.698</td>\n",
477
+ " <td>1.0</td>\n",
478
+ " <td>235584.0</td>\n",
479
+ " <td>...</td>\n",
480
+ " <td>https://p.scdn.co/mp3-preview/1b05a902da3a251d...</td>\n",
481
+ " <td>0.0262</td>\n",
482
+ " <td>115.018</td>\n",
483
+ " <td>4.0</td>\n",
484
+ " <td>https://api.spotify.com/v1/tracks/5qljLQuKnNJf...</td>\n",
485
+ " <td>track_14</td>\n",
486
+ " <td>1.0</td>\n",
487
+ " <td>spotify:track:5qljLQuKnNJf4F4vfxQB0V</td>\n",
488
+ " <td>0.6220</td>\n",
489
+ " <td>track</td>\n",
490
+ " </tr>\n",
491
+ " <tr>\n",
492
+ " <th>1</th>\n",
493
+ " <td>1</td>\n",
494
+ " <td>0.863</td>\n",
495
+ " <td>1bcqsH5UyTBzmh9YizdsBE</td>\n",
496
+ " <td>https://api.spotify.com/v1/audio-analysis/3VAX...</td>\n",
497
+ " <td>['4xWMewm6CYMstu0sPgd9jJ']</td>\n",
498
+ " <td>['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...</td>\n",
499
+ " <td>BE</td>\n",
500
+ " <td>0.719</td>\n",
501
+ " <td>1.0</td>\n",
502
+ " <td>656960.0</td>\n",
503
+ " <td>...</td>\n",
504
+ " <td>https://p.scdn.co/mp3-preview/d8140736a6131cb5...</td>\n",
505
+ " <td>0.9220</td>\n",
506
+ " <td>115.075</td>\n",
507
+ " <td>3.0</td>\n",
508
+ " <td>https://api.spotify.com/v1/tracks/3VAX2MJdmdqA...</td>\n",
509
+ " <td>track_3</td>\n",
510
+ " <td>3.0</td>\n",
511
+ " <td>spotify:track:3VAX2MJdmdqARLSU5hPMpm</td>\n",
512
+ " <td>0.5890</td>\n",
513
+ " <td>track</td>\n",
514
+ " </tr>\n",
515
+ " <tr>\n",
516
+ " <th>2</th>\n",
517
+ " <td>2</td>\n",
518
+ " <td>0.750</td>\n",
519
+ " <td>4tKijjmxGClg4JOLAyo2qE</td>\n",
520
+ " <td>https://api.spotify.com/v1/audio-analysis/1L3Y...</td>\n",
521
+ " <td>['3hYaK5FF3YAglCj5HZgBnP']</td>\n",
522
+ " <td>['GB']</td>\n",
523
+ " <td>BE</td>\n",
524
+ " <td>0.466</td>\n",
525
+ " <td>1.0</td>\n",
526
+ " <td>492840.0</td>\n",
527
+ " <td>...</td>\n",
528
+ " <td>https://p.scdn.co/mp3-preview/c8af28fb15185b18...</td>\n",
529
+ " <td>0.9440</td>\n",
530
+ " <td>79.565</td>\n",
531
+ " <td>4.0</td>\n",
532
+ " <td>https://api.spotify.com/v1/tracks/1L3YAhsEMrGV...</td>\n",
533
+ " <td>track_4</td>\n",
534
+ " <td>4.0</td>\n",
535
+ " <td>spotify:track:1L3YAhsEMrGVvCgDXj2TYn</td>\n",
536
+ " <td>0.0850</td>\n",
537
+ " <td>track</td>\n",
538
+ " </tr>\n",
539
+ " <tr>\n",
540
+ " <th>3</th>\n",
541
+ " <td>3</td>\n",
542
+ " <td>0.763</td>\n",
543
+ " <td>6FeJF5r8roonnKraJxr4oB</td>\n",
544
+ " <td>https://api.spotify.com/v1/audio-analysis/6aCe...</td>\n",
545
+ " <td>['2KQsUB9DRBcJk17JWX1eXD']</td>\n",
546
+ " <td>['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH...</td>\n",
547
+ " <td>BE</td>\n",
548
+ " <td>0.719</td>\n",
549
+ " <td>1.0</td>\n",
550
+ " <td>316578.0</td>\n",
551
+ " <td>...</td>\n",
552
+ " <td>https://p.scdn.co/mp3-preview/7629b8e9f31f6e9b...</td>\n",
553
+ " <td>0.9380</td>\n",
554
+ " <td>112.822</td>\n",
555
+ " <td>3.0</td>\n",
556
+ " <td>https://api.spotify.com/v1/tracks/6aCe9zzoZmCo...</td>\n",
557
+ " <td>track_9</td>\n",
558
+ " <td>1.0</td>\n",
559
+ " <td>spotify:track:6aCe9zzoZmCojX7bbgKKtf</td>\n",
560
+ " <td>0.5330</td>\n",
561
+ " <td>track</td>\n",
562
+ " </tr>\n",
563
+ " <tr>\n",
564
+ " <th>4</th>\n",
565
+ " <td>4</td>\n",
566
+ " <td>0.770</td>\n",
567
+ " <td>4tKijjmxGClg4JOLAyo2qE</td>\n",
568
+ " <td>https://api.spotify.com/v1/audio-analysis/1Vo8...</td>\n",
569
+ " <td>['3hYaK5FF3YAglCj5HZgBnP']</td>\n",
570
+ " <td>['GB']</td>\n",
571
+ " <td>BE</td>\n",
572
+ " <td>0.460</td>\n",
573
+ " <td>1.0</td>\n",
574
+ " <td>558880.0</td>\n",
575
+ " <td>...</td>\n",
576
+ " <td>https://p.scdn.co/mp3-preview/32be593c0eb82868...</td>\n",
577
+ " <td>0.9430</td>\n",
578
+ " <td>81.260</td>\n",
579
+ " <td>4.0</td>\n",
580
+ " <td>https://api.spotify.com/v1/tracks/1Vo802A38tPF...</td>\n",
581
+ " <td>track_2</td>\n",
582
+ " <td>2.0</td>\n",
583
+ " <td>spotify:track:1Vo802A38tPFHmje1h91um</td>\n",
584
+ " <td>0.0906</td>\n",
585
+ " <td>track</td>\n",
586
+ " </tr>\n",
587
+ " </tbody>\n",
588
+ "</table>\n",
589
+ "<p>5 rows × 32 columns</p>\n",
590
+ "</div>"
591
+ ],
592
+ "text/plain": [
593
+ " Unnamed: 0 acousticness album_id \\\n",
594
+ "0 0 0.294 0D3QufeCudpQANOR7luqdr \n",
595
+ "1 1 0.863 1bcqsH5UyTBzmh9YizdsBE \n",
596
+ "2 2 0.750 4tKijjmxGClg4JOLAyo2qE \n",
597
+ "3 3 0.763 6FeJF5r8roonnKraJxr4oB \n",
598
+ "4 4 0.770 4tKijjmxGClg4JOLAyo2qE \n",
599
+ "\n",
600
+ " analysis_url \\\n",
601
+ "0 https://api.spotify.com/v1/audio-analysis/5qlj... \n",
602
+ "1 https://api.spotify.com/v1/audio-analysis/3VAX... \n",
603
+ "2 https://api.spotify.com/v1/audio-analysis/1L3Y... \n",
604
+ "3 https://api.spotify.com/v1/audio-analysis/6aCe... \n",
605
+ "4 https://api.spotify.com/v1/audio-analysis/1Vo8... \n",
606
+ "\n",
607
+ " artists_id \\\n",
608
+ "0 ['3mxJuHRn2ZWD5OofvJtDZY'] \n",
609
+ "1 ['4xWMewm6CYMstu0sPgd9jJ'] \n",
610
+ "2 ['3hYaK5FF3YAglCj5HZgBnP'] \n",
611
+ "3 ['2KQsUB9DRBcJk17JWX1eXD'] \n",
612
+ "4 ['3hYaK5FF3YAglCj5HZgBnP'] \n",
613
+ "\n",
614
+ " available_markets country danceability \\\n",
615
+ "0 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... BE 0.698 \n",
616
+ "1 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... BE 0.719 \n",
617
+ "2 ['GB'] BE 0.466 \n",
618
+ "3 ['AD', 'AE', 'AR', 'AT', 'AU', 'BE', 'BG', 'BH... BE 0.719 \n",
619
+ "4 ['GB'] BE 0.460 \n",
620
+ "\n",
621
+ " disc_number duration_ms ... \\\n",
622
+ "0 1.0 235584.0 ... \n",
623
+ "1 1.0 656960.0 ... \n",
624
+ "2 1.0 492840.0 ... \n",
625
+ "3 1.0 316578.0 ... \n",
626
+ "4 1.0 558880.0 ... \n",
627
+ "\n",
628
+ " preview_url speechiness tempo \\\n",
629
+ "0 https://p.scdn.co/mp3-preview/1b05a902da3a251d... 0.0262 115.018 \n",
630
+ "1 https://p.scdn.co/mp3-preview/d8140736a6131cb5... 0.9220 115.075 \n",
631
+ "2 https://p.scdn.co/mp3-preview/c8af28fb15185b18... 0.9440 79.565 \n",
632
+ "3 https://p.scdn.co/mp3-preview/7629b8e9f31f6e9b... 0.9380 112.822 \n",
633
+ "4 https://p.scdn.co/mp3-preview/32be593c0eb82868... 0.9430 81.260 \n",
634
+ "\n",
635
+ " time_signature track_href \\\n",
636
+ "0 4.0 https://api.spotify.com/v1/tracks/5qljLQuKnNJf... \n",
637
+ "1 3.0 https://api.spotify.com/v1/tracks/3VAX2MJdmdqA... \n",
638
+ "2 4.0 https://api.spotify.com/v1/tracks/1L3YAhsEMrGV... \n",
639
+ "3 3.0 https://api.spotify.com/v1/tracks/6aCe9zzoZmCo... \n",
640
+ "4 4.0 https://api.spotify.com/v1/tracks/1Vo802A38tPF... \n",
641
+ "\n",
642
+ " track_name_prev track_number uri \\\n",
643
+ "0 track_14 1.0 spotify:track:5qljLQuKnNJf4F4vfxQB0V \n",
644
+ "1 track_3 3.0 spotify:track:3VAX2MJdmdqARLSU5hPMpm \n",
645
+ "2 track_4 4.0 spotify:track:1L3YAhsEMrGVvCgDXj2TYn \n",
646
+ "3 track_9 1.0 spotify:track:6aCe9zzoZmCojX7bbgKKtf \n",
647
+ "4 track_2 2.0 spotify:track:1Vo802A38tPFHmje1h91um \n",
648
+ "\n",
649
+ " valence type \n",
650
+ "0 0.6220 track \n",
651
+ "1 0.5890 track \n",
652
+ "2 0.0850 track \n",
653
+ "3 0.5330 track \n",
654
+ "4 0.0906 track \n",
655
+ "\n",
656
+ "[5 rows x 32 columns]"
657
+ ]
658
+ },
659
+ "metadata": {},
660
+ "output_type": "display_data"
661
+ },
662
+ {
663
+ "data": {
664
+ "text/plain": [
665
+ "Index(['Unnamed: 0', 'acousticness', 'album_id', 'analysis_url', 'artists_id',\n",
666
+ " 'available_markets', 'country', 'danceability', 'disc_number',\n",
667
+ " 'duration_ms', 'energy', 'href', 'id', 'instrumentalness', 'key',\n",
668
+ " 'liveness', 'loudness', 'lyrics', 'mode', 'name', 'playlist',\n",
669
+ " 'popularity', 'preview_url', 'speechiness', 'tempo', 'time_signature',\n",
670
+ " 'track_href', 'track_name_prev', 'track_number', 'uri', 'valence',\n",
671
+ " 'type'],\n",
672
+ " dtype='object')"
673
+ ]
674
+ },
675
+ "execution_count": 5,
676
+ "metadata": {},
677
+ "output_type": "execute_result"
678
+ }
679
+ ],
680
+ "source": [
681
+ "# Inspect tracks\n",
682
+ "display(tracks.head())\n",
683
+ "\n",
684
+ "# Show columns\n",
685
+ "tracks.columns"
686
+ ]
687
+ },
688
+ {
689
+ "cell_type": "code",
690
+ "execution_count": 8,
691
+ "metadata": {},
692
+ "outputs": [],
693
+ "source": [
694
+ "# Combine the three files (artists, albums, tracks) into one track table\n",
695
+ "# drop irrelevant columns\n",
696
+ "# add a release_year column (no filtering by year is done in this function)\n",
697
+ "def join_genre_and_date(artist_df, album_df, track_df):\n",
698
+ " album = album_df.rename(columns={'id':\"album_id\"}).set_index('album_id')\n",
699
+ " artist = artist_df.rename(columns={'id':\"artists_id\",'name':\"artists_name\"}).set_index('artists_id')\n",
700
+ " track = track_df.set_index('album_id').join(album['release_date'], on='album_id' )\n",
701
+ " track.artists_id = track.artists_id.apply(lambda x: x[2:-2])\n",
702
+ " track = track.set_index('artists_id').join(artist[['artists_name','genres']], on='artists_id' )\n",
703
+ " track.reset_index(drop=False, inplace=True)\n",
704
+ " track['release_year'] = pd.to_datetime(track.release_date).dt.year\n",
705
+ " track.drop(columns = ['Unnamed: 0','country','track_name_prev','track_number','type'], inplace = True)\n",
706
+ " \n",
707
+ " return track"
708
+ ]
709
+ },
710
+ {
711
+ "cell_type": "code",
712
+ "execution_count": 9,
713
+ "metadata": {},
714
+ "outputs": [],
715
+ "source": [
716
+ "def get_filtered_track_df(df, genres_to_include):\n",
717
+ " df['genres'] = df.genres.apply(lambda x: [i[1:-1] for i in str(x)[1:-1].split(\", \")])\n",
718
+ " df_exploded = df.explode(\"genres\")[df.explode(\"genres\")[\"genres\"].isin(genres_to_include)]\n",
719
+ " df_exploded.loc[df_exploded[\"genres\"]==\"korean pop\", \"genres\"] = \"k-pop\"\n",
720
+ " df_exploded_indices = list(df_exploded.index.unique())\n",
721
+ " df = df[df.index.isin(df_exploded_indices)]\n",
722
+ " df = df.reset_index(drop=True)\n",
723
+ " \n",
724
+ " return df"
725
+ ]
726
+ },
727
+ {
728
+ "cell_type": "code",
729
+ "execution_count": 10,
730
+ "metadata": {},
731
+ "outputs": [],
732
+ "source": [
733
+ "track_with_year_and_genre = join_genre_and_date(artists, albums, tracks)\n",
734
+ "genres_to_include = genres = ['dance pop', 'electronic', 'electropop', 'hip hop', 'jazz', 'k-pop', 'latin', 'pop', 'pop rap', 'r&b', 'rock']\n",
735
+ "filtered_track_df = get_filtered_track_df(track_with_year_and_genre, genres_to_include)"
736
+ ]
737
+ },
738
+ {
739
+ "cell_type": "code",
740
+ "execution_count": 11,
741
+ "metadata": {},
742
+ "outputs": [],
743
+ "source": [
744
+ "filtered_track_df[\"uri\"] = filtered_track_df[\"uri\"].str.replace(\"spotify:track:\", \"\")\n",
745
+ "filtered_track_df = filtered_track_df.drop(columns=['analysis_url', 'available_markets'])"
746
+ ]
747
+ },
748
+ {
749
+ "cell_type": "code",
750
+ "execution_count": 12,
751
+ "metadata": {},
752
+ "outputs": [
753
+ {
754
+ "data": {
755
+ "text/html": [
756
+ "<div>\n",
757
+ "<style scoped>\n",
758
+ " .dataframe tbody tr th:only-of-type {\n",
759
+ " vertical-align: middle;\n",
760
+ " }\n",
761
+ "\n",
762
+ " .dataframe tbody tr th {\n",
763
+ " vertical-align: top;\n",
764
+ " }\n",
765
+ "\n",
766
+ " .dataframe thead th {\n",
767
+ " text-align: right;\n",
768
+ " }\n",
769
+ "</style>\n",
770
+ "<table border=\"1\" class=\"dataframe\">\n",
771
+ " <thead>\n",
772
+ " <tr style=\"text-align: right;\">\n",
773
+ " <th></th>\n",
774
+ " <th>artists_id</th>\n",
775
+ " <th>acousticness</th>\n",
776
+ " <th>danceability</th>\n",
777
+ " <th>disc_number</th>\n",
778
+ " <th>duration_ms</th>\n",
779
+ " <th>energy</th>\n",
780
+ " <th>href</th>\n",
781
+ " <th>id</th>\n",
782
+ " <th>instrumentalness</th>\n",
783
+ " <th>key</th>\n",
784
+ " <th>...</th>\n",
785
+ " <th>speechiness</th>\n",
786
+ " <th>tempo</th>\n",
787
+ " <th>time_signature</th>\n",
788
+ " <th>track_href</th>\n",
789
+ " <th>uri</th>\n",
790
+ " <th>valence</th>\n",
791
+ " <th>release_date</th>\n",
792
+ " <th>artists_name</th>\n",
793
+ " <th>genres</th>\n",
794
+ " <th>release_year</th>\n",
795
+ " </tr>\n",
796
+ " </thead>\n",
797
+ " <tbody>\n",
798
+ " <tr>\n",
799
+ " <th>0</th>\n",
800
+ " <td>68WwJXWrpo1yVOOIZjLSeT</td>\n",
801
+ " <td>0.0268</td>\n",
802
+ " <td>0.506</td>\n",
803
+ " <td>1.0</td>\n",
804
+ " <td>248777.0</td>\n",
805
+ " <td>0.741</td>\n",
806
+ " <td>https://api.spotify.com/v1/tracks/0UATU9OJxh4m...</td>\n",
807
+ " <td>0UATU9OJxh4m3fwDljdGZn</td>\n",
808
+ " <td>0.000027</td>\n",
809
+ " <td>1.0</td>\n",
810
+ " <td>...</td>\n",
811
+ " <td>0.0349</td>\n",
812
+ " <td>94.042</td>\n",
813
+ " <td>4.0</td>\n",
814
+ " <td>https://api.spotify.com/v1/tracks/0UATU9OJxh4m...</td>\n",
815
+ " <td>0UATU9OJxh4m3fwDljdGZn</td>\n",
816
+ " <td>0.236</td>\n",
817
+ " <td>2018-09-28</td>\n",
818
+ " <td>Evalyn</td>\n",
819
+ " <td>[electropop, indie electro-pop, indie poptimis...</td>\n",
820
+ " <td>2018</td>\n",
821
+ " </tr>\n",
822
+ " <tr>\n",
823
+ " <th>1</th>\n",
824
+ " <td>09xj0S68Y1OU1vHMCZAIvz</td>\n",
825
+ " <td>0.5050</td>\n",
826
+ " <td>0.487</td>\n",
827
+ " <td>1.0</td>\n",
828
+ " <td>171573.0</td>\n",
829
+ " <td>0.297</td>\n",
830
+ " <td>https://api.spotify.com/v1/tracks/4JH1M62gVDND...</td>\n",
831
+ " <td>4JH1M62gVDNDhDAUiQB3Qv</td>\n",
832
+ " <td>0.000052</td>\n",
833
+ " <td>11.0</td>\n",
834
+ " <td>...</td>\n",
835
+ " <td>0.0915</td>\n",
836
+ " <td>185.912</td>\n",
837
+ " <td>3.0</td>\n",
838
+ " <td>https://api.spotify.com/v1/tracks/4JH1M62gVDND...</td>\n",
839
+ " <td>4JH1M62gVDNDhDAUiQB3Qv</td>\n",
840
+ " <td>0.289</td>\n",
841
+ " <td>2001-08-21</td>\n",
842
+ " <td>Café Tacvba</td>\n",
843
+ " <td>[latin, latin alternative, latin rock, mexican...</td>\n",
844
+ " <td>2001</td>\n",
845
+ " </tr>\n",
846
+ " <tr>\n",
847
+ " <th>2</th>\n",
848
+ " <td>6pSsE5y0uJMwYj83KrPyf9</td>\n",
849
+ " <td>0.1330</td>\n",
850
+ " <td>0.629</td>\n",
851
+ " <td>1.0</td>\n",
852
+ " <td>207396.0</td>\n",
853
+ " <td>0.706</td>\n",
854
+ " <td>https://api.spotify.com/v1/tracks/0h7Ld5CvgzaU...</td>\n",
855
+ " <td>0h7Ld5CvgzaUN1zA3tdyPq</td>\n",
856
+ " <td>0.000000</td>\n",
857
+ " <td>1.0</td>\n",
858
+ " <td>...</td>\n",
859
+ " <td>0.4360</td>\n",
860
+ " <td>81.220</td>\n",
861
+ " <td>4.0</td>\n",
862
+ " <td>https://api.spotify.com/v1/tracks/0h7Ld5CvgzaU...</td>\n",
863
+ " <td>0h7Ld5CvgzaUN1zA3tdyPq</td>\n",
864
+ " <td>0.543</td>\n",
865
+ " <td>2019-01-25</td>\n",
866
+ " <td>Dawn Richard</td>\n",
867
+ " <td>[alternative r&amp;b, deep pop r&amp;b, escape room, h...</td>\n",
868
+ " <td>2019</td>\n",
869
+ " </tr>\n",
870
+ " <tr>\n",
871
+ " <th>3</th>\n",
872
+ " <td>7slfeZO9LsJbWgpkIoXBUJ</td>\n",
873
+ " <td>0.4060</td>\n",
874
+ " <td>0.590</td>\n",
875
+ " <td>1.0</td>\n",
876
+ " <td>279000.0</td>\n",
877
+ " <td>0.597</td>\n",
878
+ " <td>https://api.spotify.com/v1/tracks/4S1bYWrLOC8s...</td>\n",
879
+ " <td>4S1bYWrLOC8smuy8kJzxKQ</td>\n",
880
+ " <td>0.000023</td>\n",
881
+ " <td>9.0</td>\n",
882
+ " <td>...</td>\n",
883
+ " <td>0.0275</td>\n",
884
+ " <td>121.051</td>\n",
885
+ " <td>4.0</td>\n",
886
+ " <td>https://api.spotify.com/v1/tracks/4S1bYWrLOC8s...</td>\n",
887
+ " <td>4S1bYWrLOC8smuy8kJzxKQ</td>\n",
888
+ " <td>0.466</td>\n",
889
+ " <td>1995-09-12</td>\n",
890
+ " <td>Ricky Martin</td>\n",
891
+ " <td>[dance pop, latin, latin pop, mexican pop, pop...</td>\n",
892
+ " <td>1995</td>\n",
893
+ " </tr>\n",
894
+ " <tr>\n",
895
+ " <th>4</th>\n",
896
+ " <td>09hVIj6vWgoCDtT03h8ZCa</td>\n",
897
+ " <td>0.0316</td>\n",
898
+ " <td>0.727</td>\n",
899
+ " <td>1.0</td>\n",
900
+ " <td>218773.0</td>\n",
901
+ " <td>0.380</td>\n",
902
+ " <td>https://api.spotify.com/v1/tracks/758mQT4zzlvB...</td>\n",
903
+ " <td>758mQT4zzlvBhy9PvNePwC</td>\n",
904
+ " <td>0.000000</td>\n",
905
+ " <td>7.0</td>\n",
906
+ " <td>...</td>\n",
907
+ " <td>0.3350</td>\n",
908
+ " <td>92.050</td>\n",
909
+ " <td>4.0</td>\n",
910
+ " <td>https://api.spotify.com/v1/tracks/758mQT4zzlvB...</td>\n",
911
+ " <td>758mQT4zzlvBhy9PvNePwC</td>\n",
912
+ " <td>0.455</td>\n",
913
+ " <td>1991-09-24</td>\n",
914
+ " <td>A Tribe Called Quest</td>\n",
915
+ " <td>[alternative hip hop, conscious hip hop, east ...</td>\n",
916
+ " <td>1991</td>\n",
917
+ " </tr>\n",
918
+ " </tbody>\n",
919
+ "</table>\n",
920
+ "<p>5 rows × 28 columns</p>\n",
921
+ "</div>"
922
+ ],
923
+ "text/plain": [
924
+ " artists_id acousticness danceability disc_number \\\n",
925
+ "0 68WwJXWrpo1yVOOIZjLSeT 0.0268 0.506 1.0 \n",
926
+ "1 09xj0S68Y1OU1vHMCZAIvz 0.5050 0.487 1.0 \n",
927
+ "2 6pSsE5y0uJMwYj83KrPyf9 0.1330 0.629 1.0 \n",
928
+ "3 7slfeZO9LsJbWgpkIoXBUJ 0.4060 0.590 1.0 \n",
929
+ "4 09hVIj6vWgoCDtT03h8ZCa 0.0316 0.727 1.0 \n",
930
+ "\n",
931
+ " duration_ms energy href \\\n",
932
+ "0 248777.0 0.741 https://api.spotify.com/v1/tracks/0UATU9OJxh4m... \n",
933
+ "1 171573.0 0.297 https://api.spotify.com/v1/tracks/4JH1M62gVDND... \n",
934
+ "2 207396.0 0.706 https://api.spotify.com/v1/tracks/0h7Ld5CvgzaU... \n",
935
+ "3 279000.0 0.597 https://api.spotify.com/v1/tracks/4S1bYWrLOC8s... \n",
936
+ "4 218773.0 0.380 https://api.spotify.com/v1/tracks/758mQT4zzlvB... \n",
937
+ "\n",
938
+ " id instrumentalness key ... speechiness tempo \\\n",
939
+ "0 0UATU9OJxh4m3fwDljdGZn 0.000027 1.0 ... 0.0349 94.042 \n",
940
+ "1 4JH1M62gVDNDhDAUiQB3Qv 0.000052 11.0 ... 0.0915 185.912 \n",
941
+ "2 0h7Ld5CvgzaUN1zA3tdyPq 0.000000 1.0 ... 0.4360 81.220 \n",
942
+ "3 4S1bYWrLOC8smuy8kJzxKQ 0.000023 9.0 ... 0.0275 121.051 \n",
943
+ "4 758mQT4zzlvBhy9PvNePwC 0.000000 7.0 ... 0.3350 92.050 \n",
944
+ "\n",
945
+ " time_signature track_href \\\n",
946
+ "0 4.0 https://api.spotify.com/v1/tracks/0UATU9OJxh4m... \n",
947
+ "1 3.0 https://api.spotify.com/v1/tracks/4JH1M62gVDND... \n",
948
+ "2 4.0 https://api.spotify.com/v1/tracks/0h7Ld5CvgzaU... \n",
949
+ "3 4.0 https://api.spotify.com/v1/tracks/4S1bYWrLOC8s... \n",
950
+ "4 4.0 https://api.spotify.com/v1/tracks/758mQT4zzlvB... \n",
951
+ "\n",
952
+ " uri valence release_date artists_name \\\n",
953
+ "0 0UATU9OJxh4m3fwDljdGZn 0.236 2018-09-28 Evalyn \n",
954
+ "1 4JH1M62gVDNDhDAUiQB3Qv 0.289 2001-08-21 Café Tacvba \n",
955
+ "2 0h7Ld5CvgzaUN1zA3tdyPq 0.543 2019-01-25 Dawn Richard \n",
956
+ "3 4S1bYWrLOC8smuy8kJzxKQ 0.466 1995-09-12 Ricky Martin \n",
957
+ "4 758mQT4zzlvBhy9PvNePwC 0.455 1991-09-24 A Tribe Called Quest \n",
958
+ "\n",
959
+ " genres release_year \n",
960
+ "0 [electropop, indie electro-pop, indie poptimis... 2018 \n",
961
+ "1 [latin, latin alternative, latin rock, mexican... 2001 \n",
962
+ "2 [alternative r&b, deep pop r&b, escape room, h... 2019 \n",
963
+ "3 [dance pop, latin, latin pop, mexican pop, pop... 1995 \n",
964
+ "4 [alternative hip hop, conscious hip hop, east ... 1991 \n",
965
+ "\n",
966
+ "[5 rows x 28 columns]"
967
+ ]
968
+ },
969
+ "metadata": {},
970
+ "output_type": "display_data"
971
+ },
972
+ {
973
+ "data": {
974
+ "text/plain": [
975
+ "Index(['artists_id', 'acousticness', 'danceability', 'disc_number',\n",
976
+ " 'duration_ms', 'energy', 'href', 'id', 'instrumentalness', 'key',\n",
977
+ " 'liveness', 'loudness', 'lyrics', 'mode', 'name', 'playlist',\n",
978
+ " 'popularity', 'preview_url', 'speechiness', 'tempo', 'time_signature',\n",
979
+ " 'track_href', 'uri', 'valence', 'release_date', 'artists_name',\n",
980
+ " 'genres', 'release_year'],\n",
981
+ " dtype='object')"
982
+ ]
983
+ },
984
+ "execution_count": 12,
985
+ "metadata": {},
986
+ "output_type": "execute_result"
987
+ }
988
+ ],
989
+ "source": [
990
+ "display(filtered_track_df.head())\n",
991
+ "filtered_track_df.columns"
992
+ ]
993
+ },
994
+ {
995
+ "cell_type": "code",
996
+ "execution_count": 13,
997
+ "metadata": {},
998
+ "outputs": [],
999
+ "source": [
1000
+ "filtered_track_df.to_csv(\"clean_data.csv\", index=False)"
1001
+ ]
1002
+ },
1003
+ {
1004
+ "cell_type": "code",
1005
+ "execution_count": 14,
1006
+ "metadata": {},
1007
+ "outputs": [
1008
+ {
1009
+ "data": {
1010
+ "text/html": [
1011
+ "<div>\n",
1012
+ "<style scoped>\n",
1013
+ " .dataframe tbody tr th:only-of-type {\n",
1014
+ " vertical-align: middle;\n",
1015
+ " }\n",
1016
+ "\n",
1017
+ " .dataframe tbody tr th {\n",
1018
+ " vertical-align: top;\n",
1019
+ " }\n",
1020
+ "\n",
1021
+ " .dataframe thead th {\n",
1022
+ " text-align: right;\n",
1023
+ " }\n",
1024
+ "</style>\n",
1025
+ "<table border=\"1\" class=\"dataframe\">\n",
1026
+ " <thead>\n",
1027
+ " <tr style=\"text-align: right;\">\n",
1028
+ " <th></th>\n",
1029
+ " <th>Unnamed: 0</th>\n",
1030
+ " <th>acousticness</th>\n",
1031
+ " <th>danceability</th>\n",
1032
+ " <th>disc_number</th>\n",
1033
+ " <th>duration_ms</th>\n",
1034
+ " <th>energy</th>\n",
1035
+ " <th>instrumentalness</th>\n",
1036
+ " <th>key</th>\n",
1037
+ " <th>liveness</th>\n",
1038
+ " <th>loudness</th>\n",
1039
+ " <th>mode</th>\n",
1040
+ " <th>popularity</th>\n",
1041
+ " <th>speechiness</th>\n",
1042
+ " <th>tempo</th>\n",
1043
+ " <th>time_signature</th>\n",
1044
+ " <th>track_number</th>\n",
1045
+ " <th>valence</th>\n",
1046
+ " </tr>\n",
1047
+ " </thead>\n",
1048
+ " <tbody>\n",
1049
+ " <tr>\n",
1050
+ " <th>count</th>\n",
1051
+ " <td>101939.000000</td>\n",
1052
+ " <td>101939.000000</td>\n",
1053
+ " <td>101939.000000</td>\n",
1054
+ " <td>101939.000000</td>\n",
1055
+ " <td>1.019390e+05</td>\n",
1056
+ " <td>101939.000000</td>\n",
1057
+ " <td>101939.000000</td>\n",
1058
+ " <td>101939.000000</td>\n",
1059
+ " <td>101939.000000</td>\n",
1060
+ " <td>101939.000000</td>\n",
1061
+ " <td>101939.000000</td>\n",
1062
+ " <td>101939.000000</td>\n",
1063
+ " <td>101939.000000</td>\n",
1064
+ " <td>101939.000000</td>\n",
1065
+ " <td>101939.000000</td>\n",
1066
+ " <td>101939.000000</td>\n",
1067
+ " <td>101939.000000</td>\n",
1068
+ " </tr>\n",
1069
+ " <tr>\n",
1070
+ " <th>mean</th>\n",
1071
+ " <td>50969.000000</td>\n",
1072
+ " <td>0.352124</td>\n",
1073
+ " <td>0.586015</td>\n",
1074
+ " <td>1.032166</td>\n",
1075
+ " <td>2.467708e+05</td>\n",
1076
+ " <td>0.586479</td>\n",
1077
+ " <td>0.148776</td>\n",
1078
+ " <td>5.270858</td>\n",
1079
+ " <td>0.197640</td>\n",
1080
+ " <td>-9.462720</td>\n",
1081
+ " <td>0.618154</td>\n",
1082
+ " <td>39.782311</td>\n",
1083
+ " <td>0.128841</td>\n",
1084
+ " <td>118.358527</td>\n",
1085
+ " <td>3.875651</td>\n",
1086
+ " <td>4.608060</td>\n",
1087
+ " <td>0.482813</td>\n",
1088
+ " </tr>\n",
1089
+ " <tr>\n",
1090
+ " <th>std</th>\n",
1091
+ " <td>29427.398883</td>\n",
1092
+ " <td>0.334855</td>\n",
1093
+ " <td>0.177724</td>\n",
1094
+ " <td>0.566789</td>\n",
1095
+ " <td>1.904303e+05</td>\n",
1096
+ " <td>0.260170</td>\n",
1097
+ " <td>0.304024</td>\n",
1098
+ " <td>3.577679</td>\n",
1099
+ " <td>0.175391</td>\n",
1100
+ " <td>6.198508</td>\n",
1101
+ " <td>0.485841</td>\n",
1102
+ " <td>16.790769</td>\n",
1103
+ " <td>0.203324</td>\n",
1104
+ " <td>30.224074</td>\n",
1105
+ " <td>0.517008</td>\n",
1106
+ " <td>7.181805</td>\n",
1107
+ " <td>0.261690</td>\n",
1108
+ " </tr>\n",
1109
+ " <tr>\n",
1110
+ " <th>min</th>\n",
1111
+ " <td>0.000000</td>\n",
1112
+ " <td>0.000000</td>\n",
1113
+ " <td>0.000000</td>\n",
1114
+ " <td>1.000000</td>\n",
1115
+ " <td>1.155000e+03</td>\n",
1116
+ " <td>0.000000</td>\n",
1117
+ " <td>0.000000</td>\n",
1118
+ " <td>0.000000</td>\n",
1119
+ " <td>0.000000</td>\n",
1120
+ " <td>-60.000000</td>\n",
1121
+ " <td>0.000000</td>\n",
1122
+ " <td>0.000000</td>\n",
1123
+ " <td>0.000000</td>\n",
1124
+ " <td>0.000000</td>\n",
1125
+ " <td>0.000000</td>\n",
1126
+ " <td>1.000000</td>\n",
1127
+ " <td>0.000000</td>\n",
1128
+ " </tr>\n",
1129
+ " <tr>\n",
1130
+ " <th>25%</th>\n",
1131
+ " <td>25484.500000</td>\n",
1132
+ " <td>0.040700</td>\n",
1133
+ " <td>0.480000</td>\n",
1134
+ " <td>1.000000</td>\n",
1135
+ " <td>1.840000e+05</td>\n",
1136
+ " <td>0.411000</td>\n",
1137
+ " <td>0.000000</td>\n",
1138
+ " <td>2.000000</td>\n",
1139
+ " <td>0.095600</td>\n",
1140
+ " <td>-11.149000</td>\n",
1141
+ " <td>0.000000</td>\n",
1142
+ " <td>29.000000</td>\n",
1143
+ " <td>0.036400</td>\n",
1144
+ " <td>95.973000</td>\n",
1145
+ " <td>4.000000</td>\n",
1146
+ " <td>1.000000</td>\n",
1147
+ " <td>0.271000</td>\n",
1148
+ " </tr>\n",
1149
+ " <tr>\n",
1150
+ " <th>50%</th>\n",
1151
+ " <td>50969.000000</td>\n",
1152
+ " <td>0.238000</td>\n",
1153
+ " <td>0.610000</td>\n",
1154
+ " <td>1.000000</td>\n",
1155
+ " <td>2.168930e+05</td>\n",
1156
+ " <td>0.629000</td>\n",
1157
+ " <td>0.000037</td>\n",
1158
+ " <td>5.000000</td>\n",
1159
+ " <td>0.124000</td>\n",
1160
+ " <td>-7.599000</td>\n",
1161
+ " <td>1.000000</td>\n",
1162
+ " <td>41.000000</td>\n",
1163
+ " <td>0.050600</td>\n",
1164
+ " <td>118.067000</td>\n",
1165
+ " <td>4.000000</td>\n",
1166
+ " <td>2.000000</td>\n",
1167
+ " <td>0.477000</td>\n",
1168
+ " </tr>\n",
1169
+ " <tr>\n",
1170
+ " <th>75%</th>\n",
1171
+ " <td>76453.500000</td>\n",
1172
+ " <td>0.645000</td>\n",
1173
+ " <td>0.714000</td>\n",
1174
+ " <td>1.000000</td>\n",
1175
+ " <td>2.610550e+05</td>\n",
1176
+ " <td>0.798000</td>\n",
1177
+ " <td>0.034400</td>\n",
1178
+ " <td>8.000000</td>\n",
1179
+ " <td>0.241000</td>\n",
1180
+ " <td>-5.509000</td>\n",
1181
+ " <td>1.000000</td>\n",
1182
+ " <td>52.000000</td>\n",
1183
+ " <td>0.104000</td>\n",
1184
+ " <td>136.045000</td>\n",
1185
+ " <td>4.000000</td>\n",
1186
+ " <td>6.000000</td>\n",
1187
+ " <td>0.693000</td>\n",
1188
+ " </tr>\n",
1189
+ " <tr>\n",
1190
+ " <th>max</th>\n",
1191
+ " <td>101938.000000</td>\n",
1192
+ " <td>0.996000</td>\n",
1193
+ " <td>0.989000</td>\n",
1194
+ " <td>81.000000</td>\n",
1195
+ " <td>5.505831e+06</td>\n",
1196
+ " <td>1.000000</td>\n",
1197
+ " <td>1.000000</td>\n",
1198
+ " <td>11.000000</td>\n",
1199
+ " <td>0.999000</td>\n",
1200
+ " <td>2.719000</td>\n",
1201
+ " <td>1.000000</td>\n",
1202
+ " <td>97.000000</td>\n",
1203
+ " <td>0.969000</td>\n",
1204
+ " <td>244.035000</td>\n",
1205
+ " <td>5.000000</td>\n",
1206
+ " <td>655.000000</td>\n",
1207
+ " <td>0.993000</td>\n",
1208
+ " </tr>\n",
1209
+ " </tbody>\n",
1210
+ "</table>\n",
1211
+ "</div>"
1212
+ ],
1213
+ "text/plain": [
1214
+ " Unnamed: 0 acousticness danceability disc_number \\\n",
1215
+ "count 101939.000000 101939.000000 101939.000000 101939.000000 \n",
1216
+ "mean 50969.000000 0.352124 0.586015 1.032166 \n",
1217
+ "std 29427.398883 0.334855 0.177724 0.566789 \n",
1218
+ "min 0.000000 0.000000 0.000000 1.000000 \n",
1219
+ "25% 25484.500000 0.040700 0.480000 1.000000 \n",
1220
+ "50% 50969.000000 0.238000 0.610000 1.000000 \n",
1221
+ "75% 76453.500000 0.645000 0.714000 1.000000 \n",
1222
+ "max 101938.000000 0.996000 0.989000 81.000000 \n",
1223
+ "\n",
1224
+ " duration_ms energy instrumentalness key \\\n",
1225
+ "count 1.019390e+05 101939.000000 101939.000000 101939.000000 \n",
1226
+ "mean 2.467708e+05 0.586479 0.148776 5.270858 \n",
1227
+ "std 1.904303e+05 0.260170 0.304024 3.577679 \n",
1228
+ "min 1.155000e+03 0.000000 0.000000 0.000000 \n",
1229
+ "25% 1.840000e+05 0.411000 0.000000 2.000000 \n",
1230
+ "50% 2.168930e+05 0.629000 0.000037 5.000000 \n",
1231
+ "75% 2.610550e+05 0.798000 0.034400 8.000000 \n",
1232
+ "max 5.505831e+06 1.000000 1.000000 11.000000 \n",
1233
+ "\n",
1234
+ " liveness loudness mode popularity \\\n",
1235
+ "count 101939.000000 101939.000000 101939.000000 101939.000000 \n",
1236
+ "mean 0.197640 -9.462720 0.618154 39.782311 \n",
1237
+ "std 0.175391 6.198508 0.485841 16.790769 \n",
1238
+ "min 0.000000 -60.000000 0.000000 0.000000 \n",
1239
+ "25% 0.095600 -11.149000 0.000000 29.000000 \n",
1240
+ "50% 0.124000 -7.599000 1.000000 41.000000 \n",
1241
+ "75% 0.241000 -5.509000 1.000000 52.000000 \n",
1242
+ "max 0.999000 2.719000 1.000000 97.000000 \n",
1243
+ "\n",
1244
+ " speechiness tempo time_signature track_number \\\n",
1245
+ "count 101939.000000 101939.000000 101939.000000 101939.000000 \n",
1246
+ "mean 0.128841 118.358527 3.875651 4.608060 \n",
1247
+ "std 0.203324 30.224074 0.517008 7.181805 \n",
1248
+ "min 0.000000 0.000000 0.000000 1.000000 \n",
1249
+ "25% 0.036400 95.973000 4.000000 1.000000 \n",
1250
+ "50% 0.050600 118.067000 4.000000 2.000000 \n",
1251
+ "75% 0.104000 136.045000 4.000000 6.000000 \n",
1252
+ "max 0.969000 244.035000 5.000000 655.000000 \n",
1253
+ "\n",
1254
+ " valence \n",
1255
+ "count 101939.000000 \n",
1256
+ "mean 0.482813 \n",
1257
+ "std 0.261690 \n",
1258
+ "min 0.000000 \n",
1259
+ "25% 0.271000 \n",
1260
+ "50% 0.477000 \n",
1261
+ "75% 0.693000 \n",
1262
+ "max 0.993000 "
1263
+ ]
1264
+ },
1265
+ "execution_count": 14,
1266
+ "metadata": {},
1267
+ "output_type": "execute_result"
1268
+ }
1269
+ ],
1270
+ "source": [
1271
+ "tracks.describe()"
1272
+ ]
1273
+ }
1274
+ ],
1275
+ "metadata": {
1276
+ "kernelspec": {
1277
+ "display_name": "base",
1278
+ "language": "python",
1279
+ "name": "python3"
1280
+ },
1281
+ "language_info": {
1282
+ "codemirror_mode": {
1283
+ "name": "ipython",
1284
+ "version": 3
1285
+ },
1286
+ "file_extension": ".py",
1287
+ "mimetype": "text/x-python",
1288
+ "name": "python",
1289
+ "nbconvert_exporter": "python",
1290
+ "pygments_lexer": "ipython3",
1291
+ "version": "3.8.8"
1292
+ },
1293
+ "vscode": {
1294
+ "interpreter": {
1295
+ "hash": "fcea6937c5a281949bbc174b5880db8814ebdb6fc47b05585d01c7da350dd15c"
1296
+ }
1297
+ }
1298
+ },
1299
+ "nbformat": 4,
1300
+ "nbformat_minor": 2
1301
+ }
pages/Recommender.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ # Configure Streamlit page
4
+ st.set_page_config(
5
+ page_title="Find the Song that You Like🎸", page_icon="🎸", layout="wide"
6
+ )
7
+
8
+ import pandas as pd
9
+ import plotly.express as px
10
+ import streamlit.components.v1 as components
11
+ from sklearn.neighbors import NearestNeighbors
12
+
13
+
14
@st.cache(allow_output_mutation=True)
def data_import():
    """Load the cleaned track CSV and return it with one row per genre entry.

    The "genres" column is parsed from text into a list of strings and the
    frame is then exploded on that column, so a track that lists several
    genres appears once per genre.
    """

    def _split_genres(raw):
        # Presumably each cell holds the text form of a Python list, e.g.
        # "['pop', 'rock']" -- TODO confirm against data/clean_data.csv.
        # Drop the first/last character of the whole cell, split on ", ",
        # then drop the first/last character of every piece.
        inner = str(raw)[1:-1]
        return [piece[1:-1] for piece in inner.split(", ")]

    tracks = pd.read_csv("data/clean_data.csv")
    tracks["genres"] = tracks["genres"].apply(_split_genres)
    return tracks.explode("genres")
23
+
24
+
25
# Genre labels offered in the genre selector. Kept as a list because the page
# looks up the default entry with ``genre_names.index(...)``.
genre_names = [
    "Dance Pop", "Electronic", "Electropop", "Hip Hop",
    "Jazz", "K-pop", "Latin", "Pop",
    "Pop Rap", "R&B", "Rock",
]

# Columns used as the feature vector for the nearest-neighbour search. The
# order matters: callers build their query vector in this same order, and the
# chart code slices the first five entries. Must stay a list so that
# ``frame[audio_params]`` selects several columns.
audio_params = [
    "acousticness", "danceability", "energy",
    "instrumentalness", "valence", "tempo",
]

# Loaded once at import time; ``data_import`` is wrapped in ``st.cache``.
df_explode = data_import()
48
+
49
+
50
def match_song(genre, yr_start, yr_end, test_feat):
    """Find songs similar to a target audio profile with a k-NN search.

    Args:
        genre: Genre label in any casing; it is lower-cased before being
            compared with the "genres" column of ``df_explode``.
        yr_start: First release year to include (inclusive).
        yr_end: Last release year to include (inclusive).
        test_feat: Target feature values, in the same order as
            ``audio_params``.

    Returns:
        A ``(uris, audios)`` tuple. ``uris`` is the list of "uri" values of
        the candidate songs ordered from nearest to farthest; ``audios`` is
        the array of their ``audio_params`` rows in the same order. When no
        song matches the genre/year filter, ``uris`` is an empty list and
        ``audios`` is an empty array with one column per audio parameter.
    """
    genre = genre.lower()
    genre_data = df_explode[
        (df_explode["genres"] == genre)
        & (df_explode["release_year"] >= yr_start)
        & (df_explode["release_year"] <= yr_end)
    ]
    # Only the 500 most popular matches are considered as candidates.
    genre_data = genre_data.sort_values(by="popularity", ascending=False)[:500]

    # NearestNeighbors.fit() rejects an empty sample set, so a filter that
    # matches nothing (e.g. a narrow year range) must short-circuit here
    # instead of raising in the middle of a page render.
    if genre_data.empty:
        return [], genre_data[audio_params].to_numpy()

    # NOTE(review): features are used unscaled, so "tempo" (slider range
    # 0-244) likely dominates the distance over the 0-1 features -- confirm
    # whether that is intended before changing it.
    neigh = NearestNeighbors()
    neigh.fit(genre_data[audio_params].to_numpy())

    # Ask for every candidate so the caller can page through the full ranking.
    n_neighbors = neigh.kneighbors(
        [test_feat], n_neighbors=len(genre_data), return_distance=False
    )[0]

    ranked = genre_data.iloc[n_neighbors]
    uris = ranked["uri"].tolist()
    audios = ranked[audio_params].to_numpy()

    return uris, audios
72
+
73
+
74
def _render_track(column, track_html, audio):
    """Render one Spotify embed and its collapsible radar chart in *column*."""
    with column:
        components.html(
            track_html,
            height=400,
        )
        with st.expander("Display Chart"):
            # Only the first five audio parameters are charted; the sixth
            # ("tempo") is left out, presumably because it is not on the
            # same 0-1 scale as the others.
            df = pd.DataFrame(dict(r=audio[:5], theta=audio_params[:5]))
            fig = px.line_polar(df, r="r", theta="theta", line_close=True)
            fig.update_layout(height=400, width=340)
            st.plotly_chart(fig)


# Setup page order
def page():
    """Build the recommender page: input widgets, k-NN lookup, paged results.

    Reads the genre, year range and audio-parameter sliders, asks
    ``match_song`` for the ranked songs and shows them six at a time in two
    columns. The current page offset is kept in
    ``st.session_state["start_track_i"]`` and reset to 0 whenever any input
    changes; the "More Songs" button advances it by one page.
    """
    title = "Find Your Song🎸"
    st.title(title)

    st.write(
        "Get recommended songs on Spotify based on genre and key audio parameters."
    )
    st.markdown("##")

    # Streamlit column layout
    with st.container():
        col1, col2, col3, col4 = st.columns((2, 0.5, 0.5, 0.5))

        with col3:
            st.markdown("***Select genre:***")
            genre = st.radio("", genre_names, index=genre_names.index("Rock"))

        with col1:
            st.markdown("***Select audio parameters to customize:***")
            yr_start, yr_end = st.slider(
                "Select the year range", 1908, 2022, (1980, 2022)
            )
            acousticness = st.slider("Acousticness", 0.0, 1.0, 0.5)
            danceability = st.slider("Danceability", 0.0, 1.0, 0.5)
            energy = st.slider("Energy", 0.0, 1.0, 0.5)
            instrumentalness = st.slider("Instrumentalness", 0.0, 1.0, 0.5)
            valence = st.slider("Valence", 0.0, 1.0, 0.45)
            tempo = st.slider("Tempo", 0.0, 244.0, 125.01)

    pr_page_tracks = 6
    # Same order as ``audio_params``, which match_song() uses for the search.
    test_feat = [acousticness, danceability, energy, instrumentalness, valence, tempo]
    uris, audios = match_song(genre, yr_start, yr_end, test_feat)

    embed_template = """<iframe src="https://open.spotify.com/embed/track/{}" width="280" height="400" frameborder="0" allowtransparency="true" allow="encrypted-media"></iframe>"""
    tracks = [embed_template.format(uri) for uri in uris]

    current_inputs = [genre, yr_start, yr_end] + test_feat
    if "previous_inputs" not in st.session_state:
        st.session_state["previous_inputs"] = current_inputs

    # Any change to the inputs yields a new ranking, so go back to page one.
    if current_inputs != st.session_state["previous_inputs"]:
        if "start_track_i" in st.session_state:
            st.session_state["start_track_i"] = 0

        st.session_state["previous_inputs"] = current_inputs

    if "start_track_i" not in st.session_state:
        st.session_state["start_track_i"] = 0

    with st.container():
        col1, col2, col3 = st.columns([2, 1, 2])
        if st.button("More Songs"):
            if st.session_state["start_track_i"] < len(tracks):
                st.session_state["start_track_i"] += pr_page_tracks

        first = st.session_state["start_track_i"]
        last = first + pr_page_tracks
        current_tracks = tracks[first:last]
        current_audios = audios[first:last]

        if first < len(tracks):
            for i, (track, audio) in enumerate(zip(current_tracks, current_audios)):
                # Even positions go to the left column, odd ones to the right.
                target_column = col1 if i % 2 == 0 else col3
                _render_track(target_column, track, audio)
        else:
            st.write("No more songs")
173
+
174
+
175
+ page()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit==1.11.1
2
+ pandas==1.4.2
3
+ plotly==5.7.0
4
+ scikit-learn==1.0.2