# NOTE: the lines that were here were page-chrome residue from a web file
# viewer ("Spaces: Sleeping" status text and a 1-353 line-number gutter);
# they are not part of the program.
# File size: 12,702 Bytes
# Revision: 6a6c71e
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.cluster import AffinityPropagation # Remove once model is saved
import scipy
import ast
################## PREPARATION ##################
# CSV exports of track metadata. 'opm.csv' is kept last because the OPM
# playlist frame is later read back as dataframes[-1].
files = [
    '112023_df_tracks_metadata.csv',
    '112023_df_tracks_metadata2.csv',
    '122023_df_tracks_metadata.csv',
    '012024_df_tracks_metadata.csv',
    '022024_df_tracks_metadata.csv',
    '032024_df_tracks_metadata.csv',
    '042024_df_tracks_metadata.csv',
    'nonopm.csv',
    'opm.csv',
]

# load every CSV file into its own pandas dataframe (same order as `files`)
dataframes = [pd.read_csv(file) for file in files]

# merge all the dataframes into one and keep a single row per track id
data = pd.concat(dataframes)
data.drop_duplicates(subset='track.id', inplace=True)

cols = [
    'track.id', 'track.name', 'track.artists',
    'danceability', 'energy', 'key', 'loudness', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
]
# Explicit copy: new columns are assigned onto data_lesscol further down, and
# assigning onto a bare selection of `data` triggers pandas'
# SettingWithCopyWarning.
data_lesscol = data[cols].copy()

################## ADDING HAS FILIPINO FEATURE ##################
setofartists = set()  # set of all artists in the dataset
artistlinks = {}  # link of each artist to their respective artist page. helps with manual sorting
maxartists = 0  # maximum number of artists per track

# each 'track.artists' cell is the string form of a list of artist dicts
artistslist = data_lesscol['track.artists'].to_list()
for x in artistslist:
    dicts = ast.literal_eval(x)
    maxartists = max(maxartists, len(dicts))
    for y in dicts:
        setofartists.add(y['name'])
        artistlinks[y['name']] = y['external_urls']['spotify']
def returnArtistsList(row):
    '''
    Build the display string of artist names for one track row.

    row['track.artists'] holds the string form of a list of artist dicts;
    it is parsed with ast.literal_eval and the 'name' of every entry is
    joined with "| ".
    '''
    parsed = ast.literal_eval(row['track.artists'])
    return '| '.join(entry['name'] for entry in parsed)
# create an artists column, which is just the artists' names separated by "| "
data_lesscol['artists'] = data_lesscol.apply(returnArtistsList, axis=1)

# collect the artist names found in the OPM playlists dataset
# (the last file that was loaded)
opm = dataframes[-1]
OPMartists = set()
for artistobjectarr in opm['track.artists'].to_list():
    artistsarr = ast.literal_eval(artistobjectarr)
    for artistobject in artistsarr:
        OPMartists.add(artistobject['name'])

# some of these artists are not Filipino, but we can just remove them
toRemove = ['Kina Grannis']
for name in toRemove:
    # discard (not remove): a name that is absent from the playlist data
    # must not abort start-up with a KeyError
    OPMartists.discard(name)
################## ALL FILIPINO LIST ##################
# manually retrieved list of OPM artists
# NOTE(review): "Tom Odell" and "Mitski" are listed below - confirm they are
# meant to be treated as Filipino artists.
manualartists = [
    "Orange & Lemons",
    "SunKissed Lola",
    "YP",
    "E.J",
    "Dilaw",
    "Eros Tongco",
    "Teys",
    "Lola Amour",
    "slimedemidemislime",
    "SwKeith",
    "juan karlos",
    "Jed Baruelo",
    "Jthekidd",
    "P-Lo",
    "rhodessa",
    "Hev Abi",
    "Martti Franca",
    "TONEEJAY",
    "Denise Julia",
    "SB19",
    "Al James",
    "HELLMERRY",
    "Gabito Ballesteros",
    "Calein",
    # the comma after this entry was missing, which fused it with the next
    # one into the single string "syd harthaNateman"
    "syd hartha",
    "Nateman",
    "Tom Odell",
    "Realest Cram",
    "Janine",
    "Madman Stan",
    "Sugarcane",
    "DEMI",
    "gins&melodies",
    "Cean Jr.",
    "Arthur Miguel",
    "Cup of Joe",
    "CK YG",
    "Janine Berdin",
    "Adie",
    "Zack Tabudlo",
    "Maki",
    "NOBITA",
    "Arthur Nery",
    "Mitski",
    "Up Dharma Down",
    "Ben&Ben",
    "BINI",
    "Eraserheads",
    "Silent Sanctuary",
    "Moira Dela Torre",
    "Rivermaya",
    "Parokya Ni Edgar",
    "The Itchyworms",
    "Spongecola",
    "December Avenue",
    "Alfred Jasper",
    "Gico",
    "Kitchie Nadal",
    "PRETTYMF9INE",
    "CA$HMAN",
    "Sansette",
    "Just Hush",
    "Tera",
    "Kid Bando",
    "Jason Dhakal",
]
# fold the manually curated names into the set built from the playlists
OPMartists.update(manualartists)
def checkIfOPM(row):
    '''
    Flag whether a track row credits at least one known Filipino artist.

    row['artists'] is split on "| " and every name is looked up in
    OPMartists. Returns 1 on the first hit, 0 when no name matches.
    '''
    credited = row['artists'].split('| ')
    for performer in credited:
        if performer in OPMartists:
            return 1
    return 0
# flag every track that credits at least one Filipino artist, then export
# the enriched table
data_lesscol['hasFilipinoArtist'] = data_lesscol.apply(checkIfOPM, axis=1)
data_lesscol.to_csv('RESULT.csv', index=False)

################## CLUSTERING ##################
# audio-feature columns that are fed to the clustering model
clustercols = [
    'danceability',
    'energy',
    'key',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
]
X = data_lesscol[clustercols]

# fit affinity propagation on the feature matrix; the seed is fixed
ap_cluster = AffinityPropagation(random_state=5)
results = ap_cluster.fit(X)
################## SAMPLE ##################
# Placeholder track used to seed the shared state below:
# Never Gonna Give You Up by Rick Astley (Dance Pop)
sample_track = pd.DataFrame(
    {
        'danceability': [0.73],
        'energy': [0.94],
        # arbitrary placeholder: the song is in Ab major, but the numeric
        # encoding of that key was not looked up
        'key': [5],
        'loudness': [-11.82],
        'speechiness': [0.04],
        'acousticness': [0.14],
        'instrumentalness': [0.01],
        'liveness': [0.15],
        'valence': [0.92],
        'tempo': [113.0],  # presumably beats per minute (BPM)
    },
    columns=clustercols,
)

################## PREDICTION ##################
# shared state: written by find_closest_song() and read by match()
current_closest_track = current_track = sample_track
# match() returns an empty table while this is False
input_flag = False
# TODO: Modify to allow OPM tracks only
def find_closest_song(track_name):
    '''
    Recommend the Filipino-artist track closest to the requested track.

    track_name: "[TRACK NAME]" or "[TRACK NAME] by [ARTIST]" (case-sensitive).
        The whole input is tried as a track name first; only when that fails
        is it split on the LAST " by ", so titles that themselves contain
        " by " can still be found.

    Returns "<track> by <artists>" for the recommendation, or a plain message
    when the input is not in the dataset or when the track's cluster holds no
    other Filipino-artist track.

    Side effects: rebinds the globals current_track (the one selected input
    row), current_closest_track (the recommended row) and input_flag.
    input_flag is True only when a recommendation was produced, so match()
    never reads stale or half-updated state.
    '''
    global current_closest_track, current_track, input_flag

    # Local import: a bare `import scipy` may not load the
    # scipy.spatial.distance submodule on older SciPy releases.
    from scipy.spatial.distance import cdist

    not_found = 'No songs found. Check if input matches the suggested songs. Input is case-sensitive.'

    query = track_name or ''
    artist = ''
    titles = data_lesscol['track.name']
    title_mask = titles == query
    if not title_mask.any() and ' by ' in query:
        # "<title> by <artist>": split on the last separator only
        query, _, artist = query.rpartition(' by ')
        title_mask = titles == query

    if not query or not title_mask.any():
        input_flag = False
        return not_found

    candidates = data_lesscol[title_mask]
    print(f"TRACK: {candidates}")

    # Several tracks can share a title: prefer the one by the requested
    # artist, and fall back to the first title match when the artist is
    # missing or does not match any candidate.
    track = candidates.head(1)
    if len(candidates) > 1:
        if artist:
            by_artist = candidates[candidates['artists'] == artist]
            if not by_artist.empty:
                track = by_artist.head(1)
        print(f"TRACK2: {track}")

    features = track[clustercols]

    # Predict the cluster for the requested track
    cluster_label = ap_cluster.predict(features)[0]

    # Run predict on the original dataset to see which tracks share that cluster
    whole_cluster = ap_cluster.predict(X)
    same_cluster_tracks = data_lesscol[whole_cluster == cluster_label]

    # Keep the Filipino tracks of that cluster, excluding the requested title
    # and (when an artist was given) every track credited to exactly that artist
    filipino_tracks = same_cluster_tracks[
        (same_cluster_tracks['hasFilipinoArtist'] == 1)
        & (same_cluster_tracks['track.name'] != query)
        & (same_cluster_tracks['artists'] != artist)
    ]

    print("SAME CLUSTER TRACKS")
    print(same_cluster_tracks['track.name'])
    print("\n\nFILIPINO TRACKS")
    print(filipino_tracks['track.name'])

    if filipino_tracks.empty:
        # np.argmin raises ValueError on an empty distance array
        input_flag = False
        return 'No similar OPM songs found for this track.'

    # Find the closest song in the same cluster; distances has one column,
    # so the flat argmin is the row position
    distances = cdist(filipino_tracks[clustercols], features)
    closest_song = filipino_tracks.iloc[int(np.argmin(distances))]

    print("\nDISTANCES")
    print(distances)

    # Publish the result for match() only once everything succeeded
    current_closest_track = closest_song
    current_track = track
    input_flag = True

    return f"{closest_song['track.name']} by {closest_song['artists']}"
################## MATCH ##################
# Builds the per-feature match table for the last recommendation
def match():
    '''
    Compare current_closest_track with current_track feature by feature.

    Every feature scores 100 - |difference| / range * 100, where the range is
    1 for danceability, energy, speechiness, acousticness, instrumentalness,
    liveness and valence, 12 for key, 60 for loudness, and max - min of
    X['tempo'] for tempo. The overall match is the mean of the ten scores.

    Returns a DataFrame with the columns 'Features' and 'Match Percentage';
    it is empty while input_flag is False.
    '''
    global current_closest_track, current_track, X, input_flag

    if not input_flag:
        return pd.DataFrame({'Features': [], 'Match Percentage': []})

    print(f"CLOSEST TRACK: {current_closest_track}")
    print(f"CURRENT TRACK: {current_track}")

    tempo_span = max(X['tempo']) - min(X['tempo'])

    # (display label, column name, value range), in summation order
    feature_spans = [
        ('Danceability', 'danceability', 1),
        ('Energy', 'energy', 1),
        ('Key', 'key', 12),
        ('Loudness', 'loudness', 60),
        ('Speechiness', 'speechiness', 1),
        ('Acousticness', 'acousticness', 1),
        ('Instrumentalness', 'instrumentalness', 1),
        ('Liveness', 'liveness', 1),
        ('Valence', 'valence', 1),
        ('Tempo', 'tempo', tempo_span),
    ]

    labels = ['Overall Match']
    scores = []
    for label, column, span in feature_spans:
        gap = (current_closest_track[column] - current_track[column]) / span
        scores.append(100 - abs(gap) * 100)
        labels.append(label)

    # accumulate left to right so the floating-point result is unchanged
    total = scores[0]
    for score in scores[1:]:
        total = total + score
    overall = total / 10

    percentages = [overall.values[0]]
    percentages.extend(score.values[0] for score in scores)

    return pd.DataFrame({
        'Features': labels,
        'Match Percentage': percentages,
    })
################## WEBSITE ##################
# One "<track name> by <artists>" label per row of data_lesscol; building
# them in a set drops duplicate labels before the list is materialised
all_tracks_and_artist = list({
    title + ' by ' + credited
    for title, credited in zip(data_lesscol['track.name'], data_lesscol['artists'])
})
def filter_and_select_track(filter_text: str):
    '''
    List every known "<track> by <artists>" label containing filter_text.

    The match is a case-insensitive substring test against
    all_tracks_and_artist. Returns the hits joined with newlines, or an
    empty string when filter_text is empty or nothing matches.
    '''
    if not filter_text:
        return ''

    needle = filter_text.lower()
    hits = []
    for label in all_tracks_and_artist:
        if needle in label.lower():
            hits.append(label)
    return '\n'.join(hits)
def change_input(filter_text: str):
    '''
    Return the first known "<track> by <artists>" label containing filter_text.

    The match is a case-insensitive substring test against
    all_tracks_and_artist. Returns an empty string for empty input. When
    nothing matches, filter_text is returned unchanged so the textbox this
    feeds keeps what the user typed (indexing the empty hit list used to
    raise IndexError).
    '''
    if not filter_text:
        return ''

    needle = filter_text.lower()
    first_hit = next(
        (label for label in all_tracks_and_artist if needle in label.lower()),
        None,
    )
    return filter_text if first_hit is None else first_hit
# Page layout and event wiring for the recommender UI
with gr.Blocks(theme=gr.themes.Glass(), css=".gradio-container {background: url('https://undsgn.com/wp-content/uploads/2018/04/ltotbngnzzu-uai-1600x900.jpg'); opacity:10}") as demo:
    # The Markdown text is kept at column 0 so its content is unchanged.
    gr.Markdown(
        """
# OPM Song Recommender
#### Music is an integral part of human life. However, local artists tend to struggle financially due to the saturated market. Similarly, listeners may find it difficult to support their local artists, or even find local tracks in the first place. How is an interested listener going to find Filipino music that is similar to their tastes?
#### This application takes in a track (both OPM and non-OPM) and seeks to find OPM music with similar features. Try it out!
"""
    )

    # Search box plus a live list of matching "<track> by <artists>" labels
    input_text = gr.Textbox(label="Search Track Name (Case-sensitive) | Format: [TRACK NAME] or [TRACK NAME] by [ARTIST]", placeholder="Search for a song")
    suggestion = gr.Textbox('', label="Suggestions Available (Double-click to copy first record in input)", placeholder="Available song in dataset will be shown here.")
    input_text.change(filter_and_select_track, inputs=input_text, outputs=suggestion)
    # selecting text in the suggestion box copies the first match into the search box
    suggestion.select(change_input, inputs=input_text, outputs=input_text)

    button = gr.Button(value="Search Similar OPM")
    result = gr.Textbox(label="Result", value="Result will be shown here.")
    button.click(find_closest_song, inputs=input_text, outputs=result)

    # The match table is recomputed whenever the result text changes
    match_percentage = gr.DataFrame(pd.DataFrame({
        'Features': [],
        'Match Percentage': []}))
    result.change(match, outputs=match_percentage)

    # Display all tracks
    gr.Markdown(""" ## All Tracks (OPM and Non-OPM) """)
    # A single Markdown component holds the whole list: creating one
    # component per track builds thousands of components for no benefit.
    gr.Markdown('\n'.join(f""" ##### {track} """ for track in all_tracks_and_artist))

demo.launch(share=True)