Ifeanyi committed on
Commit
c942722
1 Parent(s): 5f02390

Upload Collaborators.R

Browse files
Files changed (1) hide show
  1. Collaborators.R +533 -0
Collaborators.R ADDED
@@ -0,0 +1,533 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # {
2
+ # # load packages
3
+ # suppressPackageStartupMessages(library(dplyr))
4
+ # suppressPackageStartupMessages(library(spotifyr))
5
+ #
6
+ # # Set up environment
7
+ # client_ID <- "bc0b388b3801497f8162615befb50a43"
8
+ # client_secret <- "512e20aa79ff4a228cc4e95ab46a45fd"
9
+ #
10
+ # Sys.setenv(SPOTIFY_CLIENT_ID = client_ID)
11
+ # Sys.setenv(SPOTIFY_CLIENT_SECRET = client_secret)
12
+ #
13
+ # access_token <- get_spotify_access_token()
14
+ # }
15
+
16
+
17
#' Build a collaboration edge list around a Spotify artist
#'
#' Pipeline:
#' 1. Fetch the artists related to `spotify_artist_id`, then the artists
#'    related to each of those; the second-level artists become the nodes.
#' 2. For every node, fetch up to 50 albums and collect every credited
#'    artist name (lower-cased) other than the node itself.
#' 3. Drop "various artists" and non-ASCII collaborator names.
#' 4. Search Spotify once per distinct collaborator and keep the top hit.
#' 5. Attach node and collaborator attributes to every edge.
#'
#' `get_related_artists()`, `get_artist_albums()` and `search_spotify()` must
#' be available on the search path (presumably from spotifyr, which the
#' commented-out setup in this file loads). Columns are read by name
#' (`name`, `id`, `popularity`, `followers.total`, `external_urls.spotify`,
#' `images`, `genres`) -- NOTE(review): this assumes the flattened layout
#' those helpers return; confirm against the installed version.
#'
#' @param spotify_artist_id Spotify id of the seed artist.
#' @return A data frame with one row per node/collaborator pair and the
#'   columns `Vertex1`, `Vertex2`, `Vertex1 popularity`, `Vertex1 followers`,
#'   `Vertex1 profile`, `Vertex1 images`, `Vertex1 genre` and the same five
#'   attributes prefixed with `Vertex2`. Attributes that cannot be resolved
#'   are `NA` rather than shifting the remaining rows.
get_artists_collaborators <- function(spotify_artist_id) {
  # Attribute columns copied onto each edge, in output order.
  attribute_cols <- c("popularity", "followers", "profile", "images", "genre")

  # Zero-row table with the collaborator layout, used when no search hits.
  empty_profiles <- data.frame(
    name = character(0),
    id = character(0),
    popularity = numeric(0),
    followers = numeric(0),
    profile = character(0),
    images = character(0),
    genre = character(0),
    stringsAsFactors = FALSE
  )

  # First element of `x`, or NA when `x` is NULL or empty.
  first_or_na <- function(x) {
    if (length(x) == 0L) NA else x[[1]]
  }

  # n-th element of `x` as a string, or NA when there is no such element.
  nth_string <- function(x, n) {
    if (length(x) < n) {
      return(NA_character_)
    }
    as.character(x[[n]])[1]
  }

  # `url` column of one `images` entry; NULL when the entry has no images.
  image_urls <- function(entry) {
    if (is.list(entry)) entry[["url"]] else NULL
  }

  # TRUE for strings that consist of 7-bit ASCII bytes only.
  is_ascii <- function(x) {
    vapply(
      x,
      function(s) all(as.integer(charToRaw(s)) < 128L),
      logical(1),
      USE.NAMES = FALSE
    )
  }

  # Node table: artists related to the artists related to `artist_id`.
  get_Nodes <- function(artist_id) {
    seed_related <- get_related_artists(
      id = artist_id,
      include_meta_info = TRUE
    )[["artists"]]
    if (!is.data.frame(seed_related) || nrow(seed_related) == 0L) {
      stop(
        "No related artists found for Spotify artist id '", artist_id, "'.",
        call. = FALSE
      )
    }

    # One table per first-level artist; works for any number of related
    # artists instead of assuming exactly 20.
    second_level <- lapply(seed_related[["id"]], function(related_id) {
      found <- get_related_artists(
        id = related_id,
        include_meta_info = TRUE
      )[["artists"]]
      if (!is.data.frame(found) || nrow(found) == 0L) {
        return(NULL)
      }
      data.frame(
        name = tolower(found[["name"]]),
        id = found[["id"]],
        popularity = found[["popularity"]],
        followers = found[["followers.total"]],
        profile = found[["external_urls.spotify"]],
        # second image url of each artist; NA when fewer than two exist
        images = vapply(
          found[["images"]],
          function(entry) nth_string(image_urls(entry), 2L),
          character(1),
          USE.NAMES = FALSE
        ),
        # second genre of each artist; NA when fewer than two exist
        genre = vapply(
          found[["genres"]],
          function(tags) nth_string(tags, 2L),
          character(1),
          USE.NAMES = FALSE
        ),
        stringsAsFactors = FALSE
      )
    })
    second_level <- Filter(Negate(is.null), second_level)
    if (length(second_level) == 0L) {
      stop(
        "No second-level related artists found for Spotify artist id '",
        artist_id, "'.",
        call. = FALSE
      )
    }

    nodes <- do.call(rbind, second_level)
    nodes <- nodes[!duplicated(nodes), , drop = FALSE]
    rownames(nodes) <- NULL
    nodes
  }

  # Lower-cased, de-duplicated names credited on `data` (an album table with
  # a list column `artists`), excluding every name containing `artist_name`.
  get_collaborators <- function(data, artist_name) {
    credited <- unlist(
      lapply(data[["artists"]], function(credits) credits[["name"]]),
      use.names = FALSE
    )
    credited <- unique(tolower(as.character(credited)))
    # fixed = TRUE: the artist name is literal text, not a regex, so names
    # such as "a$ap rocky" are matched correctly.
    is_self <- grepl(tolower(artist_name), credited, fixed = TRUE)
    credited[!is.na(credited) & !is_self]
  }

  # Attributes of the top search hit for `query`, or NULL without a hit.
  lookup_collaborator <- function(query) {
    response <- search_spotify(
      query,
      type = "artist",
      include_meta_info = TRUE
    )
    hits <- response[["artists"]][["items"]]
    if (!is.data.frame(hits) || nrow(hits) == 0L) {
      return(NULL)
    }
    data.frame(
      name = tolower(first_or_na(hits[["name"]])),
      id = first_or_na(hits[["id"]]),
      popularity = first_or_na(hits[["popularity"]]),
      followers = first_or_na(hits[["followers.total"]]),
      profile = first_or_na(hits[["external_urls.spotify"]]),
      images = nth_string(image_urls(first_or_na(hits[["images"]])), 1L),
      genre = nth_string(first_or_na(hits[["genres"]]), 1L),
      stringsAsFactors = FALSE
    )
  }

  # get related artists nodes
  related_artists <- get_Nodes(artist_id = spotify_artist_id)

  # One edge table per node: node name -> every collaborator on its albums.
  edges <- lapply(seq_len(nrow(related_artists)), function(i) {
    node_name <- related_artists[["name"]][i]
    albums <- get_artist_albums(related_artists[["id"]][i], limit = 50)
    partners <- get_collaborators(albums, artist_name = node_name)
    data.frame(
      Vertex1 = rep(node_name, times = length(partners)),
      Vertex2 = partners,
      stringsAsFactors = FALSE
    )
  })
  collabs <- do.call(rbind, edges)

  # Filter before any attribute lookup so attributes stay aligned with rows.
  keep <- collabs[["Vertex2"]] != "various artists" &
    is_ascii(collabs[["Vertex2"]])
  collabs <- collabs[keep, , drop = FALSE]
  rownames(collabs) <- NULL

  # One search per distinct collaborator rather than one per edge.
  found <- Filter(
    Negate(is.null),
    lapply(unique(collabs[["Vertex2"]]), lookup_collaborator)
  )
  collaborators_df <- if (length(found) > 0L) {
    do.call(rbind, found)
  } else {
    empty_profiles
  }

  # match() yields exactly one index (or NA) per edge, so every attribute
  # column has nrow(collabs) entries.
  node_row <- match(collabs[["Vertex1"]], related_artists[["name"]])
  partner_row <- match(collabs[["Vertex2"]], collaborators_df[["name"]])

  for (col in attribute_cols) {
    collabs[[paste("Vertex1", col)]] <- related_artists[[col]][node_row]
  }
  for (col in attribute_cols) {
    collabs[[paste("Vertex2", col)]] <- collaborators_df[[col]][partner_row]
  }

  collabs
}
508
+
509
+ # test
510
+ # tictoc::tic()
511
+ # steve_wonder_collab_network <- get_artists_collaborators(spotify_artist_id = "7guDJrEfX3qb6FEbdPA5qi")
512
+ # tictoc::toc()
513
+ #
514
+ #
515
+ # steve_wonder_collab_network |> View()
516
+ #
517
+ # tictoc::tic()
518
+ # billie_eilish_collab_network <- get_artists_collaborators(spotify_artist_id = "6qqNVTkY8uBg9cP3Jd7DAH")
519
+ # tictoc::toc()
520
+ #
521
+ # billie_eilish_collab_network |> View()
522
+ # write.csv(billie_eilish_collab_network,file = "billie_eilish_collab_network.csv")
523
+ #
524
+ # tictoc::tic()
525
+ # madonna_collab_network <- get_artists_collaborators(spotify_artist_id = "6tbjWDEIzxoDsBA1FuhfPW")
526
+ # tictoc::toc()
527
+ #
528
+ # madonna_collab_network |> View()
529
+ #
530
+ # tictoc::tic()
531
+ # diana_ross_collab_network <- get_artists_collaborators(spotify_artist_id = "3MdG05syQeRYPPcClLaUGl")
532
+ # tictoc::toc()
533
+ #