Spaces:
Running
Running
DebasishDhal99
commited on
Commit
•
a413623
1
Parent(s):
631450c
A function that compares two playlists and tells which videos appear in only one of them.
Browse files- playlist_mismatch.py +93 -0
playlist_mismatch.py
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Hosts whose URLs may carry a playlist in their ``list`` query parameter.
_YOUTUBE_HOSTS = frozenset({
    'youtu.be',
    'www.youtube.com',
    'youtube.com',
    'm.youtube.com',
    'music.youtube.com',
})


def _get_playlist_id(url):
    """Return the playlist ID found in a YouTube URL, or None if there is none.

    Only the ``list`` query parameter is considered, so a plain video link
    (one without ``list=``) yields None instead of a video ID.
    """
    # Imported locally because this module has no top-of-file import block.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    if parsed.hostname not in _YOUTUBE_HOSTS:
        return None
    list_values = parse_qs(parsed.query).get('list')
    return list_values[0] if list_values else None


def _get_video_ids(playlistid):
    """Return every video ID in a playlist, in playlist order, following pagination."""
    vid_ids = []
    next_page_token = None
    while True:
        pl_response = youtube.playlistItems().list(
            # Only contentDetails is read below, so no other part is requested.
            part="contentDetails",
            playlistId=playlistid,
            maxResults=50,
            pageToken=next_page_token,
        ).execute()
        vid_ids.extend(
            item['contentDetails']['videoId'] for item in pl_response['items']
        )
        next_page_token = pl_response.get('nextPageToken')
        if next_page_token is None:
            break
    return vid_ids


def _get_video_name(videoid):
    """Return a video's title, or a placeholder when the API returns no item for it."""
    vid_response = youtube.videos().list(
        part="snippet",
        id=videoid,
        maxResults=1,
    ).execute()
    items = vid_response.get('items')
    if not items:
        # An empty result would otherwise raise IndexError and abort the whole
        # comparison (presumably deleted/private videos - confirm against API).
        return f"[unavailable video: {videoid}]"
    return items[0]['snippet']['title']


def playlists_mismatch(playlistlink1, playlistlink2, output='link'):
    """Return the videos that are in only one of two YouTube playlists.

    NOTE(review): this relies on a module-level ``youtube`` API client object
    that is not defined in the visible source; it must be provided elsewhere.

    Args:
        playlistlink1: URL of the first playlist (must contain ``list=``).
        playlistlink2: URL of the second playlist (must contain ``list=``).
        output: How each video is represented in the result:
            ``'id'`` for raw video IDs, ``'link'`` for ``https://youtu.be/<id>``
            links, ``'name'`` for video titles (one extra API call per video).

    Returns:
        A tuple ``(firstnotsecond, secondnotfirst)`` of lists: the videos only
        in the first playlist and the videos only in the second. Each list
        keeps playlist order and contains no duplicates.

    Raises:
        ValueError: If ``output`` is not ``'id'``, ``'link'`` or ``'name'``.
        AssertionError: If either link does not contain a playlist ID.
    """
    # Reject a bad mode before spending any API calls.
    if output not in ('id', 'link', 'name'):
        raise ValueError(
            f"output must be 'id', 'link' or 'name', got {output!r}"
        )

    playlist1id = _get_playlist_id(playlistlink1)
    playlist2id = _get_playlist_id(playlistlink2)

    # Raised explicitly (not via ``assert``) so the checks survive ``python -O``
    # while keeping the exception type and messages callers already see.
    if playlist1id is None:
        raise AssertionError("Playlist 1 link is invalid")
    if playlist2id is None:
        raise AssertionError("Playlist 2 link is invalid")
    print("Playlist IDs obtained")

    vid1_ids = _get_video_ids(playlist1id)
    print("Playlist 1 video IDs obtained, no. of videos:", len(vid1_ids))
    vid2_ids = _get_video_ids(playlist2id)
    print("Playlist 2 video IDs obtained, no. of videos:", len(vid2_ids))

    print("Video IDs obtained")

    # dict.fromkeys de-duplicates while keeping playlist order, so the result
    # is deterministic (a plain set difference has arbitrary ordering).
    in_first = set(vid1_ids)
    in_second = set(vid2_ids)
    firstnotsecond = [v for v in dict.fromkeys(vid1_ids) if v not in in_second]
    secondnotfirst = [v for v in dict.fromkeys(vid2_ids) if v not in in_first]

    if output == 'id':
        return firstnotsecond, secondnotfirst
    if output == 'link':
        return (
            [f"https://youtu.be/{i}" for i in firstnotsecond],
            [f"https://youtu.be/{i}" for i in secondnotfirst],
        )
    # output == 'name'
    return (
        [_get_video_name(i) for i in firstnotsecond],
        [_get_video_name(i) for i in secondnotfirst],
    )
|