Iskaj committed on
Commit
727e567
1 Parent(s): ed0180d

add documentation to data.py

Browse files
Files changed (1) hide show
  1. data.py +5 -1
data.py CHANGED
@@ -4,6 +4,9 @@ import shutil
4
 
5
  from videohash import filepath_from_url
6
 
 
 
 
7
  with open('apb2022.json') as filein:
8
  urls, videos, url2video, video2url = [], [], {}, {}
9
  for item in json.load(filein):
@@ -12,13 +15,14 @@ with open('apb2022.json') as filein:
12
  url2video[item['url']] = item['mp4']
13
  video2url[item['mp4']] = item['url']
14
 
 
15
  for url in videos:
16
  filepath = filepath_from_url(url) + '.index'
17
  datapath = os.path.join('data', os.path.basename(filepath))
18
  if not os.path.exists(filepath) and os.path.exists(datapath):
19
  shutil.copyfile(datapath, filepath)
20
 
21
-
22
  if __name__ == "__main__":
23
  from videomatch import get_video_index
24
 
 
4
 
5
  from videohash import filepath_from_url
6
 
7
+ # < Algemene Politieke Beschouwing 2022 >
8
+ # Load the reference videos to compare against from a .json file.
9
+ # This can be updated with any .json file containing other videos.
10
  with open('apb2022.json') as filein:
11
  urls, videos, url2video, video2url = [], [], {}, {}
12
  for item in json.load(filein):
 
15
  url2video[item['url']] = item['mp4']
16
  video2url[item['mp4']] = item['url']
17
 
18
+ # For each video, compute the expected index filepath; if no index exists there yet but a copy is stored in the data folder, copy it from the data folder to that filepath.
19
  for url in videos:
20
  filepath = filepath_from_url(url) + '.index'
21
  datapath = os.path.join('data', os.path.basename(filepath))
22
  if not os.path.exists(filepath) and os.path.exists(datapath):
23
  shutil.copyfile(datapath, filepath)
24
 
25
+ # When this module is run as a script, manually build the indices for the dataset above.
26
  if __name__ == "__main__":
27
  from videomatch import get_video_index
28