yiyixuxu committed on
Commit
96f1e87
1 Parent(s): 9855e99

add article

Browse files
Files changed (1) hide show
  1. app.py +36 -37
app.py CHANGED
@@ -17,9 +17,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
17
  model, preprocess = clip.load("ViT-B/32")
18
 
19
 
20
- def select_video_format(url, format_note='240p', ext='mp4', max_size = 50000000):
21
  defaults = ['480p', '360p','240p','144p']
22
- ydl_opts = {}
23
  ydl = youtube_dl.YoutubeDL(ydl_opts)
24
  info_dict = ydl.extract_info(url, download=False)
25
  formats = info_dict.get('formats', None)
@@ -28,21 +28,17 @@ def select_video_format(url, format_note='240p', ext='mp4', max_size = 50000000)
28
  and f['vcodec'].split('.')[0] != 'av01'
29
  and f['filesize'] is not None and f['filesize'] <= max_size]
30
  available_format_notes = set([f['format_note'] for f in formats])
31
- try:
32
- if format_note not in available_format_notes:
33
- format_note = [d for d in defaults if d in available_format_notes][0]
34
- formats = [f for f in formats if f['format_note'] == format_note]
35
 
36
- format = formats[0]
37
- format_id = format.get('format_id', None)
38
- fps = format.get('fps', None)
39
- print(f'format selected: {format}')
40
- except IndexError as err:
41
- print(f"can't find suitable video formats. we are not able to process video larger than 95 Mib at the moment")
42
- format, format_id, fps = None, None, None
 
43
  return(format, format_id, fps)
44
-
45
- # to-do: delete saved videos
46
  def download_video(url):
47
  # create "videos" folder for saved videos
48
  path_videos = Path('videos')
@@ -58,23 +54,24 @@ def download_video(url):
58
  path_video.unlink()
59
  print(f'removed video {path_video}')
60
  # select format to download for given video
61
- # by default select 480p and .mp4
62
- format, format_id, fps = select_video_format(url)
63
- if format_id is not None:
64
- ydl_opts = {
65
  'format':format_id,
66
  'outtmpl': "videos/%(id)s.%(ext)s"}
67
 
68
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
69
- try:
70
- ydl.cache.remove()
71
- meta = ydl.extract_info(url)
72
- save_location = 'videos/' + meta['id'] + '.' + meta['ext']
73
- except youtube_dl.DownloadError as error:
74
- print(f'error with download_video function: {error}')
75
- save_location = None
76
- else:
77
- fps, save_location = None, None
 
78
  return(fps, save_location)
79
 
80
  def process_video_parallel(video, skip_frames, dest_path, num_processes, process_number):
@@ -90,7 +87,6 @@ def process_video_parallel(video, skip_frames, dest_path, num_processes, process
90
  if count % skip_frames ==0:
91
  filename =f"{dest_path}/{count}.jpg"
92
  cv2.imwrite(filename, frame)
93
- #print(f"saved {filename}")
94
  count += 1
95
  cap.release()
96
 
@@ -136,13 +132,14 @@ def captioned_strip(images, caption=None, times=None, rows=1):
136
  draw.text((60, 3), caption, (255, 255, 255), font=font)
137
  for i,ts in enumerate(times):
138
  draw.text((
139
- (i % rows) * w + 40 , #column position
140
- i // rows * h + 33) # row position
141
  , ts,
142
  (255, 255, 255), font=font_small)
143
  return img
144
 
145
  def run_inference(url, sampling_interval, search_query, bs=526):
 
146
  skip_frames, path_frames= vid2frames(url,sampling_interval)
147
  if path_frames is not None:
148
  filenames = sorted(path_frames.glob('*.jpg'),key=lambda p: int(p.stem))
@@ -173,9 +170,10 @@ def run_inference(url, sampling_interval, search_query, bs=526):
173
 
174
  similarity = (100.0 * image_features @ text_features.T)
175
  values, indices = similarity.topk(4, dim=0)
176
-
 
177
  best_frames = [Image.open(filenames[ind]).convert("RGB") for ind in indices]
178
- times = [f'{datetime.timedelta(seconds = ind[0].item() * sampling_interval)}' for ind in indices]
179
  image_output = captioned_strip(best_frames,search_query, times,2)
180
  title = search_query
181
  print('task complete')
@@ -184,7 +182,7 @@ def run_inference(url, sampling_interval, search_query, bs=526):
184
  image_output = None
185
  return(title, image_output)
186
 
187
- inputs = [gr.inputs.Textbox(label="Give us the link to your youtube video! ( maximum size 50 MB)"),
188
  gr.Number(5,label='sampling interval (seconds)'),
189
  gr.inputs.Textbox(label="What do you want to search?")]
190
  outputs = [
@@ -192,7 +190,7 @@ outputs = [
192
  gr.outputs.Image(label=""),
193
  ]
194
 
195
- example_videos = ['v1rkzUIL8oc', 'k4R5wZs8cxI','0diCvgWv_ng']
196
 
197
  gr.Interface(
198
  run_inference,
@@ -200,10 +198,11 @@ gr.Interface(
200
  outputs=outputs,
201
  title="It Happened One Frame",
202
  description='A CLIP-based app that search YouTube video frame based on text',
 
203
  examples=[
204
  ['https://youtu.be/v1rkzUIL8oc', 1, "James Cagney dancing down the stairs"],
205
  ['https://youtu.be/k4R5wZs8cxI', 1, "James Cagney smashes a grapefruit into Mae Clarke's face"],
206
  ['https://youtu.be/0diCvgWv_ng', 1, "little Deborah practicing her ballet while wearing a tutu in empty restaurant"]
207
  ]
208
- ).launch(debug=True,enable_queue=True)
209
 
 
17
  model, preprocess = clip.load("ViT-B/32")
18
 
19
 
20
+ def select_video_format(url, ydl_opts={}, format_note='240p', ext='mp4', max_size = 500000000):
21
  defaults = ['480p', '360p','240p','144p']
22
+ ydl_opts = ydl_opts
23
  ydl = youtube_dl.YoutubeDL(ydl_opts)
24
  info_dict = ydl.extract_info(url, download=False)
25
  formats = info_dict.get('formats', None)
 
28
  and f['vcodec'].split('.')[0] != 'av01'
29
  and f['filesize'] is not None and f['filesize'] <= max_size]
30
  available_format_notes = set([f['format_note'] for f in formats])
 
 
 
 
31
 
32
+ if format_note not in available_format_notes:
33
+ format_note = [d for d in defaults if d in available_format_notes][0]
34
+ formats = [f for f in formats if f['format_note'] == format_note]
35
+
36
+ format = formats[0]
37
+ format_id = format.get('format_id', None)
38
+ fps = format.get('fps', None)
39
+ print(f'format selected: {format}')
40
  return(format, format_id, fps)
41
+
 
42
  def download_video(url):
43
  # create "videos" folder for saved videos
44
  path_videos = Path('videos')
 
54
  path_video.unlink()
55
  print(f'removed video {path_video}')
56
  # select format to download for given video
57
+ # by default select 240p and .mp4
58
+ try:
59
+ format, format_id, fps = select_video_format(url)
60
+ ydl_opts = {
61
  'format':format_id,
62
  'outtmpl': "videos/%(id)s.%(ext)s"}
63
 
64
+ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
65
+ try:
66
+ ydl.cache.remove()
67
+ meta = ydl.extract_info(url)
68
+ save_location = 'videos/' + meta['id'] + '.' + meta['ext']
69
+ except youtube_dl.DownloadError as error:
70
+ print(f'error with download_video function: {error}')
71
+ save_location = None
72
+ except IndexError as err:
73
+ print(f"can't find suitable video formats. we are not able to process video larger than 95 Mib at the moment")
74
+ fps, save_location = None, None
75
  return(fps, save_location)
76
 
77
  def process_video_parallel(video, skip_frames, dest_path, num_processes, process_number):
 
87
  if count % skip_frames ==0:
88
  filename =f"{dest_path}/{count}.jpg"
89
  cv2.imwrite(filename, frame)
 
90
  count += 1
91
  cap.release()
92
 
 
132
  draw.text((60, 3), caption, (255, 255, 255), font=font)
133
  for i,ts in enumerate(times):
134
  draw.text((
135
+ (i // rows) * w + 40 , #column position
136
+ i % rows * h + 33) # row position
137
  , ts,
138
  (255, 255, 255), font=font_small)
139
  return img
140
 
141
  def run_inference(url, sampling_interval, search_query, bs=526):
142
+ print(f"search for : {search_query}")
143
  skip_frames, path_frames= vid2frames(url,sampling_interval)
144
  if path_frames is not None:
145
  filenames = sorted(path_frames.glob('*.jpg'),key=lambda p: int(p.stem))
 
170
 
171
  similarity = (100.0 * image_features @ text_features.T)
172
  values, indices = similarity.topk(4, dim=0)
173
+ print(f"indices for best matches{indices}")
174
+ print(f"filenames for best matches {[filenames[i]for i in indices]}")
175
  best_frames = [Image.open(filenames[ind]).convert("RGB") for ind in indices]
176
+ times = [f'{datetime.timedelta(seconds = round(ind[0].item() * sampling_interval,2))}' for ind in indices]
177
  image_output = captioned_strip(best_frames,search_query, times,2)
178
  title = search_query
179
  print('task complete')
 
182
  image_output = None
183
  return(title, image_output)
184
 
185
+ inputs = [gr.inputs.Textbox(label="Give us the link to your youtube video! (maximum size 50 MB)"),
186
  gr.Number(5,label='sampling interval (seconds)'),
187
  gr.inputs.Textbox(label="What do you want to search?")]
188
  outputs = [
 
190
  gr.outputs.Image(label=""),
191
  ]
192
 
193
+ article = "Check out [this blogpost](https://yiyixuxu.github.io/2022/06/12/It-Happened-One-Frame.html) about this app."
194
 
195
  gr.Interface(
196
  run_inference,
 
198
  outputs=outputs,
199
  title="It Happened One Frame",
200
  description='A CLIP-based app that search YouTube video frame based on text',
201
+ article = article,
202
  examples=[
203
  ['https://youtu.be/v1rkzUIL8oc', 1, "James Cagney dancing down the stairs"],
204
  ['https://youtu.be/k4R5wZs8cxI', 1, "James Cagney smashes a grapefruit into Mae Clarke's face"],
205
  ['https://youtu.be/0diCvgWv_ng', 1, "little Deborah practicing her ballet while wearing a tutu in empty restaurant"]
206
  ]
207
+ ).launch(debug=True,enable_queue=True,share=True)
208