yiyixuxu committed on
Commit
00e7c2b
1 Parent(s): c97026d
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -127,6 +127,7 @@ def captioned_strip(images, caption=None, times=None, rows=1):
127
  def run_inference(url, sampling_interval, search_query):
128
  original_images, images = vid2frames(url,sampling_interval)
129
  image_input = torch.tensor(np.stack(images)).to(device)
 
130
  with torch.no_grad():
131
  image_features = model.encode_image(image_input)
132
  text_features = model.encode_text(clip.tokenize(search_query).to(device))
@@ -136,11 +137,13 @@ def run_inference(url, sampling_interval, search_query):
136
 
137
  similarity = (100.0 * image_features @ text_features.T)
138
  values, indices = similarity.topk(4, dim=0)
139
-
140
  best_frames = [original_images[ind] for ind in indices]
141
  times = [f'{datetime.timedelta(seconds = ind[0].item() * sampling_interval)}' for ind in indices]
 
142
  image_output = captioned_strip(best_frames,search_query, times,2)
143
  title = search_query
 
144
  return(title, image_output)
145
 
146
  inputs = [gr.inputs.Textbox(label="Give us the link to your youtube video!"),
 
127
  def run_inference(url, sampling_interval, search_query):
128
  original_images, images = vid2frames(url,sampling_interval)
129
  image_input = torch.tensor(np.stack(images)).to(device)
130
+ print("testing.. created image_input")
131
  with torch.no_grad():
132
  image_features = model.encode_image(image_input)
133
  text_features = model.encode_text(clip.tokenize(search_query).to(device))
 
137
 
138
  similarity = (100.0 * image_features @ text_features.T)
139
  values, indices = similarity.topk(4, dim=0)
140
+ print("testing.. selected best frames")
141
  best_frames = [original_images[ind] for ind in indices]
142
  times = [f'{datetime.timedelta(seconds = ind[0].item() * sampling_interval)}' for ind in indices]
143
+ print("testing... before captioned_strip func")
144
  image_output = captioned_strip(best_frames,search_query, times,2)
145
  title = search_query
146
+ print("testing... after captioned_strip func")
147
  return(title, image_output)
148
 
149
  inputs = [gr.inputs.Textbox(label="Give us the link to your youtube video!"),