Ahsen Khaliq committed
Commit 602c097
1 Parent(s): 43d1c17

Update app.py

Files changed (1)
  1. app.py +21 -22
app.py CHANGED
@@ -11,27 +11,8 @@ import gradio as gr
 
  # Load the open CLIP model
  device = "cuda" if torch.cuda.is_available() else "cpu"
- model, preprocess = clip.load("ViT-B/32", device=device)
-
-
-
- def search_video(search_query, display_heatmap=True, display_results_count=1):
-
-     # Encode and normalize the search query using CLIP
-     with torch.no_grad():
-         text_features = model.encode_text(clip.tokenize(search_query).to(device))
-         text_features /= text_features.norm(dim=-1, keepdim=True)
-
-     # Compute the similarity between the search query and each frame using the Cosine similarity
-     similarities = (100.0 * video_features @ text_features.T)
-     values, best_photo_idx = similarities.topk(display_results_count, dim=0)
-
-
-     for frame_id in best_photo_idx:
-         frame = video_frames[frame_id]
-         # Find the timestamp in the video and display it
-         seconds = round(frame_id.cpu().numpy()[0] * N / fps)
-         return frame,f"Found at {str(datetime.timedelta(seconds=seconds))}"
+ model, preprocess = clip.load("ViT-B/32", device=device)
+
 
 
  def inference(video, text):
@@ -81,7 +62,25 @@ def inference(video, text):
      # Print some stats
      print(f"Features: {video_features.shape}")
 
-     return search_video(text)
+
+     search_query=text
+     display_heatmap=False
+     display_results_count=1
+     # Encode and normalize the search query using CLIP
+     with torch.no_grad():
+         text_features = model.encode_text(clip.tokenize(search_query).to(device))
+         text_features /= text_features.norm(dim=-1, keepdim=True)
+
+     # Compute the similarity between the search query and each frame using the Cosine similarity
+     similarities = (100.0 * video_features @ text_features.T)
+     values, best_photo_idx = similarities.topk(display_results_count, dim=0)
+
+
+     for frame_id in best_photo_idx:
+         frame = video_frames[frame_id]
+         # Find the timestamp in the video and display it
+         seconds = round(frame_id.cpu().numpy()[0] * N / fps)
+         return frame,f"Found at {str(datetime.timedelta(seconds=seconds))}"
 
  title = "Video Search"
  description = "demo for Anime2Sketch. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."
 
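Note: this commit moves the body of the old `search_video` helper directly into `inference`, so the query encoding, cosine-similarity ranking, and timestamp lookup now run on the `video_features` computed earlier in the same call. Below is a minimal standalone sketch of that search step; it is not part of the commit. The function name `find_best_frame` is made up for illustration, and it assumes `video_features` (CLIP embeddings of the sampled frames, assumed normalized), `video_frames` (the decoded frames), `N` (the sampling stride used when frames were extracted), and `fps` are already available, as they are inside `inference` in app.py.

```python
# Sketch only (not from the commit): the frame-search step that this change
# inlines into inference(). find_best_frame is a hypothetical name;
# video_features, video_frames, N and fps are assumed to be produced
# earlier in app.py when the uploaded video is decoded and encoded.
import datetime

import clip
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model, _ = clip.load("ViT-B/32", device=device)

def find_best_frame(search_query, video_features, video_frames, N, fps,
                    display_results_count=1):
    # Encode and normalize the text query with CLIP
    with torch.no_grad():
        text_features = model.encode_text(clip.tokenize(search_query).to(device))
        text_features /= text_features.norm(dim=-1, keepdim=True)

    # Cosine similarity between the query and every sampled frame
    # (frame features are assumed to be normalized already), keep the top match
    similarities = 100.0 * video_features @ text_features.T
    values, best_photo_idx = similarities.topk(display_results_count, dim=0)

    # Map the best frame index back to a timestamp: frames were sampled every
    # N video frames, so index * N / fps gives the offset in seconds
    frame_id = best_photo_idx[0]
    frame = video_frames[frame_id]
    seconds = round(frame_id.cpu().numpy()[0] * N / fps)
    return frame, f"Found at {datetime.timedelta(seconds=seconds)}"
```

Calling `frame, ts = find_best_frame(text, video_features, video_frames, N, fps)` would return the same frame/timestamp pair that the inlined code hands back to Gradio.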