IbrahimHasani committed on
Commit
2c5687c
1 Parent(s): 53189f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -7
app.py CHANGED
@@ -3,14 +3,19 @@ import torch
3
  import numpy as np
4
  from transformers import AutoProcessor, AutoModel
5
  from PIL import Image
6
- from decord import VideoReader, cpu
7
 
8
  MODEL_NAME = "microsoft/xclip-base-patch16-zero-shot"
9
  CLIP_LEN = 32
10
 
11
- # Load model and processor once
 
 
 
 
12
  processor = AutoProcessor.from_pretrained(MODEL_NAME)
13
- model = AutoModel.from_pretrained(MODEL_NAME)
 
14
 
15
  def sample_uniform_frame_indices(clip_len, seg_len):
16
  if seg_len < clip_len:
@@ -23,7 +28,9 @@ def sample_uniform_frame_indices(clip_len, seg_len):
23
  return np.array(indices).astype(np.int64)
24
 
25
  def read_video_decord(file_path, indices):
26
- vr = VideoReader(file_path, num_threads=1, ctx=cpu(0))
 
 
27
  video = vr.get_batch(indices).asnumpy()
28
  return video
29
 
@@ -55,6 +62,9 @@ def model_interface(uploaded_video, activity):
55
  padding=True,
56
  )
57
 
 
 
 
58
  with torch.no_grad():
59
  outputs = model(**inputs)
60
 
@@ -66,13 +76,13 @@ def model_interface(uploaded_video, activity):
66
  max_prob_index = torch.argmax(probs[0]).item()
67
  for i in range(len(activities_list)):
68
  current_activity = activities_list[i]
69
- prob = float(probs[0][i])
70
- logit = float(logits_per_video[0][i])
71
  results_probs.append((current_activity, f"Probability: {prob * 100:.2f}%"))
72
  results_logits.append((current_activity, f"Raw Score: {logit:.2f}"))
73
 
74
  likely_label = activities_list[max_prob_index]
75
- likely_probability = float(probs[0][max_prob_index]) * 100
76
 
77
  return concatenated_image, results_probs, results_logits, [ likely_label , likely_probability ]
78
 
 
3
  import numpy as np
4
  from transformers import AutoProcessor, AutoModel
5
  from PIL import Image
6
+ from decord import VideoReader, cpu, gpu
7
 
8
  MODEL_NAME = "microsoft/xclip-base-patch16-zero-shot"
9
  CLIP_LEN = 32
10
 
11
+ # Check for GPU availability
12
+ device = "cuda" if torch.cuda.is_available() else "cpu"
13
+ print (device)
14
+
15
+ # Load model and processor once; move the model to the selected device (GPU if available, else CPU)
16
  processor = AutoProcessor.from_pretrained(MODEL_NAME)
17
+ model = AutoModel.from_pretrained(MODEL_NAME).to(device)
18
+ model.eval()
19
 
20
  def sample_uniform_frame_indices(clip_len, seg_len):
21
  if seg_len < clip_len:
 
28
  return np.array(indices).astype(np.int64)
29
 
30
  def read_video_decord(file_path, indices):
31
+ # Decode video on the CPU (the decord gpu context is imported but not used here)
32
+ vr_ctx = cpu(0)
33
+ vr = VideoReader(file_path, num_threads=1, ctx=vr_ctx)
34
  video = vr.get_batch(indices).asnumpy()
35
  return video
36
 
 
62
  padding=True,
63
  )
64
 
65
+ # Move inputs to the same device as the model
66
+ inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
67
+
68
  with torch.no_grad():
69
  outputs = model(**inputs)
70
 
 
76
  max_prob_index = torch.argmax(probs[0]).item()
77
  for i in range(len(activities_list)):
78
  current_activity = activities_list[i]
79
+ prob = float(probs[0][i].cpu())
80
+ logit = float(logits_per_video[0][i].cpu())
81
  results_probs.append((current_activity, f"Probability: {prob * 100:.2f}%"))
82
  results_logits.append((current_activity, f"Raw Score: {logit:.2f}"))
83
 
84
  likely_label = activities_list[max_prob_index]
85
+ likely_probability = float(probs[0][max_prob_index].cpu()) * 100
86
 
87
  return concatenated_image, results_probs, results_logits, [ likely_label , likely_probability ]
88