IbrahimHasani committed
Commit a23243f
1 Parent(s): 41f8ba0

Update app.py

Files changed (1)
  1. app.py +8 -17
app.py CHANGED
@@ -3,11 +3,7 @@ import torch
 import numpy as np
 from transformers import AutoProcessor, AutoModel
 from PIL import Image
-from decord import VideoReader, cpu, gpu
-import cv2
-
-print(f"Is CUDA available: {torch.cuda.is_available()}")
-print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
+from decord import VideoReader, cpu
 
 def sample_uniform_frame_indices(clip_len, seg_len):
     if seg_len < clip_len:
@@ -20,7 +16,7 @@ def sample_uniform_frame_indices(clip_len, seg_len):
     return np.array(indices).astype(np.int64)
 
 def read_video_decord(file_path, indices):
-    vr = VideoReader(file_path, num_threads=1, ctx=cpu(0))#gpu(0) if torch.cuda.is_available() else cpu(0))
+    vr = VideoReader(file_path, num_threads=1, ctx=cpu(0))
     video = vr.get_batch(indices).asnumpy()
     return video
 
@@ -49,30 +45,25 @@ def model_interface(uploaded_video, model_choice, activity):
         "microsoft/xclip-base-patch32-16-frames": 16,
         "microsoft/xclip-base-patch32": 8
     }.get(model_choice, 32)
-
     indices = sample_uniform_frame_indices(clip_len, seg_len=len(VideoReader(uploaded_video)))
     video = read_video_decord(uploaded_video, indices)
     concatenated_image = concatenate_frames(video, clip_len)
 
-    # Convert list of numpy arrays to a single numpy ndarray
-    video_np = np.array(video)
-
+    # Appending "other" to the list of activities
     activities_list = [activity, "other"]
     processor = AutoProcessor.from_pretrained(model_choice)
-    model = AutoModel.from_pretrained(model_choice).to('cuda')
-
+    model = AutoModel.from_pretrained(model_choice)
     inputs = processor(
         text=activities_list,
-        videos=video_np, # Use the ndarray instead of the list
+        videos=list(video),
         return_tensors="pt",
         padding=True,
     )
-    inputs = {name: tensor.to('cuda') for name, tensor in inputs.items()}
 
     with torch.no_grad():
         outputs = model(**inputs)
 
-    logits_per_video = outputs.logits_per_video.cpu()
+    logits_per_video = outputs.logits_per_video
     probs = logits_per_video.softmax(dim=1)
 
     results_probs = []
@@ -88,7 +79,7 @@ def model_interface(uploaded_video, model_choice, activity):
     likely_label = activities_list[max_prob_index]
     likely_probability = float(probs[0][max_prob_index]) * 100
 
-    return concatenated_image, results_probs, results_logits, [likely_label, likely_probability]
+    return concatenated_image, results_probs, results_logits, [ likely_label , likely_probability ]
 
 iface = gr.Interface(
     fn=model_interface,
@@ -110,4 +101,4 @@ iface = gr.Interface(
     live=False
 )
 
-iface.launch()
+iface.launch()
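
With this change, video decoding and model inference both stay on the CPU. A minimal usage sketch (not part of the commit) of the two helpers touched above, sample_uniform_frame_indices and read_video_decord from app.py; the file name "video.mp4" and the clip length of 16 are assumptions for illustration only:

    from decord import VideoReader, cpu

    clip_len = 16                                    # frame count expected by "microsoft/xclip-base-patch32-16-frames"
    seg_len = len(VideoReader("video.mp4"))          # total number of frames in the uploaded clip
    indices = sample_uniform_frame_indices(clip_len, seg_len=seg_len)  # uniformly spaced frame indices
    video = read_video_decord("video.mp4", indices)  # numpy array of shape (clip_len, H, W, 3), decoded with cpu(0)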