archit11 committed
Commit d1fe899
1 Parent(s): d3dda12

Update README.md

Files changed (1)
  1. README.md +86 -0
README.md CHANGED
@@ -32,6 +32,89 @@ More information needed
 
 ## Intended uses & limitations
 
+ ## Inference using a phone camera (requires the IP Webcam app from the Play Store)
+ ```python
+ import cv2
+ import torch
+ import numpy as np
+ from transformers import AutoImageProcessor, VideoMAEForVideoClassification
+ 
+ np.random.seed(0)
+ 
+ def preprocess_frames(frames, image_processor):
+     inputs = image_processor(frames, return_tensors="pt")
+     inputs = {k: v.to(device) for k, v in inputs.items()}  # move tensors to the model's device (global `device` below)
+     return inputs
+ 
+ # Initialize the video capture object; replace the IP address with your phone's
+ # local IP (shown in the IP Webcam app)
+ cap = cv2.VideoCapture('http://192.168.229.98:8080/video')
+ 
+ # Set the frame size (optional)
+ cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
+ cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
+ 
+ image_processor = AutoImageProcessor.from_pretrained("archit11/videomae-base-finetuned-ucfcrime-full")
+ model = VideoMAEForVideoClassification.from_pretrained("archit11/videomae-base-finetuned-ucfcrime-full")
+ 
+ # Move the model to GPU if available
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model = model.to(device)
+ 
+ frame_buffer = []
+ buffer_size = 16  # VideoMAE expects 16 frames per clip
+ previous_labels = []
+ top_confidences = []  # initialize top_confidences
+ 
+ while True:
+     ret, frame = cap.read()
+ 
+     if not ret:
+         print("Failed to capture frame")
+         break
+ 
+     # Add the current frame to the buffer (OpenCV captures BGR; the processor expects RGB)
+     frame_buffer.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+ 
+     # Check if we have enough frames for inference
+     if len(frame_buffer) >= buffer_size:
+         # Preprocess the frames
+         inputs = preprocess_frames(frame_buffer, image_processor)
+ 
+         with torch.no_grad():
+             outputs = model(**inputs)
+             logits = outputs.logits
+ 
+         # Get the top 3 predicted labels and their confidence scores
+         top_k = 3
+         probs = torch.softmax(logits, dim=-1)
+         top_probs, top_indices = torch.topk(probs, top_k)
+         top_labels = [model.config.id2label[idx.item()] for idx in top_indices[0]]
+         top_confidences = top_probs[0].tolist()  # update top_confidences
+ 
+         # Print the top prediction whenever it changes
+         if top_labels != previous_labels:
+             previous_labels = top_labels
+             print("Predicted class:", top_labels[0])
+ 
+         # Clear the frame buffer and continue from the next frame
+         frame_buffer.clear()
+ 
+     # Display the predicted labels and confidence scores on the frame
+     for i, (label, confidence) in enumerate(zip(previous_labels, top_confidences)):
+         label_text = f"{label}: {confidence:.2f}"
+         cv2.putText(frame, label_text, (10, 30 + i * 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
+ 
+     # Display the resulting frame
+     cv2.imshow('Video', frame)
+ 
+     if cv2.waitKey(1) & 0xFF == ord('q'):
+         break
+ 
+ # Release everything when done
+ cap.release()
+ cv2.destroyAllWindows()
+ ```
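+ 
+ Note: the loop above clears the buffer after each prediction, so consecutive
+ 16-frame clips are disjoint and the label can only update once per clip. Below
+ is a minimal sketch of a sliding-window variant, assuming overlapping clips are
+ acceptable for this model (`stride` here is a hypothetical tuning knob), which
+ reuses recent frames for smoother updates:
+ 
+ ```python
+ from collections import deque
+ 
+ frame_buffer = deque(maxlen=16)  # oldest frames are evicted automatically
+ stride = 4                       # hypothetical: run inference every 4 new frames
+ new_frames = 0
+ 
+ # Inside the capture loop, instead of clearing the buffer:
+ #     frame_buffer.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+ #     new_frames += 1
+ #     if len(frame_buffer) == frame_buffer.maxlen and new_frames >= stride:
+ #         inputs = preprocess_frames(list(frame_buffer), image_processor)
+ #         new_frames = 0
+ ```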
+ ## Simple usage
 Usage:
 ```python
 import av
@@ -87,6 +170,7 @@ def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
 file_path = hf_hub_download(
     repo_id="nielsr/video-demo", filename="eating_spaghetti.mp4", repo_type="dataset"
 )
+ # To use a different video, replace `file_path` with the path to your video
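+ # e.g. container = av.open("my_video.mp4")  # "my_video.mp4" is a placeholder path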
 container = av.open(file_path)
 
 # sample 16 frames
@@ -106,6 +190,8 @@ with torch.no_grad():
 predicted_label = logits.argmax(-1).item()
 print(model.config.id2label[predicted_label])
 ```
+ 
 ## Training and evaluation data
 
 More information needed