Spaces:

objectDetection
/

23A464A

Sleeping

App Files Files Community

apailang committed on Dec 27, 2023

Commit

c1d9208

•

1 Parent(s): 685deac

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -38

app.py CHANGED Viewed

@@ -69,46 +69,49 @@ def predict2(image_np):
     return result_pil_img
 def detect_video(video):
-    video_reader = cv2.VideoCapture(video)
-    nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
-    frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
-    fps = video_reader.get(cv2.CAP_PROP_FPS)
-    video_writer = cv2.VideoWriter(video_out_filepath,
-                               cv2.VideoWriter_fourcc(*'mp4v'),
-                               fps,
-                               (frame_w, frame_h))
-    for i in tqdm(range(nb_frames)):
-        ret, image_np = video_reader.read()
-        input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.uint8)
-        results = detection_model(input_tensor)
-        viz_utils.visualize_boxes_and_labels_on_image_array(
-                  image_np,
-                  results['detection_boxes'][0].numpy(),
-                  (results['detection_classes'][0].numpy()+ label_id_offset).astype(int),
-                  results['detection_scores'][0].numpy(),
-                  category_index,
-                  use_normalized_coordinates=True,
-                  max_boxes_to_draw=200,
-                  min_score_thresh=.50,
-                  agnostic_mode=False,
-                  line_thickness=2)
-        video_writer.write(np.uint8(image_np))
-    # Release camera and close windows
-    video_reader.release()
-    video_writer.release()
-    cv2.destroyAllWindows()
-    cv2.waitKey(1)
 label_id_offset = 0
 REPO_ID = "apailang/mytfodmodel"
 detection_model = load_model()
 # pil_image = Image.open(image_path)
 # image_arr = pil_image_as_numpy_array(pil_image)
@@ -138,8 +141,6 @@ tts_demo = gr.Interface(
     cache_examples=True
     )#.launch(share=True)
-samples_folder = 'data'
 a = os.path.join(os.path.dirname(__file__), "data/a.mp4")  # Video
 b = os.path.join(os.path.dirname(__file__), "data/b.mp4")  # Video
 c = os.path.join(os.path.dirname(__file__), "data/c.mp4")  # Video
@@ -150,7 +151,7 @@ video_out_file = os.path.join(samples_folder,'detected' + '.mp4')
 stt_demo = gr.Interface(
     fn=detect_video,
     inputs=gr.Video(),
-    outputs="data/detected.mp4",
     examples=[
         [a],
         [b],
@@ -158,6 +159,7 @@ stt_demo = gr.Interface(
     ],
     cache_examples=False
 )
 demo = gr.TabbedInterface([tts_demo, stt_demo], ["Image", "Video"])
 if __name__ == "__main__":

     return result_pil_img
 def detect_video(video):
+    """Run object detection on every frame of a video, yielding annotated frames.
+
+    Args:
+        video: Video source handed directly to ``cv2.VideoCapture``.
+
+    Yields:
+        For each frame read successfully, the value returned by
+        ``viz_utils.visualize_boxes_and_labels_on_image_array`` (the frame
+        with boxes and labels drawn for detections scoring at least 0.5).
+    """
+    # Create a video capture object
+    cap = cv2.VideoCapture(video)
+    # try/finally so the capture is released even when the consumer stops
+    # iterating early or a frame raises during inference/drawing.
+    try:
+        # Process frames in a loop
+        while cap.isOpened():
+            ret, frame = cap.read()
+            if not ret:
+                # No more frames (or the read failed): stop.
+                break
+            # NOTE(review): OpenCV decodes frames in BGR channel order;
+            # confirm whether the model / UI expect RGB.
+            # Expand dimensions since model expects images to have shape: [1, None, None, 3]
+            image_np_expanded = np.expand_dims(frame, axis=0)
+            # Run inference with the module-level model loaded by load_model()
+            output_dict = detection_model(image_np_expanded)
+            # Extract detections for the single image in the batch
+            boxes = output_dict['detection_boxes'][0].numpy()
+            scores = output_dict['detection_scores'][0].numpy()
+            classes = output_dict['detection_classes'][0].numpy().astype(np.int64)
+            # Draw bounding boxes and labels
+            image_np_with_detections = viz_utils.visualize_boxes_and_labels_on_image_array(
+                frame,
+                boxes,
+                classes,
+                scores,
+                category_index,
+                use_normalized_coordinates=True,
+                max_boxes_to_draw=20,
+                min_score_thresh=.5,
+                agnostic_mode=False)
+            # Yield the processed frame
+            yield image_np_with_detections
+    finally:
+        # Release resources
+        cap.release()
 label_id_offset = 0
 REPO_ID = "apailang/mytfodmodel"
 detection_model = load_model()
+samples_folder = 'data'
 # pil_image = Image.open(image_path)
 # image_arr = pil_image_as_numpy_array(pil_image)
     cache_examples=True
     )#.launch(share=True)
 a = os.path.join(os.path.dirname(__file__), "data/a.mp4")  # Video
 b = os.path.join(os.path.dirname(__file__), "data/b.mp4")  # Video
 c = os.path.join(os.path.dirname(__file__), "data/c.mp4")  # Video
 stt_demo = gr.Interface(
     fn=detect_video,
     inputs=gr.Video(),
+    outputs=gr.Video(label="Detected Video"),
     examples=[
         [a],
         [b],
     ],
     cache_examples=False
 )
 demo = gr.TabbedInterface([tts_demo, stt_demo], ["Image", "Video"])
 if __name__ == "__main__":