Clementapa committed
Commit 55c5649 • 1 Parent(s): 180ea55

Change inference video

Files changed (1): app.py (+45 -13)
app.py CHANGED
@@ -2,6 +2,7 @@ import os
 import os.path as osp
 from typing import List
 
+import cv2
 import gradio as gr
 import numpy as np
 import supervision as sv
@@ -16,7 +17,7 @@ MARKDOWN = """
 ## About the model 👁️
 This is a demo for my YOLOv8 nano trained for orang outan detection.\\
 The model was trained using [this dataset](https://images.cv/dataset/orangutan-image-classification-dataset)
-for orang outan images and [this dataset](https://www.kaggle.com/datasets/slothkong/10-monkey-species/data) as background images. Annotations were obtained using zero shot object detection method GroundingDino.\\
+for orang outan images and [this dataset](https://www.kaggle.com/datasets/slothkong/10-monkey-species/data) as background images. Annotations were obtained using zero shot object detection method GroundingDino.\
 
 The code can be found on my github repository: https://github.com/clementapa/orang-outan-image-video-detection.
 
@@ -25,9 +26,8 @@ Because to habitat destruction, illicit poaching, and the pet trade, orangutans
 
 ## AI for good 🌍
 Artificial Intelligence (AI) has unquestionable power in the realm of innovation and technology. Even though artificial intelligence (AI) has frequently been used for commercial advantage, it is important to stress that AI can also be used for more noble purposes, such as protecting the environment and the planet's future. We can build a more promising and sustainable future if we reorient AI's focus from business to improving our planet.
-
-
 """
+
 EXAMPLES = []
 
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -82,11 +82,13 @@ def inference_image(image_rgb_pil: Image.Image, confidence: float) -> List[Image
     )
 
 
-def process_frame(frame: np.ndarray, _) -> np.ndarray:
+def process_frame(frame: np.ndarray, confidence: float) -> np.ndarray:
     output = YOLO_MODEL(frame, imgsz=640, verbose=False)[0]
 
     detections = sv.Detections.from_ultralytics(output)
 
+    detections = detections[detections.confidence >= confidence]
+
     labels = [
         f"{output.names[class_id]} {confidence:0.2f}"
         for _, _, confidence, class_id, _ in detections
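The new `detections = detections[detections.confidence >= confidence]` line relies on `supervision` supporting boolean-mask indexing: `detections.confidence` is a NumPy array, so comparing it to the slider value produces a mask that can index the `Detections` object directly. (In the `labels` comprehension just below, `confidence` is the per-detection score unpacked from `detections`, which shadows the function argument inside the comprehension; that is what makes each label show its own score.) A toy sketch, separate from the commit, to illustrate the filtering:

```python
# Illustration only: confidence filtering on a hand-built Detections object.
import numpy as np
import supervision as sv

detections = sv.Detections(
    xyxy=np.array([[10, 10, 50, 50], [20, 20, 80, 80], [5, 5, 30, 30]], dtype=float),
    confidence=np.array([0.92, 0.41, 0.73]),
    class_id=np.array([0, 0, 0]),
)

confidence = 0.6  # value that now comes from the Gradio slider
kept = detections[detections.confidence >= confidence]
print(len(kept))  # 2 detections survive (0.92 and 0.73)
```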
@@ -112,13 +114,43 @@ def process_frame(frame: np.ndarray, _) -> np.ndarray:
     return annotated_frame
 
 
-def inference_video(path_video):
+def inference_video(path_video, confidence):
     path_output_video = "temp.mp4"
-    sv.process_video(
-        source_path=path_video,
-        target_path=path_output_video,
-        callback=process_frame,
+    video_capture = cv2.VideoCapture(path_video)
+
+    # Check if the video file was successfully opened
+    if not video_capture.isOpened():
+        print("Error: Could not open video file.")
+        exit()
+
+    frame_width = int(video_capture.get(3))
+    frame_height = int(video_capture.get(4))
+    frame_rate = int(video_capture.get(5))
+
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # You can change the codec as needed
+    out = cv2.VideoWriter(
+        path_output_video, fourcc, frame_rate, (frame_width, frame_height)
     )
+
+    while True:
+        # Read a frame from the video
+        ret, frame = video_capture.read()
+
+        # Check if the video has ended
+        if not ret:
+            break
+
+        # Do something with the frame (e.g., display it or process it)
+        # For example, you can display the frame in a window
+        annotated_frame = process_frame(frame, confidence=confidence)
+
+        out.write(annotated_frame)
+
+    # Release the video capture object and close any open windows
+    video_capture.release()
+    out.release()
+    cv2.destroyAllWindows()
+
     return path_output_video
 
 
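The commit replaces the one-call `sv.process_video` helper with a manual OpenCV read/annotate/write loop, mainly so the slider's confidence value can reach `process_frame` (the helper's callback only receives a frame and its index). Frames read by `cv2.VideoCapture` are BGR, which Ultralytics treats as the default for NumPy inputs and which `cv2.VideoWriter` also expects, so no colour conversion is needed. For reference, a hedged sketch of how the same value could be threaded through while keeping the helper, using a closure as the callback; this is an alternative, not what the commit does:

```python
# Alternative sketch (not the committed code): keep supervision's process_video
# and pass the slider confidence into process_frame via a closure.
import supervision as sv


def inference_video_with_helper(path_video: str, confidence: float) -> str:
    path_output_video = "temp.mp4"

    def callback(frame, _index):
        # process_frame is the app.py function shown in the hunk above
        return process_frame(frame, confidence=confidence)

    sv.process_video(
        source_path=path_video,
        target_path=path_output_video,
        callback=callback,
    )
    return path_output_video
```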
 
@@ -174,9 +206,9 @@ with gr.Blocks(theme=custom_theme, css="style.css") as demo:
         label="Examples (click one of the images below to start)",
         examples_per_page=10,
     )
-    # confidence_video_slider = gr.Slider(
-    #     label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
-    # )
+    confidence_video_slider = gr.Slider(
+        label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
+    )
     submit_button_video = gr.Button("Let's find orang outans 🦧 !")
     output_video = gr.Video(label="Results")
 
@@ -189,7 +221,7 @@ with gr.Blocks(theme=custom_theme, css="style.css") as demo:
 
     submit_button_video.click(
         inference_video,
-        inputs=[input_video],
+        inputs=[input_video, confidence_video_slider],
         outputs=output_video,
         queue=True,
     )
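With the slider defined and added to `inputs`, Gradio now calls `inference_video` with two positional values: the uploaded video's file path and the slider's float. A stripped-down, self-contained sketch of that wiring (the layout and the stub body are assumptions; only the component names and keyword arguments come from the diff):

```python
# Minimal standalone sketch of the video-tab wiring introduced by this commit.
import gradio as gr


def inference_video(path_video, confidence):
    # Stub standing in for the app.py function, which runs YOLO frame by frame
    # and returns the path of the annotated "temp.mp4".
    return path_video


with gr.Blocks() as demo:
    input_video = gr.Video(label="Input video")
    confidence_video_slider = gr.Slider(
        label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
    )
    submit_button_video = gr.Button("Let's find orang outans 🦧 !")
    output_video = gr.Video(label="Results")

    submit_button_video.click(
        inference_video,
        inputs=[input_video, confidence_video_slider],
        outputs=output_video,
        queue=True,
    )

if __name__ == "__main__":
    demo.launch()
```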
 