import os
import os.path as osp
from typing import List
import cv2
import gradio as gr
import numpy as np
import supervision as sv
import torch
from PIL import Image
from supervision import Color
from ultralytics import YOLO
MARKDOWN = """
<h1 style="text-align: center;"> WildGuardian: AI for Orangutan Ecosystem Surveillance 🦧🔍 </h1>
## About the model 👁️
This is a demo of my YOLOv8 nano model trained for orangutan detection.\\
The model was trained on only ~1000 images of orangutans from [this dataset](https://images.cv/dataset/orangutan-image-classification-dataset), plus ~1000 images from [this dataset](https://www.kaggle.com/datasets/slothkong/10-monkey-species/data) used as background images.\\
Annotations were obtained with the zero-shot object detection method GroundingDINO.\\
The full pipeline can be found in my GitHub repository: https://github.com/clementapa/orang-outan-image-video-detection.
## About the orangutans 🦧
Due to habitat destruction, illegal poaching, and the pet trade, orangutans are in danger of going extinct. Their natural habitat has been drastically reduced by deforestation and the expansion of palm oil plantations. Adult orangutans are occasionally hunted for their body parts, and they are frequently captured and sold as pets. Climate change and disease are also taking a toll on their populations. It is also concerning that they now survive in only two places on Earth: Borneo and Sumatra. Sustainable practices and conservation initiatives are crucial to preventing the permanent extinction of these amazing animals.
## AI for good 🌍
Artificial Intelligence (AI) has unquestionable power in the realm of innovation and technology. Although AI has often been used for commercial gain, it is important to stress that it can also serve nobler purposes, such as protecting the environment and the planet's future. By reorienting AI's focus from business alone to improving our planet, we can build a more promising and sustainable future.
"""
EXAMPLES = []
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
YOLO_MODEL = YOLO("train_7best.pt")
BOX_ANNOTATOR = sv.BoxAnnotator(color=Color.from_hex("#FF00E4"))
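# Shared annotator; its thickness and text attributes are rescaled per image before drawing.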
def annotate(
    image_bgr_numpy: np.ndarray,
detections: sv.Detections,
annotator: sv.BoxAnnotator,
    labels: List[str],
) -> Image.Image:
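    # Scale line thickness and label text with the image resolution so annotations stay readable at any size.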
thickness = 2
text_thickness = 1
text_scale = 1.0
height, width, _ = image_bgr_numpy.shape
thickness_ratio = ((width + height) / 2) / 400
text_scale_ratio = ((width + height) / 2) / 600
text_thickness_ratio = ((width + height) / 2) / 400
annotator.thickness = int(thickness * thickness_ratio)
annotator.text_scale = float(text_scale * text_scale_ratio)
annotator.text_thickness = int(text_thickness * text_thickness_ratio)
annotated_bgr_image = annotator.annotate(
scene=image_bgr_numpy, detections=detections, labels=labels
)
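    # Convert the annotated frame from BGR (OpenCV ordering) to RGB before building the PIL image.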
return Image.fromarray(annotated_bgr_image[:, :, ::-1])
def inference_image(image_rgb_pil: Image.Image, confidence: float) -> Image.Image:
output = YOLO_MODEL(image_rgb_pil, imgsz=640, verbose=False)[0]
detections = sv.Detections.from_ultralytics(output)
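    # Keep only detections at or above the user-selected confidence threshold.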
detections = detections[detections.confidence >= confidence]
    labels = [
        f"{output.names[class_id]} {det_confidence:0.2f}"
        for _, _, det_confidence, class_id, _ in detections
    ]
return annotate(
image_bgr_numpy=output.orig_img.copy(),
detections=detections,
annotator=BOX_ANNOTATOR,
labels=labels,
)
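# process_frame mirrors inference_image but keeps the annotated frame as a BGR numpy array for video writing.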
def process_frame(frame: np.ndarray, confidence: float) -> np.ndarray:
output = YOLO_MODEL(frame, imgsz=640, verbose=False)[0]
detections = sv.Detections.from_ultralytics(output)
detections = detections[detections.confidence >= confidence]
    labels = [
        f"{output.names[class_id]} {det_confidence:0.2f}"
        for _, _, det_confidence, class_id, _ in detections
    ]
    # Same resolution-dependent scaling as annotate(), applied to the shared annotator.
thickness = 2
text_thickness = 1
text_scale = 1.0
height, width, _ = output.orig_img.shape
thickness_ratio = ((width + height) / 2) / 400
text_scale_ratio = ((width + height) / 2) / 600
text_thickness_ratio = ((width + height) / 2) / 400
BOX_ANNOTATOR.thickness = int(thickness * thickness_ratio)
BOX_ANNOTATOR.text_scale = float(text_scale * text_scale_ratio)
BOX_ANNOTATOR.text_thickness = int(text_thickness * text_thickness_ratio)
annotated_frame = BOX_ANNOTATOR.annotate(
scene=output.orig_img.copy(), detections=detections, labels=labels
)
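    # The returned frame stays in BGR order, which is what cv2.VideoWriter expects.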
return annotated_frame
def inference_video(path_video, confidence):
path_output_video = "temp.mp4"
video_capture = cv2.VideoCapture(path_video)
    # Make sure the video file was successfully opened
    if not video_capture.isOpened():
        raise gr.Error("Could not open video file.")
    frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_rate = int(video_capture.get(cv2.CAP_PROP_FPS))
fourcc = cv2.VideoWriter_fourcc(*"mp4v") # You can change the codec as needed
out = cv2.VideoWriter(
path_output_video, fourcc, frame_rate, (frame_width, frame_height)
)
while True:
# Read a frame from the video
ret, frame = video_capture.read()
# Check if the video has ended
if not ret:
break
        # Run detection on the frame and write the annotated result to the output video
annotated_frame = process_frame(frame, confidence=confidence)
out.write(annotated_frame)
# Release the video capture object and close any open windows
video_capture.release()
out.release()
cv2.destroyAllWindows()
return path_output_video
custom_theme = gr.themes.Soft(primary_hue="green")
with gr.Blocks(theme=custom_theme, css="style.css") as demo:
gr.Markdown(MARKDOWN)
with gr.Tab("Detect on an image 🖼️"):
with gr.Row():
with gr.Column():
input_image = gr.Image(
image_mode="RGB",
sources=["upload", "clipboard"],
type="pil",
)
example_folder = osp.join(
osp.dirname(__file__), "resources/examples_images"
)
example_fns = [
osp.join(example_folder, example)
for example in os.listdir(example_folder)
]
gr.Examples(
examples=example_fns,
inputs=[input_image],
outputs=[input_image],
cache_examples=False,
label="Examples (click one of the images below to start)",
examples_per_page=10,
)
confidence_image_slider = gr.Slider(
label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
)
                submit_button_image = gr.Button("Let's find orangutans 🦧!")
output_image = gr.Image(label="Results", type="pil")
with gr.Tab("Detect on a video 📹"):
with gr.Row():
with gr.Column():
input_video = gr.Video(sources=["upload"])
example_folder = osp.join(
osp.dirname(__file__), "resources/examples_videos"
)
example_fns = [
osp.join(example_folder, example)
for example in os.listdir(example_folder)
]
gr.Examples(
examples=example_fns,
inputs=[input_video],
outputs=[input_video],
cache_examples=False,
label="Examples (click one of the images below to start)",
examples_per_page=10,
)
confidence_video_slider = gr.Slider(
label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
)
                submit_button_video = gr.Button("Let's find orangutans 🦧!")
output_video = gr.Video(label="Results")
submit_button_image.click(
inference_image,
inputs=[input_image, confidence_image_slider],
outputs=output_image,
queue=True,
)
submit_button_video.click(
inference_video,
inputs=[input_video, confidence_video_slider],
outputs=output_video,
queue=True,
)
if __name__ == "__main__":
demo.queue(max_size=20, api_open=False).launch()