import os
import os.path as osp
from typing import List

import cv2
import gradio as gr
import numpy as np
import supervision as sv
import torch
from PIL import Image
from supervision import Color
from ultralytics import YOLO

MARKDOWN = """
<h1 style="text-align: center;"> WildGuardian: AI for Orangutan Ecosystem Surveillance 🦧🔍 </h1>

## About the model 👁️
This is a demo of my YOLOv8 nano model trained for orangutan detection.\\
The model was trained on only ~1000 orangutan images from [this dataset](https://images.cv/dataset/orangutan-image-classification-dataset), plus ~1000 background images from [this dataset](https://www.kaggle.com/datasets/slothkong/10-monkey-species/data).\\
Annotations were obtained with GroundingDINO, a zero-shot object detection method.

The full pipeline is available in my GitHub repository: https://github.com/clementapa/orang-outan-image-video-detection.

## About orangutans 🦧
Because of habitat destruction, illegal poaching, and the pet trade, orangutans are in danger of going extinct. Their natural habitat has been significantly reduced by deforestation and the growth of palm oil plantations. Adult orangutans are occasionally hunted for their body parts, and they are frequently captured and sold as pets. Climate change and disease are also taking a toll on their populations. It is also concerning that they now survive in only two places on Earth: Borneo and Sumatra. Sustainable practices and conservation initiatives are crucial to preventing the permanent extinction of these amazing animals.

## AI for good 🌍
Artificial Intelligence (AI) has unquestionable power in the realm of innovation and technology. Although AI has frequently been used for commercial advantage, it is important to stress that it can also serve more noble purposes, such as protecting the environment and the planet's future. We can build a more promising and sustainable future if we reorient AI's focus from business alone to improving our planet.
"""

EXAMPLES = []

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
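# Note: DEVICE is not currently passed to the model. Ultralytics selects a device
# automatically, but it can be supplied explicitly (e.g. model(..., device=DEVICE)) if needed.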

YOLO_MODEL = YOLO("train_7best.pt")

BOX_ANNOTATOR = sv.BoxAnnotator(color=Color.from_hex("#FF00E4"))


def annotate(
    image_bgr_numpy: np.ndarray,
    detections: sv.Detections,
    annotator: sv.BoxAnnotator,
    labels: List[str],
) -> Image.Image:
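    """Draw detection boxes and labels on a BGR numpy image and return an RGB PIL image."""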
    thickness = 2
    text_thickness = 1
    text_scale = 1.0

    height, width, _ = image_bgr_numpy.shape
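    # Scale line thickness and label size with the image resolution so the
    # annotations stay legible on both small and large images.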

    thickness_ratio = ((width + height) / 2) / 400
    text_scale_ratio = ((width + height) / 2) / 600
    text_thickness_ratio = ((width + height) / 2) / 400

    annotator.thickness = int(thickness * thickness_ratio)
    annotator.text_scale = float(text_scale * text_scale_ratio)
    annotator.text_thickness = int(text_thickness * text_thickness_ratio)

    annotated_bgr_image = annotator.annotate(
        scene=image_bgr_numpy, detections=detections, labels=labels
    )
    return Image.fromarray(annotated_bgr_image[:, :, ::-1])


def inference_image(image_rgb_pil: Image.Image, confidence: float) -> Image.Image:
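    """Detect orangutans in a PIL image and return the annotated image,
    keeping only detections above the confidence threshold."""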
    output = YOLO_MODEL(image_rgb_pil, imgsz=640, verbose=False)[0]
    detections = sv.Detections.from_ultralytics(output)

    detections = detections[detections.confidence >= confidence]

    labels = [
        f"{output.names[class_id]} {det_confidence:0.2f}"
        for _, _, det_confidence, class_id, _ in detections
    ]

    return annotate(
        image_bgr_numpy=output.orig_img.copy(),
        detections=detections,
        annotator=BOX_ANNOTATOR,
        labels=labels,
    )


def process_frame(frame: np.ndarray, confidence: float) -> np.ndarray:
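    """Detect orangutans in a single BGR video frame and return the annotated BGR frame."""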
    output = YOLO_MODEL(frame, imgsz=640, verbose=False)[0]

    detections = sv.Detections.from_ultralytics(output)

    detections = detections[detections.confidence >= confidence]

    labels = [
        f"{output.names[class_id]} {det_confidence:0.2f}"
        for _, _, det_confidence, class_id, _ in detections
    ]

    thickness = 2
    text_thickness = 1
    text_scale = 1.0

    height, width, _ = output.orig_img.shape

    thickness_ratio = ((width + height) / 2) / 400
    text_scale_ratio = ((width + height) / 2) / 600
    text_thickness_ratio = ((width + height) / 2) / 400

    BOX_ANNOTATOR.thickness = int(thickness * thickness_ratio)
    BOX_ANNOTATOR.text_scale = float(text_scale * text_scale_ratio)
    BOX_ANNOTATOR.text_thickness = int(text_thickness * text_thickness_ratio)

    annotated_frame = BOX_ANNOTATOR.annotate(
        scene=output.orig_img.copy(), detections=detections, labels=labels
    )
    return annotated_frame


def inference_video(path_video: str, confidence: float) -> str:
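    """Run detection on every frame of the input video.

    Annotated frames are written to a temporary MP4 file whose path is
    returned so that Gradio can display the result.
    """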
    path_output_video = "temp.mp4"
    video_capture = cv2.VideoCapture(path_video)

    # Check if the video file was successfully opened
    if not video_capture.isOpened():
        raise gr.Error("Could not open video file.")

    frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_rate = int(video_capture.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # You can change the codec as needed
    out = cv2.VideoWriter(
        path_output_video, fourcc, frame_rate, (frame_width, frame_height)
    )

    while True:
        # Read a frame from the video
        ret, frame = video_capture.read()

        # Check if the video has ended
        if not ret:
            break

        # Run detection on the frame and draw the annotations
        annotated_frame = process_frame(frame, confidence=confidence)

        out.write(annotated_frame)

    # Release the video capture object and close any open windows
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()

    return path_output_video


custom_theme = gr.themes.Soft(primary_hue="green")
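# Gradio UI: one tab for image detection and one for video detection,
# each with example inputs and a confidence slider.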
with gr.Blocks(theme=custom_theme, css="style.css") as demo:
    gr.Markdown(MARKDOWN)

    with gr.Tab("Detect on an image 🖼️"):
        with gr.Row():
            with gr.Column():
                input_image = gr.Image(
                    image_mode="RGB",
                    sources=["upload", "clipboard"],
                    type="pil",
                )
                example_folder = osp.join(
                    osp.dirname(__file__), "resources/examples_images"
                )
                example_fns = [
                    osp.join(example_folder, example)
                    for example in os.listdir(example_folder)
                ]
                gr.Examples(
                    examples=example_fns,
                    inputs=[input_image],
                    outputs=[input_image],
                    cache_examples=False,
                    label="Examples (click one of the images below to start)",
                    examples_per_page=10,
                )
                confidence_image_slider = gr.Slider(
                    label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
                )
                submit_button_image = gr.Button("Let's find orangutans 🦧!")
            output_image = gr.Image(label="Results", type="pil")

    with gr.Tab("Detect on a video 📹"):
        with gr.Row():
            with gr.Column():
                input_video = gr.Video(sources=["upload"])
                example_folder = osp.join(
                    osp.dirname(__file__), "resources/examples_videos"
                )
                example_fns = [
                    osp.join(example_folder, example)
                    for example in os.listdir(example_folder)
                ]
                gr.Examples(
                    examples=example_fns,
                    inputs=[input_video],
                    outputs=[input_video],
                    cache_examples=False,
                    label="Examples (click one of the videos below to start)",
                    examples_per_page=10,
                )
                confidence_video_slider = gr.Slider(
                    label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
                )
                submit_button_video = gr.Button("Let's find orangutans 🦧!")
            output_video = gr.Video(label="Results")

    submit_button_image.click(
        inference_image,
        inputs=[input_image, confidence_image_slider],
        outputs=output_image,
        queue=True,
    )

    submit_button_video.click(
        inference_video,
        inputs=[input_video, confidence_video_slider],
        outputs=output_video,
        queue=True,
    )

if __name__ == "__main__":
    demo.queue(max_size=20, api_open=False).launch()