Spaces:

Clementapa
/

orang-outan-image-video-detection

Running

App Files Files Community

orang-outan-image-video-detection / app.py

Clementapa

rename space

8c485d9 over 1 year ago

raw

history blame contribute delete

8.69 kB

	import os
	import os.path as osp
	from typing import List

	import cv2
	import gradio as gr
	import numpy as np
	import supervision as sv
	import torch
	from PIL import Image
	from supervision import Color
	from ultralytics import YOLO

	MARKDOWN = """
	<h1 style="text-align: center;"> WildGuardian: AI for Orangutan Ecosystem Surveillance 🦧🔍 </h1>

	## About the model 👁️
	This is a demo for my YOLOv8 nano trained for orangutan detection.\\
	The model was trained using only ~1000 images of orangutans from [this dataset](https://images.cv/dataset/orangutan-image-classification-dataset), plus ~1000 images from [this dataset](https://www.kaggle.com/datasets/slothkong/10-monkey-species/data) used as background images.\\
	Annotations were obtained using the zero-shot object detection method GroundingDino.

	The full pipeline can be found on my github repository: https://github.com/clementapa/orangutan-image-video-detection.

	## About the orangutans 🦧
	Because of habitat destruction, illicit poaching, and the pet trade, orangutans are in danger of going extinct. Their natural habitat has been significantly reduced by deforestation and the growth of palm oil plantations. Adult orangutans are occasionally sought for their body parts, and they are frequently captured and sold as pets. Climate change and disease are also taking a toll on their populations. Furthermore, it is concerning to note that they are limited to Borneo and Sumatra, two places on Earth. Sustainable practices and conservation initiatives are crucial to preventing the permanent extinction of these amazing animals.

	## AI for good 🌍
	Artificial Intelligence (AI) has unquestionable power in the realm of innovation and technology. Even though artificial intelligence (AI) has frequently been used for commercial advantage, it is important to stress that AI can also be used for more noble purposes, such as protecting the environment and the planet's future. We can build a more promising and sustainable future if we reorient AI's focus from business to improving our planet.
	"""

	# Placeholder list; it is not referenced anywhere else in the visible code.
	EXAMPLES = []

	# Preferred torch device string. NOTE(review): nothing in the visible code
	# passes DEVICE to the model — confirm whether it is still needed.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

	# Detection model, loaded once at import time from a path relative to the
	# current working directory.
	YOLO_MODEL = YOLO("train_7best.pt")

	# Single annotator instance shared by the image and video paths; its
	# thickness / text settings are overwritten on every annotation call.
	BOX_ANNOTATOR = sv.BoxAnnotator(color=Color.from_hex("#FF00E4"))


	def annotate(
	    image_bgr_numpy: np.ndarray,
	    detections: sv.Detections,
	    annotator: sv.BoxAnnotator,
	    labels: List[str],
	) -> Image.Image:
	    """Draw detections on a BGR array and return the result as a PIL image.

	    Box thickness, text scale and text thickness are scaled with the mean
	    of the image width and height so the overlay stays proportionate to
	    the image size.

	    Args:
	        image_bgr_numpy: Array indexed as (height, width, channel) with the
	            channels in BGR order. It is handed to the annotator as the
	            drawing surface, so pass a copy if the original must be kept.
	        detections: Detections to draw.
	        annotator: Annotator used for drawing. Its ``thickness``,
	            ``text_scale`` and ``text_thickness`` attributes are
	            overwritten by this call.
	        labels: One caption per detection.

	    Returns:
	        A PIL image built from the annotated array with the channel axis
	        reversed (BGR -> RGB).
	    """
	    base_thickness = 2
	    base_text_thickness = 1
	    base_text_scale = 1.0

	    height, width, _ = image_bgr_numpy.shape
	    mean_side = (width + height) / 2

	    # int() truncates towards zero, which would yield a thickness of 0 on
	    # small images; clamp so boxes and captions are always drawn with at
	    # least a one-pixel stroke.
	    annotator.thickness = max(1, int(base_thickness * (mean_side / 400)))
	    annotator.text_scale = float(base_text_scale * (mean_side / 600))
	    annotator.text_thickness = max(
	        1, int(base_text_thickness * (mean_side / 400))
	    )

	    annotated_bgr_image = annotator.annotate(
	        scene=image_bgr_numpy, detections=detections, labels=labels
	    )
	    # Reverse the channel axis so PIL receives RGB data.
	    return Image.fromarray(annotated_bgr_image[:, :, ::-1])


	def inference_image(image_rgb_pil: Image.Image, confidence: float) -> Image.Image:
	    """Run the detector on one image and return the annotated picture.

	    Args:
	        image_rgb_pil: Image supplied by the UI (PIL, RGB).
	        confidence: Minimum detection confidence; detections scoring below
	            it are dropped.

	    Returns:
	        A PIL image with one labelled box per remaining detection. Each
	        label is the class name followed by the score with two decimals.

	    Raises:
	        gr.Error: If no image was provided.
	    """
	    if image_rgb_pil is None:
	        # The UI passes None when the button is pressed with no image
	        # loaded; report that instead of running the model on nothing.
	        raise gr.Error("Please provide an image first.")

	    output = YOLO_MODEL(image_rgb_pil, imgsz=640, verbose=False)[0]
	    detections = sv.Detections.from_ultralytics(output)
	    detections = detections[detections.confidence >= confidence]

	    # Read the score/class arrays directly rather than unpacking the tuple
	    # yielded by iterating Detections, whose length depends on the
	    # installed supervision version.
	    labels = [
	        f"{output.names[int(class_id)]} {score:0.2f}"
	        for score, class_id in zip(detections.confidence, detections.class_id)
	    ]

	    return annotate(
	        image_bgr_numpy=output.orig_img.copy(),
	        detections=detections,
	        annotator=BOX_ANNOTATOR,
	        labels=labels,
	    )


	def process_frame(frame: np.ndarray, confidence: float) -> np.ndarray:
	    """Run the detector on one video frame and return the annotated frame.

	    Args:
	        frame: Frame array as produced by ``cv2.VideoCapture.read``.
	        confidence: Minimum detection confidence; detections scoring below
	            it are dropped.

	    Returns:
	        A copy of the model's original image with one labelled box per
	        remaining detection. The shared ``BOX_ANNOTATOR`` settings
	        (thickness, text scale, text thickness) are overwritten as a side
	        effect.
	    """
	    output = YOLO_MODEL(frame, imgsz=640, verbose=False)[0]

	    detections = sv.Detections.from_ultralytics(output)
	    detections = detections[detections.confidence >= confidence]

	    # Read the score/class arrays directly rather than unpacking the tuple
	    # yielded by iterating Detections, whose length depends on the
	    # installed supervision version.
	    labels = [
	        f"{output.names[int(class_id)]} {score:0.2f}"
	        for score, class_id in zip(detections.confidence, detections.class_id)
	    ]

	    base_thickness = 2
	    base_text_thickness = 1
	    base_text_scale = 1.0

	    height, width, _ = output.orig_img.shape
	    mean_side = (width + height) / 2

	    # int() truncates towards zero, which would yield a thickness of 0 on
	    # small frames; clamp to a one-pixel stroke.
	    BOX_ANNOTATOR.thickness = max(1, int(base_thickness * (mean_side / 400)))
	    BOX_ANNOTATOR.text_scale = float(base_text_scale * (mean_side / 600))
	    BOX_ANNOTATOR.text_thickness = max(
	        1, int(base_text_thickness * (mean_side / 400))
	    )

	    annotated_frame = BOX_ANNOTATOR.annotate(
	        scene=output.orig_img.copy(), detections=detections, labels=labels
	    )
	    return annotated_frame


	def inference_video(path_video, confidence):
	    """Annotate every frame of a video and write the result to ``temp.mp4``.

	    Args:
	        path_video: Path of the input video file.
	        confidence: Minimum detection confidence forwarded to
	            ``process_frame``.

	    Returns:
	        The path of the written video (``"temp.mp4"``, relative to the
	        current working directory; overwritten on every call).

	    Raises:
	        gr.Error: If no video was provided or the file cannot be opened.
	    """
	    if not path_video:
	        raise gr.Error("Please provide a video first.")

	    path_output_video = "temp.mp4"
	    video_capture = cv2.VideoCapture(path_video)

	    if not video_capture.isOpened():
	        video_capture.release()
	        # Raise instead of calling exit(): exit() raises SystemExit inside
	        # the running web app rather than reporting the problem to the user.
	        raise gr.Error("Could not open video file.")

	    out = None
	    try:
	        frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
	        frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

	        # Keep the fractional part of the frame rate (e.g. 29.97) so the
	        # output plays at the same speed as the input.
	        frame_rate = video_capture.get(cv2.CAP_PROP_FPS)
	        if not frame_rate > 0:
	            # The reported rate is unusable (zero or NaN); fall back to a
	            # common default so the writer still gets a positive rate.
	            frame_rate = 30.0

	        fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # You can change the codec as needed
	        out = cv2.VideoWriter(
	            path_output_video, fourcc, frame_rate, (frame_width, frame_height)
	        )

	        while True:
	            ret, frame = video_capture.read()
	            # read() reports failure once the stream is exhausted.
	            if not ret:
	                break
	            out.write(process_frame(frame, confidence=confidence))
	    finally:
	        # Release both handles even when a frame fails to process.
	        video_capture.release()
	        if out is not None:
	            out.release()

	    return path_output_video


	# Gradio UI: a Markdown header followed by two tabs (image and video). Each
	# tab has an input component, an example gallery, a confidence slider, a
	# submit button and a result component.
	# NOTE(review): the nesting of the layout blocks below cannot be read from
	# the indentation as shown here — confirm the intended Row/Column layout.
	custom_theme = gr.themes.Soft(primary_hue="green")
	with gr.Blocks(theme=custom_theme, css="style.css") as demo:
	gr.Markdown(MARKDOWN)

	with gr.Tab("Detect on an image 🖼️"):
	with gr.Row():
	with gr.Column():
	input_image = gr.Image(
	image_mode="RGB",
	sources=["upload", "clipboard"],
	type="pil",
	)
	# Example images: every entry of resources/examples_images next to this
	# file, in the order returned by os.listdir.
	example_folder = osp.join(
	osp.dirname(__file__), "resources/examples_images"
	)
	example_fns = [
	osp.join(example_folder, example)
	for example in os.listdir(example_folder)
	]
	# Both inputs and outputs point at the input component and caching is
	# disabled.
	gr.Examples(
	examples=example_fns,
	inputs=[input_image],
	outputs=[input_image],
	cache_examples=False,
	label="Examples (click one of the images below to start)",
	examples_per_page=10,
	)
	confidence_image_slider = gr.Slider(
	label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
	)
	submit_button_image = gr.Button("Let's find orangutans 🦧 !")
	output_image = gr.Image(label="Results", type="pil")

	with gr.Tab("Detect on a video 📹"):
	with gr.Row():
	with gr.Column():
	input_video = gr.Video(sources=["upload"])
	# Example videos: every entry of resources/examples_videos next to this
	# file, in the order returned by os.listdir. This rebinds example_folder
	# and example_fns from the image tab.
	example_folder = osp.join(
	osp.dirname(__file__), "resources/examples_videos"
	)
	example_fns = [
	osp.join(example_folder, example)
	for example in os.listdir(example_folder)
	]
	gr.Examples(
	examples=example_fns,
	inputs=[input_video],
	outputs=[input_video],
	cache_examples=False,
	label="Examples (click one of the videos below to start)",
	examples_per_page=10,
	)
	confidence_video_slider = gr.Slider(
	label="Confidence", minimum=0.1, maximum=1.0, step=0.05, value=0.6
	)
	submit_button_video = gr.Button("Let's find orangutans 🦧 !")
	output_video = gr.Video(label="Results")

	# Image tab: call inference_image with the image and slider value and show
	# the returned picture in output_image.
	submit_button_image.click(
	inference_image,
	inputs=[input_image, confidence_image_slider],
	outputs=output_image,
	queue=True,
	)

	# Video tab: call inference_video with the video path and slider value and
	# show the returned file in output_video.
	submit_button_video.click(
	inference_video,
	inputs=[input_video, confidence_video_slider],
	outputs=output_video,
	queue=True,
	)

	# Start the app only when run as a script, with queueing configured
	# (max_size=20, api_open=False).
	if __name__ == "__main__":
	demo.queue(max_size=20, api_open=False).launch()