"""Gradio demo that runs TensorFlow Lite object-detection models on images and videos."""

import functools
import importlib.util
import os
import threading

import cv2
import gradio as gr
import numpy as np
from PIL import Image

# Use the standalone tflite_runtime package when it is installed; otherwise
# fall back to the interpreter that ships inside full TensorFlow.
pkg = importlib.util.find_spec('tflite_runtime')
if pkg:
    from tflite_runtime.interpreter import Interpreter
    from tflite_runtime.interpreter import load_delegate  # noqa: F401
else:
    from tensorflow.lite.python.interpreter import Interpreter
    from tensorflow.lite.python.interpreter import load_delegate  # noqa: F401

# Directory used when a model name is not listed in MODEL_DIRS.
MODEL_DIR = 'model'

# Maps the model name shown in the UI to the directory holding its files.
MODEL_DIRS = {
    'Multi-class model': 'model',
    'Empty class': 'model_2',
    'Misalignment class': 'model_3',
}

# Model that uses the un-suffixed file names and is pre-selected in the UI.
DEFAULT_MODEL_NAME = 'Multi-class model'

# Frame rate used for the output video when the source does not report one.
FALLBACK_FPS = 15


def load_model(model_name):
    """Return the ``(model_path, labelmap_path)`` pair for *model_name*.

    The default model uses ``detect.tflite`` / ``labelmap.txt``. Every other
    model uses file names suffixed with the lower-cased, underscore-separated
    model name (e.g. ``detect_empty_class.tflite``). Names missing from
    ``MODEL_DIRS`` are looked up inside ``MODEL_DIR``.
    """
    selected_model_dir = MODEL_DIRS.get(model_name, MODEL_DIR)
    if model_name == DEFAULT_MODEL_NAME:
        graph_name = 'detect.tflite'
        labelmap_name = 'labelmap.txt'
    else:
        slug = model_name.lower().replace(' ', '_')
        graph_name = f'detect_{slug}.tflite'
        labelmap_name = f'labelmap_{slug}.txt'
    path_to_ckpt = os.path.join(selected_model_dir, graph_name)
    path_to_labels = os.path.join(selected_model_dir, labelmap_name)
    return path_to_ckpt, path_to_labels


def load_labels(path_to_labels):
    """Read one label per line from *path_to_labels*.

    A leading ``'???'`` placeholder entry is dropped. An empty file yields an
    empty list instead of raising ``IndexError``.
    """
    with open(path_to_labels, 'r', encoding='utf-8') as f:
        labels = [line.strip() for line in f]
    if labels and labels[0] == '???':
        del labels[0]
    return labels


def load_interpreter(model_path):
    """Create a TFLite interpreter for *model_path* with tensors allocated."""
    interpreter = Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    return interpreter


class ModelDetector:
    """Wraps one TFLite detection model together with its label map."""

    def __init__(self, model_name):
        """Load the model and labels for *model_name* and cache tensor metadata."""
        self.model_path, self.label_path = load_model(model_name)
        self.labels = load_labels(self.label_path)
        self.interpreter = load_interpreter(self.model_path)

        input_details = self.interpreter.get_input_details()
        output_details = self.interpreter.get_output_details()

        # Input shape is indexed as [_, height, width, ...].
        self.height = input_details[0]['shape'][1]
        self.width = input_details[0]['shape'][2]
        self.floating_model = (input_details[0]['dtype'] == np.float32)

        # (pixel - 127.5) / 127.5 maps 0..255 onto -1..1 for float models.
        self.input_mean = 127.5
        self.input_std = 127.5

        # The position of boxes/classes/scores among the outputs depends on
        # how the model was exported; the first output's name tells the two
        # layouts apart (presumably TF2 vs. TF1 exports -- confirm against the
        # models actually shipped).
        outname = output_details[0]['name']
        if 'StatefulPartitionedCall' in outname:
            self.boxes_idx, self.classes_idx, self.scores_idx = 1, 3, 0
        else:
            self.boxes_idx, self.classes_idx, self.scores_idx = 0, 1, 2

        # Tensor indices never change after allocation, so resolve them once
        # instead of re-querying the interpreter for every frame.
        self._input_index = input_details[0]['index']
        self._boxes_index = output_details[self.boxes_idx]['index']
        self._classes_index = output_details[self.classes_idx]['index']
        self._scores_index = output_details[self.scores_idx]['index']

        # One detector instance may be shared by several UI worker threads;
        # serialize access to the single underlying interpreter.
        self._lock = threading.Lock()

    def _label_for(self, class_id):
        """Return the label for *class_id*, or a placeholder if it is unknown."""
        index = int(class_id)
        if 0 <= index < len(self.labels):
            return self.labels[index]
        return f'class {index}'

    def perform_detection(self, image, min_conf=0.5):
        """Run detection on a BGR *image* and draw the results onto it.

        Args:
            image: ``(H, W, 3)`` BGR array (OpenCV convention). It is
                modified in place.
            min_conf: Detections with a score at or below this value are
                ignored.

        Returns:
            The same *image* array with boxes and labels drawn on it.
        """
        imH, imW = image.shape[:2]
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_resized = cv2.resize(image_rgb, (self.width, self.height))
        input_data = np.expand_dims(image_resized, axis=0)

        if self.floating_model:
            input_data = (np.float32(input_data) - self.input_mean) / self.input_std

        with self._lock:
            self.interpreter.set_tensor(self._input_index, input_data)
            self.interpreter.invoke()
            boxes = self.interpreter.get_tensor(self._boxes_index)[0]
            classes = self.interpreter.get_tensor(self._classes_index)[0]
            scores = self.interpreter.get_tensor(self._scores_index)[0]

        for box, class_id, score in zip(boxes, classes, scores):
            if not (min_conf < score <= 1.0):
                continue

            # Box coordinates are fractions of the image size; clamp them so
            # the rectangle stays inside the frame.
            ymin = int(max(1, box[0] * imH))
            xmin = int(max(1, box[1] * imW))
            ymax = int(min(imH, box[2] * imH))
            xmax = int(min(imW, box[3] * imW))
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)

            label = f'{self._label_for(class_id)}: {int(score * 100)}%'
            labelSize, baseLine = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
            # Keep the label on-screen when the box touches the top edge.
            label_ymin = max(ymin, labelSize[1] + 10)
            cv2.rectangle(
                image,
                (xmin, label_ymin - labelSize[1] - 10),
                (xmin + labelSize[0], label_ymin + baseLine - 10),
                (255, 255, 255),
                cv2.FILLED,
            )
            cv2.putText(image, label, (xmin, label_ymin - 7),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)

        return image


@functools.lru_cache(maxsize=len(MODEL_DIRS))
def get_detector(model_name):
    """Return a cached ``ModelDetector`` so each model is loaded only once."""
    return ModelDetector(model_name)


def _resolve_detector(model_detector):
    """Accept a detector object or a model name and return a detector.

    The Gradio handlers receive the dropdown's string value (or ``None`` when
    nothing is selected), while direct callers may pass a detector instance.
    """
    if hasattr(model_detector, 'perform_detection'):
        return model_detector
    return get_detector(model_detector or DEFAULT_MODEL_NAME)


def resize_image(image, size=640):
    """Resize *image* to a ``size`` x ``size`` square (aspect ratio not kept)."""
    return cv2.resize(image, (size, size))


def detect_image(input_image, model_detector):
    """Run detection on an RGB image and return the annotated PIL image.

    Args:
        input_image: PIL image (or RGB array) uploaded by the user.
        model_detector: A detector instance or the name of a model.

    Raises:
        ValueError: If no image was supplied.
    """
    if input_image is None:
        raise ValueError("No image was provided.")
    detector = _resolve_detector(model_detector)

    if isinstance(input_image, Image.Image):
        input_image = input_image.convert('RGB')
    # perform_detection expects OpenCV's BGR channel order, whereas PIL
    # delivers RGB; convert in, then convert back for display.
    image_bgr = cv2.cvtColor(np.array(input_image), cv2.COLOR_RGB2BGR)
    resized_image = resize_image(image_bgr, size=640)
    result_image = detector.perform_detection(resized_image)
    return Image.fromarray(cv2.cvtColor(result_image, cv2.COLOR_BGR2RGB))


def detect_video(input_video, model_detector):
    """Run detection on every frame of a video and write an annotated copy.

    Frames are written as they are processed, so memory use does not grow
    with the length of the video.

    Args:
        input_video: Path of the source video file.
        model_detector: A detector instance or the name of a model.

    Returns:
        Path of the written video (``result_<source file name>`` in the
        current working directory).

    Raises:
        ValueError: If no video was supplied or no frame could be read.
        RuntimeError: If the output video could not be opened for writing.
    """
    if not input_video:
        raise ValueError("No video was provided.")
    detector = _resolve_detector(model_detector)
    output_video_path = "result_" + os.path.basename(input_video)

    cap = cv2.VideoCapture(input_video)
    out = None
    try:
        # Keep the source playback speed; some containers report 0 or NaN,
        # in which case fall back to a fixed rate.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            fps = FALLBACK_FPS

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            resized_frame = resize_image(frame, size=640)
            result_frame = detector.perform_detection(resized_frame)

            if out is None:
                # Open the writer lazily so its size matches the real frames
                # and no file is created for an unreadable video.
                height, width = result_frame.shape[:2]
                out = cv2.VideoWriter(
                    output_video_path,
                    cv2.VideoWriter_fourcc(*'mp4v'),
                    fps,
                    (width, height),
                )
                if not out.isOpened():
                    raise RuntimeError(
                        f"Could not open {output_video_path} for writing.")
            out.write(result_frame)
    finally:
        cap.release()
        if out is not None:
            out.release()

    if out is None:
        raise ValueError("No frames were read from the video.")
    return output_video_path


app = gr.Blocks()

with app:
    # The dropdown's value (a model name) is passed to the handlers, which
    # resolve it to a cached detector.
    model_selector = gr.Dropdown(
        choices=list(MODEL_DIRS),
        value=DEFAULT_MODEL_NAME,
        label="Select Model",
    )

    with gr.Tab("Image Detection"):
        gr.Markdown("Upload an image for object detection")
        image_input = gr.Image(type="pil", label="Upload an image")
        image_output = gr.Image(type="pil", label="Detection Result")
        gr.Button("Submit").click(
            fn=detect_image,
            inputs=[image_input, model_selector],
            outputs=image_output,
        )

    with gr.Tab("Video Detection"):
        gr.Markdown("Upload a video for object detection")
        video_input = gr.Video(label="Upload a video")
        video_output = gr.Video(label="Detection Result")
        gr.Button("Submit").click(
            fn=detect_video,
            inputs=[video_input, model_selector],
            outputs=video_output,
        )


if __name__ == "__main__":
    app.launch()