import os
import random
import time
from typing import Any, Dict, Iterator, List, Optional

import cv2
import numpy as np
import tensorflow as tf


class EndpointHandler:
    """Callable endpoint that classifies every frame of a video source.

    A TensorFlow SavedModel is loaded once at construction time.  Calling the
    handler with ``{"url": "<video source>"}`` opens the source with OpenCV,
    runs the model on each frame and returns one result dict per frame.
    """

    def __init__(self, path: str = "") -> None:
        """Load the SavedModel stored in the ``my_model`` directory under *path*.

        Args:
            path: Directory that contains the ``my_model`` SavedModel folder.
        """
        # os.path.join keeps an empty ``path`` relative ("my_model") instead of
        # silently pointing at the filesystem root ("/my_model").
        self.model = tf.saved_model.load(os.path.join(path, "my_model"))
        # Default label map used by get_top_k; list position is the class index.
        self.classes_1 = ["RoadAccidents", "Fighting", "NormalVideos"]
        # Pool from which a location is drawn at random for every result.
        self.locations = ['Miami', 'Smouha', 'Mandara', 'Sporting', 'Montazah']

    def preprocess_frame(self, frame: np.ndarray) -> np.ndarray:
        """Convert one BGR frame into a scaled float32 array with a leading axis.

        Args:
            frame: Image in OpenCV's BGR channel order.

        Returns:
            Array of shape ``(1, 224, 224, 3)``: RGB channel order, dtype
            float32, pixel values divided by 255.
        """
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, (224, 224))
        frame = frame.astype('float32') / 255.0
        return np.expand_dims(frame, axis=0)

    def get_top_k(
        self,
        probs: Any,
        k: int = 1,
        label_map: Optional[List[str]] = None,
    ) -> str:
        """Return the label whose probability is highest.

        Args:
            probs: Class probabilities, indexable by class along the last
                axis (the caller in this class passes a 1-D tensor).
            k: Number of top-ranked indices to gather before picking the
                first.  Only the best label is returned whatever the value,
                and ``k=0`` raises ``IndexError``.
            label_map: Labels indexed by class id; defaults to
                ``self.classes_1``.

        Returns:
            The highest-probability label as ``str``.
        """
        if label_map is None:
            label_map = self.classes_1
        top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]
        top_labels = tf.gather(label_map, top_predictions, axis=-1)
        # TensorFlow stores strings as bytes, so decode back to ``str``.
        return top_labels.numpy()[0].decode('utf8')

    def perform_action_recognition(
        self, url: str, k: int = 1
    ) -> Iterator[Dict[str, Any]]:
        """Yield one classification result per frame read from *url*.

        Args:
            url: Video source handed to ``cv2.VideoCapture``.
            k: Forwarded to :meth:`get_top_k`.

        Yields:
            Dict with keys ``"class"`` (predicted label), ``"elapsed_time"``
            (wall-clock ``HH:MM:SS`` since the capture was opened),
            ``"location"`` (random pick from ``self.locations``) and
            ``"ip_address"`` (text after the last ``/`` of *url*).
        """
        cap = cv2.VideoCapture(url)
        start_time = time.time()
        # try/finally guarantees the capture is released even when inference
        # raises or the consumer stops iterating before the video ends.
        try:
            # Loop-invariant values are computed once instead of per frame.
            ip_address = url.split("/")[-1]
            infer = self.model.signatures['serving_default']
            while True:
                ret, frame = cap.read()
                if not ret:
                    # No frame returned: end of the video or a read failure.
                    break
                preprocessed_frame = self.preprocess_frame(frame)
                # A second leading axis is added here, giving the model an
                # input of shape (1, 1, 224, 224, 3).
                outputs = infer(image=preprocessed_frame[tf.newaxis])
                probs = tf.nn.softmax(outputs['classifier_head_1'])
                current_time = time.time() - start_time
                m, s = divmod(current_time, 60)
                h, m = divmod(m, 60)
                yield {
                    "class": self.get_top_k(probs[0], k=k),
                    "elapsed_time": f"{int(h):02d}:{int(m):02d}:{int(s):02d}",
                    "location": random.choice(self.locations),
                    "ip_address": ip_address,
                }
        finally:
            cap.release()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Classify every frame of the video named in the request body.

        Args:
            data: Request payload; must contain a string under ``"url"``.

        Returns:
            All per-frame result dicts, in frame order.  The list is built in
            memory and returned only once the source stops yielding frames.

        Raises:
            ValueError: If ``"url"`` is missing (or ``None``) or is not a str.
        """
        url = data.get("url")
        if url is None:
            raise ValueError("'url' is missing from the request body")
        if not isinstance(url, str):
            raise ValueError(f"Expected 'url' to be a str, but found {type(url)}")
        return list(self.perform_action_recognition(url))