import cv2
from PIL import Image
import numpy as np


def preprocess_image(image):
    """
    Preprocesses an input image for prediction.

    Args:
        image (Union[str, numpy.ndarray]): File path to an image or a numpy array.

    Returns:
        PIL.Image: Preprocessed image.
    """
    if isinstance(image, str):
        # Handle file path
        image = Image.open(image).convert("RGB")
    elif isinstance(image, np.ndarray):
        # Handle numpy array
        image = Image.fromarray(image).convert("RGB")
    else:
        raise ValueError("Unsupported image type. Must be a file path or numpy array.")
    return image


def resize_shorter_side(img, min_length):
    """
    Resize the shorter side of img to min_length while preserving the aspect ratio.
    Returns the (possibly resized) image and its final (width, height).
    """
    ow, oh = img.size
    mult = 8
    if ow < oh:
        if ow == min_length and oh % mult == 0:
            return img, (ow, oh)
        w = min_length
        h = int(min_length * oh / ow)
    else:
        if oh == min_length and ow % mult == 0:
            return img, (ow, oh)
        h = min_length
        w = int(min_length * ow / oh)
    return img.resize((w, h), Image.BICUBIC), (w, h)


def generate_local_image(image):
    """
    Detects the face in the input image and extracts it as a 'local image'.
    If no face is detected, returns the global image as the local image.

    Args:
        image (Union[PIL.Image, numpy.ndarray]): The input image.

    Returns:
        PIL.Image: The cropped face or the original image if no face is detected.
    """
    # Convert numpy array to PIL.Image if necessary
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # Convert PIL image to OpenCV format for face detection
    image_cv = np.array(image)
    image_gray = cv2.cvtColor(image_cv, cv2.COLOR_RGB2GRAY)

    # Load OpenCV's pre-trained Haar Cascade for face detection
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    faces = face_cascade.detectMultiScale(
        image_gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )

    if len(faces) == 0:
        print("No face detected. Using the global image as the local image.")
        return image  # Return the global image as fallback

    # Use the first detected face
    x, y, w, h = faces[0]

    # Crop the face region
    face_image = image.crop((x, y, x + w, y + h))
    return face_image


def preprocess_video(video_path, frame_count=32):
    """
    Samples up to frame_count evenly spaced frames from a video as RGB numpy arrays.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return []  # Return an empty list if video can't be opened

    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if total_frames == 0:
        cap.release()
        return []  # Handle videos with no frames

    interval = max(1, total_frames // frame_count)
    for i in range(frame_count):
        # Seek to the next sampled frame index and decode it
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * interval)
        ret, frame = cap.read()
        if ret:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    cap.release()
    return frames
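

# Usage sketch (an assumption for illustration, not part of the original module):
# "face.jpg" and "clip.mp4" are hypothetical placeholder paths, and 224 is an
# example target size for the shorter side.
if __name__ == "__main__":
    # Load and normalize a single image, resize its shorter side, then crop the face.
    img = preprocess_image("face.jpg")
    img, (w, h) = resize_shorter_side(img, 224)
    local = generate_local_image(img)
    print("Global size:", (w, h), "| local (face) size:", local.size)

    # Sample evenly spaced RGB frames from a video.
    frames = preprocess_video("clip.mp4", frame_count=32)
    print("Sampled", len(frames), "frames")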