import base64
from typing import Any, List, Tuple

import numpy as np
from numpy.typing import NDArray


def load_audio(audio_path: str) -> Tuple[NDArray[np.float32], int]:
    """Load an audio file as float32 samples together with its sampling rate.

    Args:
        audio_path: Path of the audio file to decode.

    Returns:
        A ``(samples, sample_rate)`` tuple: the samples returned by
        ``librosa.load`` cast to ``float32``, and the sampling rate as ``int``.
    """
    # Imported lazily so the module can be imported without librosa installed.
    import librosa

    # sr=None asks librosa not to resample (see the librosa.load docs).
    audio, sr = librosa.load(audio_path, sr=None)
    return audio.astype(np.float32), int(sr)


def load_image(image_path: str) -> str:
    """Read a file and return its raw bytes as a base64-encoded string.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The base64 encoding of the file contents, decoded as UTF-8 text.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    return base64.b64encode(raw_bytes).decode("utf-8")


def load_video(video_path: str, frame_num: int = 5) -> List[NDArray[np.uint8]]:
    """Decode a video and return up to ``frame_num`` evenly strided frames.

    Every frame is read with ``cv2.VideoCapture``. If the video holds at
    least ``frame_num`` frames, exactly ``frame_num`` of them are returned,
    taken at a stride of ``len(frames) // frame_num`` starting from the first
    frame. If it holds fewer, all decoded frames are returned. If no frame
    can be read, the result is an empty list.

    Args:
        video_path: Path of the video file to decode.
        frame_num: Maximum number of frames to return; must be at least 1.

    Returns:
        A list of frames as ``uint8`` arrays, in playback order.

    Raises:
        ValueError: If ``frame_num`` is less than 1.
    """
    # Validate first: a zero value would otherwise surface as an unrelated
    # ZeroDivisionError when the stride is computed.
    if frame_num < 1:
        raise ValueError(f"frame_num must be at least 1, got {frame_num}")

    # Imported lazily so the module can be imported without OpenCV installed.
    import cv2

    cap = cv2.VideoCapture(video_path)
    frames: List[NDArray[np.uint8]] = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # copy=False avoids duplicating frames that are already uint8.
            frames.append(frame.astype(np.uint8, copy=False))
    finally:
        # Always release the capture handle, even if decoding raises.
        cap.release()

    if len(frames) >= frame_num:
        step = len(frames) // frame_num
        # Index explicitly rather than slicing to the end of the list: the
        # integer-division stride can otherwise yield more than frame_num
        # frames (e.g. 11 frames with frame_num=5 would give 6).
        frames = [frames[i * step] for i in range(frame_num)]
    return frames