import base64 | |
from typing import Any, List, Tuple | |
import numpy as np | |
from numpy.typing import NDArray | |
def load_audio(audio_path: str) -> Tuple[NDArray[np.float32], int]: | |
import librosa | |
audio, sr = librosa.load(audio_path, sr=None) | |
import pdb | |
pdb.set_trace() # Debugging breakpoint | |
return (audio.astype(np.float32), int(sr)) | |
def load_image(image_path: str) -> str: | |
with open(image_path, "rb") as image_file: | |
encoded_image = base64.b64encode(image_file.read()).decode("utf-8") | |
return encoded_image | |
def load_video(video_path: str, frame_num: int = 5) -> List[NDArray[np.uint8]]: | |
import cv2 | |
cap = cv2.VideoCapture(video_path) | |
frames: List[np.ndarray[np.uint8, Any]] = [] | |
try: | |
while True: | |
ret, frame = cap.read() | |
if not ret: | |
break | |
frames.append(frame.astype(np.uint8)) | |
finally: | |
cap.release() | |
if len(frames) >= frame_num: | |
step = len(frames) // frame_num | |
frames = [frames[i] for i in range(0, len(frames), step)] | |
return frames | |