import inspect
import os

import aiofiles
import librosa
from espnet_onnx import Speech2Text
from fastapi import Form, UploadFile
from pydantic import BaseModel


# Request model carrying a single uploaded audio file, declared as a form field.
# (Kept as `#` comments rather than a docstring so the generated schema
# description is not altered.)
class AudioInput(BaseModel):
    audio: UploadFile = Form()


async def audio_from_file(file):
    """Write an uploaded file to ``sample.mp3`` and load it as audio at 16 kHz.

    Args:
        file: Object exposing ``read()``. Presumably a FastAPI ``UploadFile``
            (whose ``read()`` is a coroutine) -- confirm against callers.
            A plain file-like object with a synchronous ``read()`` is also
            accepted.

    Returns:
        The first element of the pair returned by
        ``librosa.load("sample.mp3", sr=16000)``; the second element is
        discarded.

    Note:
        The file is always written to the fixed path ``sample.mp3`` in the
        current working directory, and any existing file at that path is
        removed first.
    """
    # Drop any stale copy left behind by a previous call.
    if os.path.exists("sample.mp3"):
        os.remove("sample.mp3")

    async with aiofiles.open("sample.mp3", "wb") as out_file:
        content = file.read()
        # An async ``read()`` returns a coroutine; it must be awaited to obtain
        # the bytes, otherwise the coroutine object itself would be handed to
        # ``write`` and the call would fail.
        if inspect.isawaitable(content):
            content = await content  # async read
        await out_file.write(content)  # async write

    audio, _ = librosa.load("sample.mp3", sr=16000)
    return audio


class ASRModel:
    """Thin wrapper around an ``espnet_onnx`` ``Speech2Text`` model."""

    def __init__(self):
        """Start with no model loaded; models are looked up under ``models/asr``."""
        self.model = None
        self.model_path = "models/asr"

    def load_model(self, model_path):
        """Load the model found at ``<self.model_path>/<model_path>``.

        Args:
            model_path: Path component appended to ``self.model_path`` to form
                the ``model_dir`` passed to ``Speech2Text``.
        """
        self.model = Speech2Text(model_dir=f"{self.model_path}/{model_path}")

    def generate(self, audio):
        """Run the loaded model on ``audio`` and return ``hyp[0][0]``.

        Args:
            audio: Input passed straight to the model call.

        Returns:
            The first item of the first entry in the model output --
            presumably the text of the top hypothesis; confirm against the
            ``espnet_onnx`` documentation.

        Raises:
            RuntimeError: If ``load_model`` has not been called yet.
        """
        if self.model is None:
            raise RuntimeError("Model is not loaded.")
        hyp = self.model(audio)
        return hyp[0][0]