"""FastAPI service that transcribes uploaded audio with a Wav2Vec2 CTC model.

Each request is transcribed in a short-lived child process; the result is
sent back to the server process through a ``multiprocessing.Queue``.
"""

import io
from multiprocessing import Process, Queue
from queue import Empty

import soundfile as sf
import torch
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

app = FastAPI()

# Load the model and the processor once, at import time.
model_name = "facebook/wav2vec2-large-960h-lv60"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)

# How often the parent wakes up to check that the worker is still alive
# while it waits for a result.
_RESULT_POLL_SECONDS = 1.0


class TranscriptionRequest(BaseModel):
    """Request schema holding an uploaded file.

    Not referenced by the endpoint below (which takes the upload as a
    ``File(...)`` parameter); kept so existing importers keep working.
    """

    file: UploadFile


def _transcribe_worker(audio_bytes, result_queue):
    """Decode ``audio_bytes``, run the model, and report the outcome.

    Runs in a child process. Exactly one ``(ok, payload)`` tuple is put on
    ``result_queue``: ``(True, transcription)`` on success or
    ``(False, error_message)`` on failure. A tagged tuple is used instead of
    an ``"Error: ..."`` string so a transcription containing the word
    "Error" is never mistaken for a failure.

    Args:
        audio_bytes: Raw bytes of an audio file readable by ``soundfile``.
        result_queue: Queue used to hand the result back to the parent.
    """
    try:
        audio, sample_rate = sf.read(io.BytesIO(audio_bytes))
        # soundfile returns (frames, channels) for multi-channel audio; the
        # processor would treat a 2-D array as a batch, so downmix to mono.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)
        # Passing the real sample rate lets the processor reject audio that
        # does not match the rate the model expects instead of silently
        # producing a garbage transcription.
        inputs = processor(
            audio,
            sampling_rate=sample_rate,
            return_tensors="pt",
            padding="longest",
        )
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            logits = model(inputs.input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        result_queue.put((True, transcription))
    except Exception as exc:  # process boundary: report, never crash silently
        result_queue.put((False, str(exc)))


def _run_in_subprocess(audio_bytes):
    """Transcribe ``audio_bytes`` in a child process and wait for the result.

    The queue is drained *before* joining the child (joining first can
    deadlock when the child still has buffered queue data), and the child's
    liveness is polled so a worker that dies without reporting does not hang
    the request forever.

    Returns:
        An ``(ok, payload)`` tuple as produced by ``_transcribe_worker``, or
        ``(False, message)`` if the worker exited without a result.
    """
    result_queue = Queue()
    worker = Process(target=_transcribe_worker, args=(audio_bytes, result_queue))
    worker.start()
    try:
        while True:
            try:
                return result_queue.get(timeout=_RESULT_POLL_SECONDS)
            except Empty:
                if worker.is_alive():
                    continue
                # The worker is gone; give any already-written result one
                # last chance to arrive before declaring failure.
                try:
                    return result_queue.get(timeout=_RESULT_POLL_SECONDS)
                except Empty:
                    return (
                        False,
                        "worker exited with code "
                        f"{worker.exitcode} without returning a result",
                    )
    finally:
        worker.join()


@app.post("/transcribe_audio")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file.

    The upload is read into bytes here because an ``UploadFile`` wraps an
    open temporary file and cannot be reliably sent to another process. The
    blocking wait on the child process runs in a worker thread so the event
    loop stays responsive.

    Args:
        file: The uploaded audio file.

    Returns:
        ``{"transcription": <text>}`` on success.

    Raises:
        HTTPException: 500 with an ``"Error: ..."`` detail if transcription
            fails.
    """
    audio_bytes = await file.read()
    ok, payload = await run_in_threadpool(_run_in_subprocess, audio_bytes)
    if not ok:
        raise HTTPException(status_code=500, detail=f"Error: {payload}")
    return {"transcription": payload}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8006)