from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
from multiprocessing import Process, Queue
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import io
import uvicorn
import soundfile as sf
app = FastAPI()
# Load the model and the processor
model_name = "facebook/wav2vec2-large-960h-lv60"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForCTC.from_pretrained(model_name)
class TranscriptionRequest(BaseModel):
    file: UploadFile
def transcribe_audio_worker(audio_bytes, queue):
    # Runs in a separate process; reports the transcription (or an error) via the queue.
    try:
        # Decode the uploaded audio; the model expects 16 kHz mono input, and the
        # processor will raise a descriptive error if the sampling rate does not match.
        audio, sample_rate = sf.read(io.BytesIO(audio_bytes))
        input_values = processor(
            audio, sampling_rate=sample_rate, return_tensors="pt", padding="longest"
        ).input_values
        with torch.no_grad():
            logits = model(input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        queue.put(transcription)
    except Exception as e:
        queue.put(f"Error: {str(e)}")
@app.post("/transcribe_audio")
async def transcribe_audio(file: UploadFile = File(...)):
    # Read the upload in the request handler; raw bytes can be passed to the
    # worker process, whereas an UploadFile object cannot be pickled.
    audio_bytes = await file.read()
    queue = Queue()
    p = Process(target=transcribe_audio_worker, args=(audio_bytes, queue))
    p.start()
    p.join()
    response = queue.get()
    if response.startswith("Error:"):
        raise HTTPException(status_code=500, detail=response)
    return {"transcription": response}
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
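
# Example client call (a minimal sketch, not part of the app): assumes the server
# is running locally on port 7860, that "sample.wav" is an existing 16 kHz mono
# WAV file, and that the `requests` library is installed.
#
#   import requests
#
#   with open("sample.wav", "rb") as f:
#       resp = requests.post(
#           "http://localhost:7860/transcribe_audio",
#           files={"file": ("sample.wav", f, "audio/wav")},
#       )
#   print(resp.json()["transcription"])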