Uhhy committed on
Commit
a003783
1 Parent(s): a524c1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -1,22 +1,30 @@
1
  from fastapi import FastAPI, HTTPException, UploadFile, File
2
  from pydantic import BaseModel
3
  from multiprocessing import Process, Queue
4
- import whisper
 
5
  import io
6
  import uvicorn
 
7
 
8
  app = FastAPI()
9
 
10
- model = whisper.load_model("large")
 
 
 
11
 
12
  class TranscriptionRequest(BaseModel):
13
  file: UploadFile
14
 
15
  def transcribe_audio(file, queue):
16
  try:
17
- audio = io.BytesIO(file.file.read())
18
- result = model.transcribe(audio)
19
- queue.put(result["text"])
 
 
 
20
  except Exception as e:
21
  queue.put(f"Error: {str(e)}")
22
 
 
1
  from fastapi import FastAPI, HTTPException, UploadFile, File
2
  from pydantic import BaseModel
3
  from multiprocessing import Process, Queue
4
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
5
+ import torch
6
  import io
7
  import uvicorn
8
+ import soundfile as sf
9
 
10
  app = FastAPI()
11
 
12
+ # Cargar el modelo y el procesador
13
+ model_name = "facebook/wav2vec2-large-960h-lv60"
14
+ processor = Wav2Vec2Processor.from_pretrained(model_name)
15
+ model = Wav2Vec2ForCTC.from_pretrained(model_name)
16
 
17
class TranscriptionRequest(BaseModel):
    """Request schema carrying the uploaded audio file to transcribe."""
    # NOTE(review): UploadFile inside a pydantic model is unusual for FastAPI;
    # endpoints typically take `file: UploadFile = File(...)` directly — verify
    # against the route definitions (not visible in this chunk).
    file: UploadFile
19
 
20
def transcribe_audio(file, queue):
    """Transcribe an uploaded audio file and put the result on *queue*.

    Intended to run in a worker process (see `multiprocessing` imports):
    reads the upload into memory, runs it through the Wav2Vec2
    processor/model, and puts either the transcription string or an
    ``"Error: ..."`` string on the queue. Never raises.

    Args:
        file: an object with a ``.file`` file-like attribute
            (e.g. FastAPI ``UploadFile``).
        queue: a ``multiprocessing.Queue`` receiving the result string.
    """
    try:
        # Keep the sample rate instead of discarding it: the model was
        # trained on 16 kHz audio, and passing the real rate lets the
        # processor validate it instead of silently producing garbage
        # for e.g. 44.1 kHz input (a mismatch now surfaces as "Error: ...").
        audio, sample_rate = sf.read(io.BytesIO(file.file.read()))

        # Down-mix multi-channel (stereo) audio to mono — the model
        # expects a 1-D waveform.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        input_values = processor(
            audio,
            sampling_rate=sample_rate,
            return_tensors="pt",
            padding="longest",
        ).input_values

        # Inference only: disabling autograd avoids building the gradient
        # graph, cutting memory use in the worker process.
        with torch.no_grad():
            logits = model(input_values).logits

        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = processor.batch_decode(predicted_ids)[0]
        queue.put(transcription)
    except Exception as e:
        # Report failures through the queue so the parent process never
        # hangs waiting for a result that will not arrive.
        queue.put(f"Error: {str(e)}")
30