frogcho123 committed on
Commit
30318a6
1 Parent(s): cd6653c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -5
app.py CHANGED
@@ -5,13 +5,20 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
  from gtts import gTTS
6
 
7
  def translate_voice(file, target_lang):
8
- # Auto to text (STT)
9
- model = whisper.load_model("base")
10
- audio = whisper.load_audio(file.name)
 
 
 
 
11
  audio = whisper.pad_or_trim(audio)
12
- mel = whisper.log_mel_spectrogram(audio).to(model.device).float()
13
- _, probs = model.detect_language(mel)
14
 
 
 
 
 
 
15
  options = whisper.DecodingOptions()
16
  result = whisper.decode(model, mel, options)
17
 
 
5
  from gtts import gTTS
6
 
7
  def translate_voice(file, target_lang):
8
+ # Load the model and switch to float32
9
+ model = whisper.load_model("base").float()
10
+
11
+ # Load the audio
12
+ audio = whisper.load_audio(from_file)
13
+
14
+ # Pad or trim the audio
15
  audio = whisper.pad_or_trim(audio)
 
 
16
 
17
+ # Convert the audio to a log Mel spectrogram and move it to the same device as the model (CPU in your case)
18
+ mel = whisper.log_mel_spectrogram(audio).to(model.device).float() # convert to full-precision float32
19
+
20
+ # Proceed with your language detection and decoding
21
+ _, probs = model.detect_language(mel)
22
  options = whisper.DecodingOptions()
23
  result = whisper.decode(model, mel, options)
24