Spaces:

frogcho123
/

s2t2s

Running

frogcho123 committed on Jun 9, 2023

Commit

30318a6

•

1 Parent(s): cd6653c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,13 +5,20 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from gtts import gTTS
 def translate_voice(file, target_lang):
-    # Auto to text (STT)
-    model = whisper.load_model("base")
-    audio = whisper.load_audio(file.name)
     audio = whisper.pad_or_trim(audio)
-    mel = whisper.log_mel_spectrogram(audio).to(model.device).float()
-    _, probs = model.detect_language(mel)
     options = whisper.DecodingOptions()
     result = whisper.decode(model, mel, options)

 from gtts import gTTS
 def translate_voice(file, target_lang):
+    # Load the model and switch to float32
+    model = whisper.load_model("base").float()
+# Load the audio
+    audio = whisper.load_audio(from_file)
+# Pad or trim the audio
     audio = whisper.pad_or_trim(audio)
+# Convert the audio to a log Mel spectrogram and move it to the same device as the model (CPU in your case)
+    mel = whisper.log_mel_spectrogram(audio).to(model.device).float()  # convert to full-precision float32
+# Proceed with your language detection and decoding
+    _, probs = model.detect_language(mel)
     options = whisper.DecodingOptions()
     result = whisper.decode(model, mel, options)