Spaces:
Running
Running
frogcho123
committed on
Commit
•
30318a6
1
Parent(s):
cd6653c
Update app.py
Browse files
app.py
CHANGED
@@ -5,13 +5,20 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
5 |
from gtts import gTTS
|
6 |
|
7 |
def translate_voice(file, target_lang):
|
8 |
-
#
|
9 |
-
model = whisper.load_model("base")
|
10 |
-
|
|
|
|
|
|
|
|
|
11 |
audio = whisper.pad_or_trim(audio)
|
12 |
-
mel = whisper.log_mel_spectrogram(audio).to(model.device).float()
|
13 |
-
_, probs = model.detect_language(mel)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
15 |
options = whisper.DecodingOptions()
|
16 |
result = whisper.decode(model, mel, options)
|
17 |
|
|
|
5 |
from gtts import gTTS
|
6 |
|
7 |
def translate_voice(file, target_lang):
|
8 |
+
# Load the model and switch to float32
|
9 |
+
model = whisper.load_model("base").float()
|
10 |
+
|
11 |
+
# Load the audio
|
12 |
+
audio = whisper.load_audio(from_file)
|
13 |
+
|
14 |
+
# Pad or trim the audio
|
15 |
audio = whisper.pad_or_trim(audio)
|
|
|
|
|
16 |
|
17 |
+
# Convert the audio to a log Mel spectrogram and move it to the same device as the model (CPU in your case)
|
18 |
+
mel = whisper.log_mel_spectrogram(audio).to(model.device).float() # convert to full-precision float32
|
19 |
+
|
20 |
+
# Proceed with your language detection and decoding
|
21 |
+
_, probs = model.detect_language(mel)
|
22 |
options = whisper.DecodingOptions()
|
23 |
result = whisper.decode(model, mel, options)
|
24 |
|