Spaces:
Runtime error
Runtime error
frogcho123
committed on
Commit
•
29135e4
1
Parent(s):
b9553d2
Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
|
|
12 |
model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
|
13 |
|
14 |
def translate_speech(audio, target_lang):
|
15 |
-
audio = audio.astype("float32")
|
16 |
audio = whisper.pad_or_trim(audio, whisper_model.audio_config.sample_rate)
|
17 |
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
18 |
_, probs = whisper_model.detect_language(mel)
|
@@ -21,7 +21,7 @@ def translate_speech(audio, target_lang):
|
|
21 |
text = result.text
|
22 |
|
23 |
# Translate text
|
24 |
-
tokenizer.src_lang = target_lang
|
25 |
encoded_text = tokenizer(text, return_tensors="pt")
|
26 |
generated_tokens = model.generate(**encoded_text)
|
27 |
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
@@ -33,6 +33,7 @@ def translate_speech(audio, target_lang):
|
|
33 |
|
34 |
return audio_path
|
35 |
|
|
|
36 |
def translate_speech_interface(audio, target_lang):
|
37 |
translated_audio = translate_speech(audio, target_lang)
|
38 |
translated_audio_bytes = open(translated_audio, "rb").read()
|
|
|
12 |
model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
|
13 |
|
14 |
def translate_speech(audio, target_lang):
|
15 |
+
audio = audio[0].astype("float32") # Extract audio from tuple and convert to float32
|
16 |
audio = whisper.pad_or_trim(audio, whisper_model.audio_config.sample_rate)
|
17 |
mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
|
18 |
_, probs = whisper_model.detect_language(mel)
|
|
|
21 |
text = result.text
|
22 |
|
23 |
# Translate text
|
24 |
+
tokenizer.src_lang = target_lang
|
25 |
encoded_text = tokenizer(text, return_tensors="pt")
|
26 |
generated_tokens = model.generate(**encoded_text)
|
27 |
translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
|
|
33 |
|
34 |
return audio_path
|
35 |
|
36 |
+
|
37 |
def translate_speech_interface(audio, target_lang):
|
38 |
translated_audio = translate_speech(audio, target_lang)
|
39 |
translated_audio_bytes = open(translated_audio, "rb").read()
|