frogcho123 committed on
Commit
0d7f26b
1 Parent(s): ae8d075

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -14,7 +14,9 @@ model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
14
  def translate_speech(audio, target_lang):
15
  if isinstance(audio, tuple):
16
  audio = audio[0]
17
- audio = audio.astype("float32") # Convert audio to float32
 
 
18
  audio = whisper.pad_or_trim(audio, whisper_model.audio_config.sample_rate)
19
  mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
20
  _, probs = whisper_model.detect_language(mel)
@@ -37,6 +39,7 @@ def translate_speech(audio, target_lang):
37
 
38
 
39
 
 
40
  def translate_speech_interface(audio, target_lang):
41
  translated_audio = translate_speech(audio, target_lang)
42
  translated_audio_bytes = open(translated_audio, "rb").read()
 
14
  def translate_speech(audio, target_lang):
15
  if isinstance(audio, tuple):
16
  audio = audio[0]
17
+ if isinstance(audio, int):
18
+ audio = [audio]
19
+ audio = np.array(audio).astype("float32") # Convert audio to float32
20
  audio = whisper.pad_or_trim(audio, whisper_model.audio_config.sample_rate)
21
  mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
22
  _, probs = whisper_model.detect_language(mel)
 
39
 
40
 
41
 
42
+
43
  def translate_speech_interface(audio, target_lang):
44
  translated_audio = translate_speech(audio, target_lang)
45
  translated_audio_bytes = open(translated_audio, "rb").read()