frogcho123 committed
Commit 6cfff67
Parent: 9cfc5dc

Update app.py

Files changed (1): app.py (+11, −8)
app.py CHANGED
@@ -4,6 +4,7 @@ import whisper
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from gtts import gTTS
 import IPython.display as ipd
+import numpy as np
 
 # Load Whisper STT model
 whisper_model = whisper.load_model("base")
@@ -12,9 +13,14 @@ whisper_model = whisper.load_model("base")
 tokenizer = AutoTokenizer.from_pretrained("alirezamsh/small100")
 model = AutoModelForSeq2SeqLM.from_pretrained("alirezamsh/small100")
 
-def translate_speech(audio_file, target_lang):
+def translate_speech(audio, target_lang):
+    # Save audio as a temporary file
+    audio_path = "recorded_audio.wav"
+    with open(audio_path, "wb") as f:
+        f.write(audio)
+
     # Load audio
-    audio = whisper.load_audio(audio_file)
+    audio = whisper.load_audio(audio_path)
     audio = whisper.pad_or_trim(audio)
     mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
 
@@ -41,18 +47,15 @@ def translate_speech(audio_file, target_lang):
     return audio_path
 
 def translate_speech_interface(audio, target_lang):
-    audio_path = "recorded_audio.wav"
-    with open(audio_path, "wb") as f:
-        f.write(audio.read())
-
-    translated_audio = translate_speech(audio_path, target_lang)
+    translated_audio = translate_speech(audio, target_lang)
     translated_audio = open(translated_audio, "rb")
 
     return translated_audio
 
 # Define the Gradio interface
-audio_recording = gr.inputs.Audio(source="microphone", type="wav", label="Record your speech")
+audio_recording = gr.inputs.Audio(source="microphone", type="numpy", label="Record your speech")
 target_language = gr.inputs.Dropdown(["en", "ru", "fr"], label="Target Language")
 output_audio = gr.outputs.Audio(type="audio/mpeg", label="Translated Audio")
 
 gr.Interface(fn=translate_speech_interface, inputs=[audio_recording, target_language], outputs=output_audio, title="Speech Translator").launch()
+
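One caveat worth noting about this change: with `type="numpy"`, Gradio's microphone input delivers a `(sample_rate, samples)` tuple containing a NumPy array rather than raw WAV bytes, so the new `f.write(audio)` would not by itself produce a valid WAV file for `whisper.load_audio` to read. A minimal sketch of a safer save step, assuming Gradio's documented numpy audio format and adding scipy as a dependency (the helper name `save_numpy_audio` is ours, not the commit's):

from scipy.io import wavfile

def save_numpy_audio(audio, audio_path="recorded_audio.wav"):
    # Gradio's type="numpy" microphone input arrives as a
    # (sample_rate, samples) tuple, where samples is a NumPy array.
    sample_rate, samples = audio
    # wavfile.write emits a proper WAV header plus the PCM samples,
    # which whisper.load_audio can then read back through ffmpeg.
    wavfile.write(audio_path, sample_rate, samples)
    return audio_path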
 
 
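The translation and speech-synthesis body of `translate_speech` (old lines 21-40) is unchanged by this commit and elided between the second and third hunks. For orientation only, a typical SMaLL-100 plus gTTS middle section looks like the sketch below; it follows the `alirezamsh/small100` model card and gTTS usage, not the commit's actual code, and the helper name `transcribe_translate_tts` is ours. It assumes `whisper_model`, `tokenizer`, and `model` are already loaded as in app.py.

import whisper
from gtts import gTTS

def transcribe_translate_tts(mel, target_lang):
    # Decode the mel spectrogram into text with Whisper
    result = whisper.decode(whisper_model, mel, whisper.DecodingOptions(fp16=False))
    text = result.text

    # SMaLL-100 selects the target language via the tokenizer
    tokenizer.tgt_lang = target_lang
    encoded = tokenizer(text, return_tensors="pt")
    generated = model.generate(**encoded)
    translated_text = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]

    # gTTS writes an MP3, consistent with the "audio/mpeg" output type
    tts = gTTS(translated_text, lang=target_lang)
    audio_path = "translated_audio.mp3"
    tts.save(audio_path)
    return audio_path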