Tonic committed
Commit 678c468
Parent: 8fa6df8

Update app.py

Files changed (1)
  app.py  +22 -8
app.py CHANGED
@@ -2,19 +2,27 @@ import gradio as gr
 import torchaudio
 import torch
 
+# Define the list of target languages
+languages = {
+    "English": "eng",
+    "Hindi": "hin",
+    "Portuguese": "por",
+    "Russian": "rus",
+    "Spanish": "spa"
+}
 
-def speech_to_text(audio_file):
-    audio_input, _ = torchaudio.load(audio_file.name)
+def speech_to_text(audio_data, tgt_lang):
+    audio_input, _ = torchaudio.load(audio_data)
     s2t_model = torch.jit.load("unity_on_device_s2t.ptl")
     with torch.no_grad():
-        text = s2t_model(audio_input, tgt_lang=TGT_LANG)
+        text = s2t_model(audio_input, tgt_lang=languages[tgt_lang])
     return text
 
-def speech_to_speech_translation(audio_file):
-    audio_input, _ = torchaudio.load(audio_file.name)
+def speech_to_speech_translation(audio_data, tgt_lang):
+    audio_input, _ = torchaudio.load(audio_data)
     s2st_model = torch.jit.load("unity_on_device.ptl")
     with torch.no_grad():
-        text, units, waveform = s2st_model(audio_input, tgt_lang=TGT_LANG)
+        text, units, waveform = s2st_model(audio_input, tgt_lang=languages[tgt_lang])
     output_file = "/tmp/result.wav"
     torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)
     return text, output_file
@@ -22,14 +30,20 @@ def speech_to_speech_translation(audio_file):
 # Gradio interfaces
 iface_s2t = gr.Interface(
     fn=speech_to_text,
-    inputs=gr.Audio(type="file", label="Upload Audio for Speech to Text"),
+    inputs=[
+        gr.inputs.Audio(label="Upload or Record Audio for Speech to Text"),
+        gr.inputs.Dropdown(list(languages.keys()), label="Select Target Language")
+    ],
     outputs="text",
     title="Speech to Text"
 )
 
 iface_s2st = gr.Interface(
     fn=speech_to_speech_translation,
-    inputs=gr.Audio(type="file", label="Upload Audio for Speech to Speech Translation"),
+    inputs=[
+        gr.inputs.Audio(label="Upload or Record Audio for Speech to Speech Translation"),
+        gr.inputs.Dropdown(list(languages.keys()), label="Select Target Language")
+    ],
     outputs=["text", "audio"],
     title="Speech to Speech Translation"
 )
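
The hunks above end at the interface definitions, so the part of app.py that actually serves them is not visible in this diff. A minimal sketch of how the two interfaces could be exposed together is shown below; it assumes the rest of the file does something equivalent, and the tab titles and launch call here are illustrative, not taken from the commit.

# Sketch only: combine the two interfaces from the diff into one tabbed app.
# gr.TabbedInterface and launch() are standard Gradio APIs; the tab names are assumptions.
demo = gr.TabbedInterface(
    [iface_s2t, iface_s2st],
    ["Speech to Text", "Speech to Speech Translation"]
)

if __name__ == "__main__":
    demo.launch()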