crowbarmassage committed on
Commit 44f6e5d
1 Parent(s): 147fb27

Upload app.py

Files changed (1)
  1. app.py +4 -11
app.py CHANGED
@@ -8,12 +8,6 @@ Original file is located at
  """
 
  # Beginning of Unit 7
- #!pip install git+https://github.com/huggingface/transformers.git
- !pip install torch accelerate torchaudio datasets gradio sentencepiece
- !pip install -U transformers
- #!pip install sacremoses
- #!pip install -Uqq datasets[audio]
- #!pip install git+https://github.com/huggingface/transformers
 
  from transformers.models.markuplm.tokenization_markuplm import MARKUPLM_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING
  import torch, torchaudio
@@ -85,7 +79,7 @@ def speech_to_speech_translation(audio):
      synthesised_speech = synthesise(translated_text)
      return 16000, synthesised_speech
 
- def adjusted_speech_to_speech_translation(audio_filepath):
+ def speech_to_speech_translation(audio_filepath):
      # Load the audio file
      waveform, sampling_rate = torchaudio.load(audio_filepath)
 
@@ -100,12 +94,11 @@ def adjusted_speech_to_speech_translation(audio_filepath):
              "sampling_rate": sampling_rate
          }
      }
-
      transcribed_text = transcribe(audio_dict)
      translated_text = translate(transcribed_text)
+     synthesised_speech = synthesise(translated_text)
      #print(transcribed_text)
      #print(translated_text)
-     synthesised_speech = synthesise(translated_text)
      #print(synthesised_speech)
      #print(torch.min(synthesised_speech), torch.max(synthesised_speech))
      synthesised_speech = (synthesised_speech * 32767).numpy().astype(np.int16)
@@ -118,13 +111,13 @@ import gradio as gr
  demo = gr.Blocks()
 
  mic_translate = gr.Interface(
-     fn=adjusted_speech_to_speech_translation,
+     fn=speech_to_speech_translation,
      inputs=gr.Audio(source="microphone", type="filepath"),
      outputs=gr.Audio(label="Generated Speech", type="numpy"),
  )
 
  file_translate = gr.Interface(
-     fn=adjusted_speech_to_speech_translation,
+     fn=speech_to_speech_translation,
      inputs=gr.Audio(source="upload", type="filepath"),
      outputs=gr.Audio(label="Generated Speech", type="numpy"),
  )
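
The renamed speech_to_speech_translation(audio_filepath) loads the file with torchaudio and wraps the samples in a dict before calling transcribe. A minimal sketch of that preprocessing, assuming transcribe is a transformers automatic-speech-recognition pipeline (which accepts a {"raw": ..., "sampling_rate": ...} dict); the mono mixdown and the whisper-tiny checkpoint are illustrative assumptions, and the exact nested dict shape used in app.py is not fully visible in the diff:

import torchaudio
from transformers import pipeline

# Checkpoint is an assumption for illustration; app.py may load a different model.
transcribe = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

def load_for_asr(audio_filepath: str) -> dict:
    # torchaudio returns (channels, samples); mix down to mono for the pipeline.
    waveform, sampling_rate = torchaudio.load(audio_filepath)
    mono = waveform.mean(dim=0)
    return {"raw": mono.numpy(), "sampling_rate": sampling_rate}

# Usage: transcribed_text = transcribe(load_for_asr("clip.wav"))["text"]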
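
At the end of the function the synthesised float waveform is rescaled to 16-bit PCM, the sample format Gradio's type="numpy" audio output expects. A self-contained sketch of just that step, assuming the synthesiser returns floats in [-1.0, 1.0]:

import torch
import numpy as np

def to_int16_pcm(waveform: torch.Tensor) -> np.ndarray:
    # Scale [-1.0, 1.0] floats into the int16 range and convert to a numpy array.
    return (waveform * 32767).numpy().astype(np.int16)

# Usage: a 440 Hz test tone at 16 kHz stands in for synthesised speech.
tone = torch.sin(2 * torch.pi * 440 * torch.arange(16000) / 16000)
pcm = to_int16_pcm(tone)
print(pcm.dtype, int(pcm.min()), int(pcm.max()))  # int16, close to -32767 and 32767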