rohitptnk committed on
Commit d000c57 · 1 Parent(s): 6fea31a

Add app.py

Voice2VoiceTranslation.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
__pycache__/app.cpython-310.pyc ADDED
Binary file (851 Bytes).

__pycache__/app.cpython-311.pyc ADDED
Binary file (1.23 kB).

__pycache__/my_transcribe.cpython-310.pyc ADDED
Binary file (750 Bytes).

__pycache__/my_transcribe.cpython-311.pyc ADDED
Binary file (903 Bytes).

__pycache__/my_translate.cpython-310.pyc ADDED
Binary file (1.12 kB).

__pycache__/my_tts.cpython-310.pyc ADDED
Binary file (1.33 kB).
 
__pycache__/my_tts.cpython-311.pyc CHANGED
Binary files a/__pycache__/my_tts.cpython-311.pyc and b/__pycache__/my_tts.cpython-311.pyc differ
 
app.py ADDED
@@ -0,0 +1,22 @@
+from my_transcribe import transcribe_audio_locally
+from my_translate import translate_text
+from my_tts import text_to_speech
+
+def voice_to_voice(audio_file_path):
+    # Step 1: Transcribe
+    result = transcribe_audio_locally(audio_file_path, model_size="base")
+    source_text = result["text"]
+    print("Transcribed:", source_text)
+
+    # Step 2: Translate
+    translated = translate_text(source_text, from_lang="en", to_lang="hi")
+    print("Translated:", translated)
+
+    # Step 3: Text to Speech
+    output_audio_path = text_to_speech(translated, "v2/hi_speaker_2")
+    print("Saved translated speech to:", output_audio_path)
+
+    return output_audio_path
+
+if __name__ == "__main__":
+    voice_to_voice("Input Audio Sample.wav")
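For context, app.py relies on three small interfaces from the helper modules. my_transcribe.py and my_translate.py are not part of this commit, so the stubs below are an assumption inferred from the call sites above (a Whisper-style result dict with a "text" key, and a translator that returns a plain string), not the repository's actual implementations:

# my_transcribe.py -- assumed interface, inferred from app.py (not in this diff)
def transcribe_audio_locally(audio_file_path: str, model_size: str = "base") -> dict:
    """Transcribe a local audio file and return a dict containing at least a "text" key."""
    ...

# my_translate.py -- assumed interface, inferred from app.py (not in this diff)
def translate_text(text: str, from_lang: str = "en", to_lang: str = "hi") -> str:
    """Translate text from from_lang to to_lang and return the translated string."""
    ...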
my_tts.py CHANGED
@@ -1,5 +1,6 @@
 from transformers import BarkModel, AutoProcessor
 import torch
+import scipy
 
 def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
     """
@@ -34,5 +35,9 @@ def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
     # generate speech
     speech_output = model.generate(**inputs)
     sampling_rate = model.generation_config.sample_rate
+    path = "output_audio.wav"
 
-    return speech_output, sampling_rate
+    # Save the generated audio to a file
+    scipy.io.wavfile.write(path, rate=sampling_rate, data=speech_output[0].cpu().numpy())
+
+    return path
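With this change, text_to_speech writes the generated Bark audio to output_audio.wav and returns the file path instead of the raw (speech_output, sampling_rate) pair, which is the contract the new app.py expects. A minimal usage sketch under that assumption (the Hindi sample text is only an illustration):

from scipy.io import wavfile
from my_tts import text_to_speech

# Generate Hindi speech; the function saves a WAV file and returns its path
path = text_to_speech("नमस्ते दुनिया", voice_preset="v2/hi_speaker_2")

# Read the file back to confirm its sample rate and duration
rate, data = wavfile.read(path)
print(f"{path}: {len(data) / rate:.2f} s at {rate} Hz")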