Add app.py
- Voice2VoiceTranslation.ipynb +0 -0
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/my_transcribe.cpython-310.pyc +0 -0
- __pycache__/my_transcribe.cpython-311.pyc +0 -0
- __pycache__/my_translate.cpython-310.pyc +0 -0
- __pycache__/my_tts.cpython-310.pyc +0 -0
- __pycache__/my_tts.cpython-311.pyc +0 -0
- app.py +22 -0
- my_tts.py +6 -1
Voice2VoiceTranslation.ipynb
CHANGED
The diff for this file is too large to render.
__pycache__/app.cpython-310.pyc
ADDED
Binary file (851 Bytes)

__pycache__/app.cpython-311.pyc
ADDED
Binary file (1.23 kB)

__pycache__/my_transcribe.cpython-310.pyc
ADDED
Binary file (750 Bytes)

__pycache__/my_transcribe.cpython-311.pyc
ADDED
Binary file (903 Bytes)

__pycache__/my_translate.cpython-310.pyc
ADDED
Binary file (1.12 kB)

__pycache__/my_tts.cpython-310.pyc
ADDED
Binary file (1.33 kB)

__pycache__/my_tts.cpython-311.pyc
CHANGED
Binary files a/__pycache__/my_tts.cpython-311.pyc and b/__pycache__/my_tts.cpython-311.pyc differ
app.py
ADDED
@@ -0,0 +1,22 @@
+from my_transcribe import transcribe_audio_locally
+from my_translate import translate_text
+from my_tts import text_to_speech
+
+def voice_to_voice(audio_file_path):
+    # Step 1: Transcribe
+    result = transcribe_audio_locally(audio_file_path, model_size="base")
+    source_text = result["text"]
+    print("Transcribed:", source_text)
+
+    # Step 2: Translate
+    translated = translate_text(source_text, from_lang="en", to_lang="hi")
+    print("Translated:", translated)
+
+    # Step 3: Text to Speech
+    output_audio_path = text_to_speech(translated, "v2/hi_speaker_2")
+    print("Saved translated speech to:", output_audio_path)
+
+    return output_audio_path
+
+if __name__ == "__main__":
+    voice_to_voice("Input Audio Sample.wav")
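The helper modules that app.py imports (my_transcribe.py and my_translate.py) are not part of this diff, so only their call signatures are visible. A minimal sketch of what they could look like, assuming the openai-whisper package for local transcription and the deep-translator package for translation (both library choices are assumptions, not confirmed by this commit):

# my_transcribe.py (hypothetical sketch, assumes openai-whisper)
import whisper

def transcribe_audio_locally(audio_file_path, model_size="base"):
    # Load a local Whisper model and transcribe the file; the returned dict
    # includes a "text" key, which is how app.py reads the result.
    model = whisper.load_model(model_size)
    return model.transcribe(audio_file_path)

# my_translate.py (hypothetical sketch, assumes deep-translator)
from deep_translator import GoogleTranslator

def translate_text(text, from_lang="en", to_lang="hi"):
    # Translate the transcribed text; the language codes mirror the
    # from_lang/to_lang parameters used in app.py.
    return GoogleTranslator(source=from_lang, target=to_lang).translate(text)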
my_tts.py
CHANGED
@@ -1,5 +1,6 @@
 from transformers import BarkModel, AutoProcessor
 import torch
+import scipy
 
 def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
     """
@@ -34,5 +35,9 @@ def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
     # generate speech
     speech_output = model.generate(**inputs)
     sampling_rate = model.generation_config.sample_rate
+    path = "output_audio.wav"
 
-
+    # Save the generated audio to a file
+    scipy.io.wavfile.write("output_audio.wav", rate=sampling_rate, data=speech_output[0].cpu().numpy())
+
+    return path
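Only the head and tail of text_to_speech appear in the hunks above; the model-loading middle of my_tts.py is not shown in this diff. For context, a minimal standalone sketch of how the function could fit together with the new save-and-return behaviour, assuming the suno/bark-small checkpoint (the actual checkpoint used in my_tts.py is not visible here):

from transformers import BarkModel, AutoProcessor
from scipy.io import wavfile  # explicit submodule import; the commit itself uses "import scipy"

def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
    # Checkpoint name is an assumption; the diff does not show which Bark checkpoint is loaded.
    processor = AutoProcessor.from_pretrained("suno/bark-small")
    model = BarkModel.from_pretrained("suno/bark-small")

    # Encode the text with the chosen speaker preset and generate speech.
    inputs = processor(text, voice_preset=voice_preset)
    speech_output = model.generate(**inputs)
    sampling_rate = model.generation_config.sample_rate

    # Save the waveform and return the path, mirroring the lines added in the second hunk.
    path = "output_audio.wav"
    wavfile.write(path, rate=sampling_rate, data=speech_output[0].cpu().numpy())
    return path

In this sketch the checkpoint is loaded on every call and the output is a fixed file that is overwritten each time; loading the model once at module level would avoid the reload cost if that matters for the Space.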