Add app.py
- Voice2VoiceTranslation.ipynb +0 -0
- __pycache__/app.cpython-310.pyc +0 -0
- __pycache__/app.cpython-311.pyc +0 -0
- __pycache__/my_transcribe.cpython-310.pyc +0 -0
- __pycache__/my_transcribe.cpython-311.pyc +0 -0
- __pycache__/my_translate.cpython-310.pyc +0 -0
- __pycache__/my_tts.cpython-310.pyc +0 -0
- __pycache__/my_tts.cpython-311.pyc +0 -0
- app.py +22 -0
- my_tts.py +6 -1
Voice2VoiceTranslation.ipynb
CHANGED
The diff for this file is too large to render.
__pycache__/app.cpython-310.pyc
ADDED
Binary file (851 Bytes)

__pycache__/app.cpython-311.pyc
ADDED
Binary file (1.23 kB)

__pycache__/my_transcribe.cpython-310.pyc
ADDED
Binary file (750 Bytes)

__pycache__/my_transcribe.cpython-311.pyc
ADDED
Binary file (903 Bytes)

__pycache__/my_translate.cpython-310.pyc
ADDED
Binary file (1.12 kB)

__pycache__/my_tts.cpython-310.pyc
ADDED
Binary file (1.33 kB)

__pycache__/my_tts.cpython-311.pyc
CHANGED
Binary files a/__pycache__/my_tts.cpython-311.pyc and b/__pycache__/my_tts.cpython-311.pyc differ
app.py
ADDED
@@ -0,0 +1,22 @@
+from my_transcribe import transcribe_audio_locally
+from my_translate import translate_text
+from my_tts import text_to_speech
+
+def voice_to_voice(audio_file_path):
+    # Step 1: Transcribe
+    result = transcribe_audio_locally(audio_file_path, model_size="base")
+    source_text = result["text"]
+    print("Transcribed:", source_text)
+
+    # Step 2: Translate
+    translated = translate_text(source_text, from_lang="en", to_lang="hi")
+    print("Translated:", translated)
+
+    # Step 3: Text to Speech
+    output_audio_path = text_to_speech(translated, "v2/hi_speaker_2")
+    print("Saved translated speech to:", output_audio_path)
+
+    return output_audio_path
+
+if __name__ == "__main__":
+    voice_to_voice("Input Audio Sample.wav")
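The helper modules that app.py imports (my_transcribe.py and my_translate.py) are not part of this diff, so only their call signatures are visible. A minimal sketch of what they could look like, assuming the openai-whisper package for local transcription and the deep-translator package for translation (both library choices are assumptions, not confirmed by this commit):

# my_transcribe.py (hypothetical sketch, assumes openai-whisper)
import whisper

def transcribe_audio_locally(audio_file_path, model_size="base"):
    # Load a local Whisper model and transcribe the file; the returned dict
    # includes a "text" key, which is how app.py reads the result.
    model = whisper.load_model(model_size)
    return model.transcribe(audio_file_path)

# my_translate.py (hypothetical sketch, assumes deep-translator)
from deep_translator import GoogleTranslator

def translate_text(text, from_lang="en", to_lang="hi"):
    # Translate the transcribed text; the language codes mirror the
    # from_lang/to_lang parameters used in app.py.
    return GoogleTranslator(source=from_lang, target=to_lang).translate(text)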
my_tts.py
CHANGED
@@ -1,5 +1,6 @@
 from transformers import BarkModel, AutoProcessor
 import torch
+import scipy
 
 def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
     """
@@ -34,5 +35,9 @@ def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
     # generate speech
     speech_output = model.generate(**inputs)
     sampling_rate = model.generation_config.sample_rate
+    path = "output_audio.wav"
 
-
+    # Save the generated audio to a file
+    scipy.io.wavfile.write("output_audio.wav", rate=sampling_rate, data=speech_output[0].cpu().numpy())
+
+    return path
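Only the head and tail of text_to_speech appear in the hunks above; the model-loading middle of my_tts.py is not shown in this diff. For context, a minimal standalone sketch of how the function could fit together with the new save-and-return behaviour, assuming the suno/bark-small checkpoint (the actual checkpoint used in my_tts.py is not visible here):

from transformers import BarkModel, AutoProcessor
from scipy.io import wavfile  # explicit submodule import; the commit itself uses "import scipy"

def text_to_speech(text, voice_preset="v2/hi_speaker_2"):
    # Checkpoint name is an assumption; the diff does not show which Bark checkpoint is loaded.
    processor = AutoProcessor.from_pretrained("suno/bark-small")
    model = BarkModel.from_pretrained("suno/bark-small")

    # Encode the text with the chosen speaker preset and generate speech.
    inputs = processor(text, voice_preset=voice_preset)
    speech_output = model.generate(**inputs)
    sampling_rate = model.generation_config.sample_rate

    # Save the waveform and return the path, mirroring the lines added in the second hunk.
    path = "output_audio.wav"
    wavfile.write(path, rate=sampling_rate, data=speech_output[0].cpu().numpy())
    return path

In this sketch the checkpoint is loaded on every call and the output is a fixed file that is overwritten each time; loading the model once at module level would avoid the reload cost if that matters for the Space.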