Pawan Kumar Pradhan committed on
Commit
36bb9b3
·
1 Parent(s): bdc5baa

3lang hardcode commit

Browse files
Files changed (1) hide show
  1. app.py +102 -70
app.py CHANGED
@@ -5,46 +5,20 @@ from TTS.api import TTS
5
  import uuid
6
  import os
7
  from pathlib import Path
 
 
8
 
9
  os.environ["COQUI_TOS_AGREED"] = "1"
10
 
11
  model = whisper.load_model("base")
12
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
13
 
14
-
15
- def v2vtranslate(audiofile):
16
-
17
- print("Starting transcription...")
18
- transcription_result = transcribeaudio(audiofile)
19
-
20
- if transcription_result.status == model.transcribe.error:
21
- raise gr.Error(transcription_result.error)
22
- else:
23
- text = transcription_result.text
24
- print(f"Transcribed Text: {text}")
25
-
26
- print("Starting translation...")
27
- es_translation,fr_translation,hi_translation,ja_translation = translatetext(text)
28
- print(f"Translations:\nSpanish: {es_translation}\nFrench: {fr_translation}\nHindi: {hi_translation}\nJapanese: {ja_translation}")
29
-
30
- print("Generating TTS audio files(Outside Function)...")
31
- es_translation_path = readtranslation(es_translation,audiofile)
32
- fr_translation_path = readtranslation(fr_translation,audiofile)
33
- hi_translation_path = readtranslation(hi_translation,audiofile)
34
- ja_translation_path = readtranslation(ja_translation,audiofile)
35
- print(f"Generated audio paths:\nSpanish: {es_translation_path}\nFrench: {fr_translation_path}\nHindi: {hi_translation_path}\nJapanese: {ja_translation_path}")
36
-
37
-
38
-
39
- es_path = Path(es_translation_path)
40
- fr_path = Path(fr_translation_path)
41
- hi_path = Path(hi_translation_path)
42
- ja_path = Path(ja_translation_path)
43
 
44
 
45
 
46
  def transcribeaudio(audiofile):
47
-
48
  print("Transcribing audio...")
49
  tresult = model.transcribe(audiofile)
50
 
@@ -57,49 +31,107 @@ def transcribeaudio(audiofile):
57
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
58
 
59
  _, probs = model.detect_language(mel)
60
- print(f"Detected language: {max(probs, key=probs.get)}")
61
-
62
- return tresult
63
-
64
- def translatetext(text):
65
-
66
- translator_spanish = Translator(from_lang="en",to_lang="es")
67
- es_text = translator_spanish.translate(text)
68
-
69
- translator_french = Translator(from_lang="en",to_lang="fr")
70
- fr_text = translator_french.translate(text)
71
-
72
- translator_hindi = Translator(from_lang="en",to_lang="hi")
73
- hi_text = translator_hindi.translate(text)
74
-
75
- translator_japanese = Translator(from_lang="en",to_lang="ja")
76
- ja_text = translator_japanese.translate(text)
77
- print(f"Japanese Translation(Inside Function): {ja_text}")
78
-
79
- return es_text,fr_text,hi_text,ja_text
80
-
81
-
82
- def readtranslation(text,audiofile):
83
-
84
- print(f"Generating TTS for text(Inside Function): {text}")
85
- output_path = f"{uuid.uuid4()}.wav"
86
  tts.tts_to_file(text=text,
87
- file_path=output_path,
88
- speaker_wav=audiofile,
89
- language="en")
90
  print(f"Generated audio file at: {output_path}")
91
  return output_path
92
 
93
-
94
- audio_input = gr.Audio(
95
- sources=['microphone'],
96
- type="filepath"
97
- )
98
- demo = gr.Interface(
99
- fn=v2vtranslate,
100
- inputs=audio_input,
101
- outputs=[gr.Audio(label="Spanish"),gr.Audio(label="French"),gr.Audio(label="Hindi"),gr.Audio(label="Japanese")]
102
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  if __name__ == "__main__":
105
- demo.launch()
 
 
5
import uuid
import os
from pathlib import Path
import gc
import torch

# Accept the Coqui model license non-interactively.
os.environ["COQUI_TOS_AGREED"] = "1"

# Whisper "base" for speech-to-text; XTTS v2 for voice-cloned multilingual TTS.
model = whisper.load_model("base")
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")

# Directory where generated wav files are written. The original Colab path is
# kept as the default for backward compatibility; set OUTPUT_AUDIO_DIR to run
# on hosts where /content is not writable.
output_dir = os.environ.get("OUTPUT_AUDIO_DIR", "/content/output_audio")
os.makedirs(output_dir, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
 
21
  def transcribeaudio(audiofile):
 
22
  print("Transcribing audio...")
23
  tresult = model.transcribe(audiofile)
24
 
 
31
  mel = whisper.log_mel_spectrogram(audio).to(model.device)
32
 
33
  _, probs = model.detect_language(mel)
34
+ detected_language = max(probs, key=probs.get)
35
+ print(f"Detected language: {detected_language}")
36
+
37
+ return {"text": tresult["text"], "language": detected_language}
38
+
39
def translatetext(text, source_lang):
    """Translate *text* from *source_lang* into Spanish, French and Hindi.

    Returns a list of three strings in the fixed order [es, fr, hi]; a
    failed translation is replaced by an "Error: ..." message string so the
    caller always receives three entries.
    """
    target_names = {"es": "Spanish", "fr": "French", "hi": "Hindi"}
    results = []

    for code in ("es", "fr", "hi"):
        name = target_names[code]
        try:
            translated = Translator(from_lang=source_lang, to_lang=code).translate(text)
            print(f"{name} Translation: {translated}")
        except Exception as e:
            print(f"Error translating to {name}: {str(e)}")
            translated = f"Error: Could not translate to {name}"
        results.append(translated)

    return results
54
+
55
def readtranslation(text, audiofile, language):
    """Synthesize *text* as speech in *language*, cloning the voice heard in
    *audiofile*, and return the path of the generated wav file.

    Files are written into the module-level output_dir with a UUID name so
    concurrent requests never collide.
    """
    wav_name = f"{language}_{uuid.uuid4()}.wav"
    destination = os.path.join(output_dir, wav_name)
    print(f"Generating TTS for text: {text}")
    tts.tts_to_file(
        text=text,
        file_path=destination,
        speaker_wav=audiofile,
        language=language,
    )
    print(f"Generated audio file at: {destination}")
    return destination
64
 
65
def voice_to_voice(audiofile, progress=gr.Progress()):
    """Full pipeline: transcribe -> translate (es/fr/hi) -> voice-cloned TTS.

    Returns six values for the UI: the three generated audio paths followed
    by the three translated texts. A failed TTS step yields None for that
    language's path instead of aborting the whole run. ``gr.Progress()`` as a
    default argument is the gradio idiom for progress reporting. Memory is
    always cleaned up on exit, success or failure.
    """
    progress(0, desc="Starting process...")
    try:
        progress(0.2, desc="Transcribing audio...")
        result = transcribeaudio(audiofile)

        # Defensive: surface an explicit transcription failure if one is
        # reported (NOTE(review): the visible transcribeaudio never sets
        # "status" — confirm whether this branch is reachable).
        if isinstance(result, dict) and result.get("status") == "error":
            raise gr.Error(result["error"])

        source_text = result["text"]
        source_lang = result["language"]

        progress(0.4, desc="Translating text...")
        translated_texts = translatetext(source_text, source_lang)

        target_langs = ["es", "fr", "hi"]
        generated_paths = []
        for index, (lang, translated) in enumerate(zip(target_langs, translated_texts)):
            progress((index + 1) * 0.1 + 0.5, desc=f"Generating {lang} audio...")
            try:
                generated_paths.append(readtranslation(translated, audiofile, lang))
            except Exception as e:
                print(f"Error generating audio for {lang}: {str(e)}")
                generated_paths.append(None)

        progress(1.0, desc="Process complete!")
        # Order matches output_components: three audios, then three texts.
        return generated_paths + translated_texts
    except Exception as e:
        raise gr.Error(f"An error occurred: {str(e)}")
    finally:
        cleanup_memory()
97
+
98
# UI layout: microphone input and buttons on top; one audio-player + text
# group per target language underneath. voice_to_voice returns three audio
# paths followed by three translated texts, matching output_components.
with gr.Blocks() as demo:
    gr.Markdown("## Record yourself in any language and immediately receive voice translations.")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                show_download_button=True,
                waveform_options=gr.WaveformOptions(
                    waveform_color="#01C6FF",
                    waveform_progress_color="#0066B4",
                    skip_length=2,
                    show_controls=False,
                ),
            )
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
                btn = gr.ClearButton(audio_input, "Clear")

    # Build the three identical output groups in a loop instead of
    # copy-pasting one block per language.
    audio_outputs = []
    text_outputs = []
    with gr.Row():
        for label in ("Spanish", "French", "Hindi"):
            with gr.Group():
                audio_outputs.append(gr.Audio(label=label, interactive=False))
                text_outputs.append(gr.Markdown())

    output_components = audio_outputs + text_outputs
    submit.click(fn=voice_to_voice, inputs=audio_input,
                 outputs=output_components, show_progress=True)
129
+
130
def cleanup_memory():
    """Free Python garbage and, when a GPU is present, cached CUDA memory.

    Called after every pipeline run (and at shutdown) to keep the long-lived
    whisper/XTTS process from accumulating memory between requests.
    """
    gc.collect()
    # Only touch the CUDA runtime when a GPU is actually available; on
    # CPU-only hosts there is nothing to release.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    print("Memory cleaned up")
134
 
135
if __name__ == "__main__":
    try:
        # launch() blocks until the gradio server is stopped.
        demo.launch()
    finally:
        # Run the final cleanup even if launch() exits with an error;
        # the original only reached it on a normal return.
        cleanup_memory()