Edited README.md and removed flagging button
Browse files
- README.md +9 -11
- app.py +8 -4
- requirements.txt +2 -2
README.md
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
app_file: app.py
|
4 |
-
sdk: gradio
|
5 |
-
sdk_version: 4.36.1
|
6 |
-
---
|
7 |
-
# Speech Translation Synthesis: A Speech-To-Speech Translator
|
8 |
|
9 |
-
|
|
|
|
|
10 |
|
11 |
## Features
|
12 |
- Transcribe speech from an audio file or microphone input
|
@@ -55,8 +52,8 @@ This project was one of the projects for SDSU's Artificial Intelligence Club for
|
|
55 |
- **translate(text, language)**: Translates the transcribed text into the target language.
|
56 |
- **s2s(audio, language)**: Combines the transcription and translation functions, then synthesizes the translated text into speech using the input speaker's voice.
|
57 |
|
58 |
-
### Supported Languages
|
59 |
-
- Arabic
|
60 |
- Portuguese
|
61 |
- Chinese
|
62 |
- Czech
|
@@ -73,10 +70,11 @@ This project was one of the projects for SDSU's Artificial Intelligence Club for
|
|
73 |
- Hungarian
|
74 |
- Hindi
|
75 |
|
76 |
-
## License
|
77 |
This project is licensed under the MIT License. See the LICENSE file for more details.
|
78 |
|
79 |
## Acknowledgements
|
|
|
80 |
- [Gradio](https://www.gradio.app/) for providing the easy-to-use interface library.
|
81 |
- [Whisper](https://github.com/openai/whisper) for the speech-to-text model.
|
82 |
- [Coqui TTS](https://github.com/coqui-ai/TTS) for the text-to-speech synthesis model.
|
|
|
1 |
+
# Speech Translation Synthesis
|
2 |
+
### A Speech-To-Speech Translator
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
+
SDSU's Artificial Intelligence Club Group Project Spring 2024 semester
|
5 |
+
|
6 |
+
This is a Gradio-based demo that performs speech-to-speech translation. It uses the Whisper model for speech-to-text transcription, the `translate` library for translation, and the Coqui TTS model for text-to-speech synthesis.
|
7 |
|
8 |
## Features
|
9 |
- Transcribe speech from an audio file or microphone input
|
|
|
52 |
- **translate(text, language)**: Translates the transcribed text into the target language.
|
53 |
- **s2s(audio, language)**: Combines the transcription and translation functions, then synthesizes the translated text into speech using the input speaker's voice.
|
54 |
|
55 |
+
### Supported Languages 🗣️
|
56 |
+
- Arabic
|
57 |
- Portuguese
|
58 |
- Chinese
|
59 |
- Czech
|
|
|
70 |
- Hungarian
|
71 |
- Hindi
|
72 |
|
73 |
+
## License
|
74 |
This project is licensed under the MIT License. See the LICENSE file for more details.
|
75 |
|
76 |
## Acknowledgements
|
77 |
+
- SDSU's Artificial Intelligence Club for giving us the idea.
|
78 |
- [Gradio](https://www.gradio.app/) for providing the easy-to-use interface library.
|
79 |
- [Whisper](https://github.com/openai/whisper) for the speech-to-text model.
|
80 |
- [Coqui TTS](https://github.com/coqui-ai/TTS) for the text-to-speech synthesis model.
|
app.py
CHANGED
@@ -31,7 +31,6 @@ def translate(text, language):
|
|
31 |
translated_text = translator.translate(text)
|
32 |
return translated_text
|
33 |
|
34 |
-
|
35 |
# Initialize TTS model outside the function to avoid reinitialization on each call
|
36 |
from TTS.api import TTS
|
37 |
|
@@ -76,7 +75,7 @@ language_dropdown = gr.Dropdown(choices=zip(language_names, language_options),
|
|
76 |
translate_button = gr.Button(value="Synthesize and Translate my Voice!")
|
77 |
transcribed_text = gr.Textbox(label="Transcribed Text")
|
78 |
output_text = gr.Textbox(label="Translated Text")
|
79 |
-
output_speech = gr.Audio(label="
|
80 |
|
81 |
# Gradio interface with the transcribe function as the main function
|
82 |
demo = gr.Interface(
|
@@ -84,10 +83,11 @@ demo = gr.Interface(
|
|
84 |
inputs=[gr.Audio(sources=["upload", "microphone"],
|
85 |
type="filepath",
|
86 |
format='wav',
|
|
|
87 |
show_download_button=True,
|
88 |
waveform_options=gr.WaveformOptions(
|
89 |
waveform_color="#01C6FF",
|
90 |
-
waveform_progress_color="FF69B4",
|
91 |
skip_length=2,
|
92 |
show_controls=False,
|
93 |
)
|
@@ -95,7 +95,11 @@ demo = gr.Interface(
|
|
95 |
language_dropdown],
|
96 |
outputs=[transcribed_text, output_text, output_speech],
|
97 |
theme=gr.themes.Soft(),
|
98 |
-
title="Speech
|
|
|
|
|
|
|
|
|
99 |
)
|
100 |
|
101 |
demo.launch(debug=True, share=True)
|
|
|
31 |
translated_text = translator.translate(text)
|
32 |
return translated_text
|
33 |
|
|
|
34 |
# Initialize TTS model outside the function to avoid reinitialization on each call
|
35 |
from TTS.api import TTS
|
36 |
|
|
|
75 |
translate_button = gr.Button(value="Synthesize and Translate my Voice!")
|
76 |
transcribed_text = gr.Textbox(label="Transcribed Text")
|
77 |
output_text = gr.Textbox(label="Translated Text")
|
78 |
+
output_speech = gr.Audio(label="Synthesized Audio", type="filepath")
|
79 |
|
80 |
# Gradio interface with the transcribe function as the main function
|
81 |
demo = gr.Interface(
|
|
|
83 |
inputs=[gr.Audio(sources=["upload", "microphone"],
|
84 |
type="filepath",
|
85 |
format='wav',
|
86 |
+
# value="Original Audio",
|
87 |
show_download_button=True,
|
88 |
waveform_options=gr.WaveformOptions(
|
89 |
waveform_color="#01C6FF",
|
90 |
+
waveform_progress_color="#FF69B4",
|
91 |
skip_length=2,
|
92 |
show_controls=False,
|
93 |
)
|
|
|
95 |
language_dropdown],
|
96 |
outputs=[transcribed_text, output_text, output_speech],
|
97 |
theme=gr.themes.Soft(),
|
98 |
+
title="Speech Translation Synthesis",
|
99 |
+
description="This speech-to-speech translator uses the Whisper model for speech-to-text "
|
100 |
+
"transcription, the translate library for translation, and the Coqui TTS model for text-to-speech "
|
101 |
+
"synthesis.",
|
102 |
+
allow_flagging="never"
|
103 |
)
|
104 |
|
105 |
demo.launch(debug=True, share=True)
|
requirements.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
numpy
|
2 |
gradio~=4.36.1
|
3 |
git+https://github.com/openai/whisper.git
|
4 |
translate~=3.6.1
|
5 |
-
TTS
|
6 |
ffprobe
|
|
|
1 |
+
numpy
|
2 |
gradio~=4.36.1
|
3 |
git+https://github.com/openai/whisper.git
|
4 |
translate~=3.6.1
|
5 |
+
TTS
|
6 |
ffprobe
|