Spaces:
Sleeping
Sleeping
add app file
Browse files- README.md +38 -5
- app.py +54 -0
- requirements.txt +4 -0
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
-
title: Demo Lux Tts
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.36.1
|
8 |
app_file: app.py
|
@@ -10,4 +10,37 @@ pinned: false
|
|
10 |
license: cc-by-nc-sa-4.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Demo Lux Piper Tts
|
3 |
+
emoji: π
|
4 |
+
colorFrom: pink
|
5 |
+
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.36.1
|
8 |
app_file: app.py
|
|
|
10 |
license: cc-by-nc-sa-4.0
|
11 |
---
|
12 |
|
13 |
+
# Luxembourgish Text-to-Speech Synthesizer
|
14 |
+
|
15 |
+
This is a text-to-speech synthesizer that uses the Piper TTS model to synthesize Luxembourgish text into speech. The project includes a Gradio interface that allows users to enter Luxembourgish text and hear the synthesized speech.
|
16 |
+
|
17 |
+
|
18 |
+
## Demo
|
19 |
+
|
20 |
+
You can try out the demo of this project on Hugging Face Spaces [here](https://huggingface.co/spaces/sekhan/demo-lux-piper-tts).
|
21 |
+
|
22 |
+
## Usage
|
23 |
+
|
24 |
+
To use the project, simply enter some Luxembourgish text into the input field and click the "Synthesize" button to hear the synthesized speech.
|
25 |
+
|
26 |
+
## Files
|
27 |
+
|
28 |
+
The project includes the following files:
|
29 |
+
|
30 |
+
- `app.py`: the main Python script that contains the code for the Gradio interface and the text-to-speech synthesizer.
|
31 |
+
- `requirements.txt`: a list of the necessary libraries and their versions.
|
32 |
+
- `.gitignore`: a file that excludes certain files and directories from being tracked by Git.
|
33 |
+
- `lu_rtl_high3000.onnx` and `lu_rtl_high3000.onnx.json`: the Piper TTS model and configuration files for Luxembourgish.
|
34 |
+
|
35 |
+
## License
|
36 |
+
|
37 |
+
This project is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0).
|
38 |
+
|
39 |
+
## Acknowledgements
|
40 |
+
|
41 |
+
The Luxembourgish voice is trained on the `rtl.lu : 1257 luxembourgish male samples (© RTL-CLT-UFA)` subset of the dataset `mbarnig/lb-de-fr-en-pt-12800-TTS-CORPUS`.
|
42 |
+
|
43 |
+
|
44 |
+
## Disclaimer
|
45 |
+
|
46 |
+
This project is a demo and is not intended for production use. The text data entered into the interface is not saved and is only used for the speech synthesis.
|
app.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from huggingface_hub import hf_hub_download
|
3 |
+
import gradio as gr
|
4 |
+
from piper import PiperVoice
|
5 |
+
from io import BytesIO
|
6 |
+
import wave
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
|
10 |
+
# Hub location of the Luxembourgish Piper voice (model + JSON config).
_VOICE_REPO_ID = "sekhan/luxembourgish-voice"
_VOICE_MODEL_FILE = "high/lu_rtl_high3239.onnx"

# Lazily initialised voice, shared by all requests so the model is only
# downloaded/loaded once per process instead of once per synthesis call.
_voice = None


def _load_voice():
    """Return the shared Piper voice, loading it on first use."""
    global _voice
    if _voice is None:
        # .get() instead of [] so a missing HF_TOKEN does not raise KeyError;
        # hf_hub_download is then called with token=None.
        token = os.environ.get('HF_TOKEN')
        model_path = hf_hub_download(repo_id=_VOICE_REPO_ID,
                                     repo_type='dataset',
                                     filename=_VOICE_MODEL_FILE,
                                     token=token)
        config_path = hf_hub_download(repo_id=_VOICE_REPO_ID,
                                      repo_type='dataset',
                                      filename=_VOICE_MODEL_FILE + ".json",
                                      token=token)
        _voice = PiperVoice.load(model_path, config_path)
    return _voice


def text_to_speech(text):
    """Synthesize *text* with the Luxembourgish Piper voice.

    Args:
        text: The text to speak. ``None``, empty and whitespace-only values
            are rejected without loading the model.

    Returns:
        A ``(audio, message)`` tuple. On success ``audio`` is the complete
        WAV file (header included) as ``bytes`` and ``message`` is ``None``.
        For blank input ``audio`` is ``None`` and ``message`` explains why.
    """
    if not text or not text.strip():
        return None, "Please enter some text to synthesize."

    voice = _load_voice()

    buffer = BytesIO()
    with wave.open(buffer, 'wb') as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit samples
        wav_file.setnchannels(1)  # mono
        voice.synthesize(text, wav_file, sentence_silence=0.5, length_scale=1.1, noise_scale=0.75)

    # Read only after the wave writer is closed so the header is finalised.
    # The buffer already holds WAV bytes; no numpy round-trip is needed.
    return buffer.getvalue(), None
|
34 |
+
|
35 |
+
|
36 |
+
# Gradio Interface
# Layout: heading, short notice, text input, trigger button, audio player.
with gr.Blocks(theme=gr.themes.Base(), css="footer {visibility: hidden}") as blocks:
    gr.Markdown("# Luxembourgish Text-to-Speech Synthesizer")
    gr.Markdown("Enter Luxembourgish text to synthesize it into speech. This is a very early demo. Your spontaneous text data are not saved and only used for the speech synthesis.")
    input_text = gr.Textbox(
        label="Input Text",
        max_lines=3,
        placeholder="Enter text here...",
    )
    submit_button = gr.Button("Synthesize")
    output_audio = gr.Audio(
        label="Synthesized Speech",
        type="numpy",
        show_download_button=False,
    )
    # Hidden textbox wired as the second output of the click handler.
    output_text = gr.Textbox(label="Output Text", visible=False)

    def process_and_output(text):
        """Run the synthesizer on *text* and return ``(audio, message)``.

        A falsy message is normalised to ``None``.
        """
        audio, message = text_to_speech(text)
        return audio, (message or None)

    submit_button.click(
        process_and_output,
        inputs=input_text,
        outputs=[output_audio, output_text],
    )

blocks.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.36.1
|
2 |
+
piper-tts==1.2.0
|
3 |
+
PyWavelets==1.5.0
|
4 |
+
numpy==1.24.4
|