ylacombe HF staff committed on
Commit
865af48
1 Parent(s): aeeb0c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -35
app.py CHANGED
@@ -98,8 +98,9 @@ css = """
98
 
99
  # Gradio blocks demo
100
  with gr.Blocks(css=css) as demo_blocks:
101
- with gr.Column(elem_id="container"):
102
- gr.Markdown(title, elem_id="intro")
 
103
  with gr.Column():
104
  inp_text = gr.Textbox(label="Input Text", info="What would you like VITS to synthesise?")
105
  btn = gr.Button("Generate Audio!")
@@ -117,46 +118,46 @@ with gr.Blocks(css=css) as demo_blocks:
117
  outputs.append(out_audio)
118
 
119
 
120
- gr.Markdown("""
121
- ## Datasets and models details
122
 
123
- ### English
124
 
125
- * **Model**: [VITS-ljs](https://huggingface.co/kakao-enterprise/vits-ljs)
126
- * **Dataset**: [British Isles Accent](https://huggingface.co/datasets/ylacombe/english_dialects). For each accent, we used 100 to 150 samples of a single speaker to finetune [VITS-ljs](https://huggingface.co/kakao-enterprise/vits-ljs).
127
-
128
- ### Spanish
129
 
130
- * **Model**: [Spanish MMS TTS](https://huggingface.co/facebook/mms-tts-spa). This model is part of Facebook's [Massively Multilingual Speech](https://arxiv.org/abs/2305.13516) project, aiming to
131
  provide speech technology across a diverse range of languages. You can find more details about the supported languages
132
  and their ISO 639-3 codes in the [MMS Language Coverage Overview](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html),
133
  and see all MMS-TTS checkpoints on the Hugging Face Hub: [facebook/mms-tts](https://huggingface.co/models?sort=trending&search=facebook%2Fmms-tts).
134
- * **Datasets**: For each accent, we used 100 to 150 samples of a single speaker to finetune the model.
135
- - [Colombian Spanish TTS dataset](https://huggingface.co/datasets/ylacombe/google-colombian-spanish).
136
- - [Argentinian Spanish TTS dataset](https://huggingface.co/datasets/ylacombe/google-argentinian-spanish).
137
- - [Chilean Spanish TTS dataset](https://huggingface.co/datasets/ylacombe/google-chilean-spanish).
138
-
139
- """)
140
-
141
- with gr.Accordion("Run with transformers"):
142
- gr.Markdown(
143
- """## Running VITS and MMS with transformers
144
- ```bash
145
- pip install transformers
146
- ```
147
- ```py
148
- from transformers import pipeline
149
- import scipy
150
- pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=0)
151
-
152
- results = pipe("A cinematic shot of a baby racoon wearing an intricate italian priest robe")
 
 
 
 
 
 
153
 
154
- # write to a wav file
155
- scipy.io.wavfile.write("audio_vits.wav", rate=results["sampling_rate"], data=results["audio"].squeeze())
156
- ```
157
- """
158
- )
159
-
160
  btn.click(generate_audio, [inp_text, language], outputs)
161
 
162
 
 
98
 
99
  # Gradio blocks demo
100
  with gr.Blocks(css=css) as demo_blocks:
101
+ gr.Markdown(title, elem_id="intro")
102
+
103
+ with gr.Row():
104
  with gr.Column():
105
  inp_text = gr.Textbox(label="Input Text", info="What would you like VITS to synthesise?")
106
  btn = gr.Button("Generate Audio!")
 
118
  outputs.append(out_audio)
119
 
120
 
121
+ gr.Markdown("""
122
+ ## Datasets and models details
123
 
124
+ ### English
125
 
126
+ * **Model**: [VITS-ljs](https://huggingface.co/kakao-enterprise/vits-ljs)
127
+ * **Dataset**: [British Isles Accent](https://huggingface.co/datasets/ylacombe/english_dialects). For each accent, we used 100 to 150 samples of a single speaker to finetune [VITS-ljs](https://huggingface.co/kakao-enterprise/vits-ljs).
128
+
129
+ ### Spanish
130
 
131
+ * **Model**: [Spanish MMS TTS](https://huggingface.co/facebook/mms-tts-spa). This model is part of Facebook's [Massively Multilingual Speech](https://arxiv.org/abs/2305.13516) project, aiming to
132
  provide speech technology across a diverse range of languages. You can find more details about the supported languages
133
  and their ISO 639-3 codes in the [MMS Language Coverage Overview](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html),
134
  and see all MMS-TTS checkpoints on the Hugging Face Hub: [facebook/mms-tts](https://huggingface.co/models?sort=trending&search=facebook%2Fmms-tts).
135
+ * **Datasets**: For each accent, we used 100 to 150 samples of a single speaker to finetune the model.
136
+ - [Colombian Spanish TTS dataset](https://huggingface.co/datasets/ylacombe/google-colombian-spanish).
137
+ - [Argentinian Spanish TTS dataset](https://huggingface.co/datasets/ylacombe/google-argentinian-spanish).
138
+ - [Chilean Spanish TTS dataset](https://huggingface.co/datasets/ylacombe/google-chilean-spanish).
139
+
140
+ """)
141
+
142
+ with gr.Accordion("Run with transformers"):
143
+ gr.Markdown(
144
+ """## Running VITS and MMS with transformers
145
+ ```bash
146
+ pip install transformers
147
+ ```
148
+ ```py
149
+ from transformers import pipeline
150
+ import scipy
151
+ pipe = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=0)
152
+
153
+ results = pipe("A cinematic shot of a baby racoon wearing an intricate italian priest robe")
154
+
155
+ # write to a wav file
156
+ scipy.io.wavfile.write("audio_vits.wav", rate=results["sampling_rate"], data=results["audio"].squeeze())
157
+ ```
158
+ """
159
+ )
160
 
 
 
 
 
 
 
161
  btn.click(generate_audio, [inp_text, language], outputs)
162
 
163