Hecheng0625 committed on
Commit
9730071
1 Parent(s): 7634b6c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -21
app.py CHANGED
@@ -135,27 +135,54 @@ demo_outputs = [
135
  gr.Audio(label="Voice conversion result"),
136
  ]
137
 
138
- demo = gr.Interface(
139
- fn=codec_voice_conversion,
140
- inputs=demo_inputs,
141
- outputs=demo_outputs,
142
- title="NaturalSpeech3 FACodec",
143
- description="""
144
- ## FACodec: Speech Codec with Attribute Factorization used for NaturalSpeech 3
145
-
146
- [![arXiv](https://img.shields.io/badge/arXiv-Paper-<COLOR>.svg)](https://arxiv.org/pdf/2403.03100.pdf)
147
-
148
- [![demo](https://img.shields.io/badge/FACodec-Demo-red)](https://speechresearch.github.io/naturalspeech3/)
149
-
150
- [![model](https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace-Models-pink)](https://huggingface.co/amphion/naturalspeech3_facodec)
151
-
152
- ## Overview
153
-
154
- FACodec is a core component of the advanced text-to-speech (TTS) model NaturalSpeech 3. FACodec converts complex speech waveform into disentangled subspaces representing speech attributes of content, prosody, timbre, and acoustic details and reconstruct high-quality speech waveform from these attributes. FACodec decomposes complex speech into subspaces representing different attributes, thus simplifying the modeling of speech representation.
155
-
156
- Research can use FACodec to develop different modes of TTS models, such as non-autoregressive based discrete diffusion (NaturalSpeech 3) or autoregressive models (like VALL-E).
157
- """,
158
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
  if __name__ == "__main__":
161
  demo.launch()
 
135
  gr.Audio(label="Voice conversion result"),
136
  ]
137
 
138
+ with gr.Blocks() as demo:
139
+ gr.Interface(
140
+ fn=codec_voice_conversion,
141
+ inputs=demo_inputs,
142
+ outputs=demo_outputs,
143
+ title="NaturalSpeech3 FACodec",
144
+ description="""
145
+ ## FACodec: Speech Codec with Attribute Factorization used for NaturalSpeech 3
146
+
147
+ [![arXiv](https://img.shields.io/badge/arXiv-Paper-<COLOR>.svg)](https://arxiv.org/pdf/2403.03100.pdf)
148
+
149
+ [![demo](https://img.shields.io/badge/FACodec-Demo-red)](https://speechresearch.github.io/naturalspeech3/)
150
+
151
+ [![model](https://img.shields.io/badge/%F0%9F%A4%97%20HuggingFace-Models-pink)](https://huggingface.co/amphion/naturalspeech3_facodec)
152
+
153
+ ## Overview
154
+
155
+ FACodec is a core component of the advanced text-to-speech (TTS) model NaturalSpeech 3. FACodec converts complex speech waveform into disentangled subspaces representing speech attributes of content, prosody, timbre, and acoustic details and reconstruct high-quality speech waveform from these attributes. FACodec decomposes complex speech into subspaces representing different attributes, thus simplifying the modeling of speech representation.
156
+
157
+ Research can use FACodec to develop different modes of TTS models, such as non-autoregressive based discrete diffusion (NaturalSpeech 3) or autoregressive models (like VALL-E).
158
+ """,
159
+ )
160
+
161
+ gr.Examples(
162
+ examples=[
163
+ [
164
+ "default/ref/1.wav",
165
+ "default/source/1.wav",
166
+ ],
167
+ [
168
+ "default/ref/2.wav",
169
+ "default/source/2.wav",
170
+ ],
171
+ [
172
+ "default/ref/3.wav",
173
+ "default/source/3.wav",
174
+ ],
175
+ [
176
+ "default/ref/4.wav",
177
+ "default/source/4.wav",
178
+ ],
179
+ [
180
+ "default/ref/5.wav",
181
+ "default/source/5.wav",
182
+ ],
183
+ ],
184
+ inputs=demo_inputs,
185
+ )
186
 
187
  if __name__ == "__main__":
188
  demo.launch()