Hecheng0625 committed on
Commit
05f5f5a
1 Parent(s): 606a181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -5
app.py CHANGED
@@ -59,6 +59,7 @@ fa_encoder.eval()
59
  fa_decoder.eval()
60
  fa_redecoder.eval()
61
 
 
62
  def codec_inference(speech_path):
63
 
64
  with torch.no_grad():
@@ -118,6 +119,18 @@ def codec_voice_conversion(speech_path_a, speech_path_b):
118
 
119
 
120
  demo_inputs = [
 
 
 
 
 
 
 
 
 
 
 
 
121
  gr.Audio(
122
  sources=["upload", "microphone"],
123
  label="Upload the source speech file",
@@ -130,7 +143,7 @@ demo_inputs = [
130
  ),
131
  ]
132
 
133
- demo_outputs = [
134
  gr.Audio(label="Source speech reconstructed"),
135
  gr.Audio(label="Reference speech reconstructed"),
136
  gr.Audio(label="Voice conversion result"),
@@ -138,10 +151,10 @@ demo_outputs = [
138
 
139
  with gr.Blocks() as demo:
140
  gr.Interface(
141
- fn=codec_voice_conversion,
142
  inputs=demo_inputs,
143
  outputs=demo_outputs,
144
- title="NaturalSpeech3 FACodec",
145
  description="""
146
  ## FACodec: Speech Codec with Attribute Factorization used for NaturalSpeech 3
147
 
@@ -162,12 +175,31 @@ with gr.Blocks() as demo:
162
  gr.Examples(
163
  examples=[
164
  [
165
- "default/source/source.wav",
166
  "default/ref/ref.wav",
167
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
 
 
 
 
 
 
169
  ],
170
- inputs=demo_inputs,
171
  )
172
 
173
  demo.queue()
 
59
  fa_decoder.eval()
60
  fa_redecoder.eval()
61
 
62
+ @spaces.GPU
63
  def codec_inference(speech_path):
64
 
65
  with torch.no_grad():
 
119
 
120
 
121
  demo_inputs = [
122
+ gr.Audio(
123
+ sources=["upload", "microphone"],
124
+ label="Upload the speech file",
125
+ type="filepath",
126
+ ),
127
+ ]
128
+
129
+ demo_outputs = [
130
+ gr.Audio(label="Speech reconstructed"),
131
+ ]
132
+
133
+ vc_demo_inputs = [
134
  gr.Audio(
135
  sources=["upload", "microphone"],
136
  label="Upload the source speech file",
 
143
  ),
144
  ]
145
 
146
+ vc_demo_outputs = [
147
  gr.Audio(label="Source speech reconstructed"),
148
  gr.Audio(label="Reference speech reconstructed"),
149
  gr.Audio(label="Voice conversion result"),
 
151
 
152
  with gr.Blocks() as demo:
153
  gr.Interface(
154
+ fn=codec_inference,
155
  inputs=demo_inputs,
156
  outputs=demo_outputs,
157
+ title="FACodec for NaturalSpeech 3",
158
  description="""
159
  ## FACodec: Speech Codec with Attribute Factorization used for NaturalSpeech 3
160
 
 
175
  gr.Examples(
176
  examples=[
177
  [
178
+ "default/ref/ref.wav",
179
  "default/ref/ref.wav",
180
  ],
181
+ ],
182
+ inputs=vc_demo_inputs,
183
+ )
184
+
185
+ gr.Interface(
186
+ fn=codec_voice_conversion,
187
+ inputs=vc_demo_inputs,
188
+ outputs=vc_demo_outputs,
189
+ title="FACodec Voice Conversion",
190
+ description="""
191
+ FACodec can achieve zero-shot voice conversion.
192
+ """,
193
+ )
194
 
195
+ gr.Examples(
196
+ examples=[
197
+ [
198
+ "default/source/source.wav",
199
+ "default/ref/ref.wav",
200
+ ],
201
  ],
202
+ inputs=vc_demo_inputs,
203
  )
204
 
205
  demo.queue()