Flux9665 committed on
Commit
20636e5
·
1 Parent(s): bbed20a

add voice cloning interface

Browse files
Architectures/ToucanTTS/InferenceToucanTTS.py CHANGED
@@ -207,6 +207,8 @@ class ToucanTTS(torch.nn.Module):
207
  text_tensors = torch.clamp(text_tensors, max=1.0)
208
  # this is necessary, because of the way we represent modifiers to keep them identifiable.
209
 
 
 
210
  if not self.multilingual_model:
211
  lang_ids = None
212
 
 
207
  text_tensors = torch.clamp(text_tensors, max=1.0)
208
  # this is necessary, because of the way we represent modifiers to keep them identifiable.
209
 
210
+ utterance_embedding = torch.nn.functional.normalize(utterance_embedding)
211
+
212
  if not self.multilingual_model:
213
  lang_ids = None
214
 
app.py CHANGED
@@ -13,7 +13,7 @@ class TTSWebUI:
13
  self.iface = gr.Interface(fn=self.read,
14
  inputs=[gr.Textbox(lines=2,
15
  placeholder="write what you want the synthesis to read here...",
16
- value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
17
  label="Text input"),
18
  gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
19
  gr.Slider(minimum=0.0, maximum=0.8, step=0.1, value=0.4, label="Prosody Creativity"),
 
13
  self.iface = gr.Interface(fn=self.read,
14
  inputs=[gr.Textbox(lines=2,
15
  placeholder="write what you want the synthesis to read here...",
16
+ value="What I cannot create, I do not understand.",
17
  label="Text input"),
18
  gr.Audio(type="filepath", show_label=True, container=True, label="Voice to Clone (if left empty, will use an artificial voice instead)"),
19
  gr.Slider(minimum=0.0, maximum=0.8, step=0.1, value=0.4, label="Prosody Creativity"),