RayeRen commited on
Commit
e75aa39
1 Parent(s): 15e73a1
inference/tts/gradio/gradio_settings.yaml CHANGED
@@ -1,12 +1,12 @@
1
- title: 'NATSpeech/FastSpeech 2'
2
  description: |
3
- Gradio demo for NATSpeech/FastSpeech 2. To use it, simply add your audio, or click one of the examples to load them. Note: This space is running on CPU, inference times will be higher.
4
  article: |
5
- Link to <a href='https://github.com/NATSpeech/NATSpeech/blob/main/docs/fastspeech2.md' style='color:blue;' target='_blank\'>Github REPO</a>
6
  example_inputs:
7
  - |-
8
  the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.
9
  - |-
10
  produced the block books, which were the immediate predecessors of the true printed book,
11
- inference_cls: inference.tts.fs2_orig.FastSpeech2OrigInfer
12
- exp_name: fs2_exp
 
1
+ title: 'NATSpeech/PortaSpeech'
2
  description: |
3
+ Gradio demo for NATSpeech/PortaSpeech. To use it, simply enter your text, or click one of the examples to load them. Note: This space is running on CPU, inference times will be higher.
4
  article: |
5
+ Link to <a href='https://github.com/NATSpeech/NATSpeech/blob/main/docs/portaspeech.md' style='color:blue;' target='_blank'>GitHub repo</a>
6
  example_inputs:
7
  - |-
8
  the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.
9
  - |-
10
  produced the block books, which were the immediate predecessors of the true printed book,
11
+ inference_cls: inference.tts.ps_flow.PortaSpeechFlowInfer
12
+ exp_name: ps_normal_exp
inference/tts/ps_flow.py CHANGED
@@ -10,8 +10,9 @@ class PortaSpeechFlowInfer(BaseTTSInfer):
10
  ph_dict_size = len(self.ph_encoder)
11
  word_dict_size = len(self.word_encoder)
12
  model = PortaSpeechFlow(ph_dict_size, word_dict_size, self.hparams)
13
- model.eval()
14
  load_ckpt(model, hparams['work_dir'], 'model')
 
 
15
  return model
16
 
17
  def forward_model(self, inp):
 
10
  ph_dict_size = len(self.ph_encoder)
11
  word_dict_size = len(self.word_encoder)
12
  model = PortaSpeechFlow(ph_dict_size, word_dict_size, self.hparams)
 
13
  load_ckpt(model, hparams['work_dir'], 'model')
14
+ model.post_flow.store_inverse()
15
+ model.eval()
16
  return model
17
 
18
  def forward_model(self, inp):
modules/tts/portaspeech/fvae.py CHANGED
@@ -125,7 +125,7 @@ class FVAE(nn.Module):
125
  return z_q, loss_kl, z_p, m_q, logs_q
126
  else:
127
  latent_shape = [cond_sqz.shape[0], self.latent_size, cond_sqz.shape[2]]
128
- z_p = self.prior_dist.sample(latent_shape).to(cond.device) * noise_scale
129
  if self.use_prior_flow:
130
  z_p = self.prior_flow(z_p, 1, cond_sqz, reverse=True)
131
  return z_p
 
125
  return z_q, loss_kl, z_p, m_q, logs_q
126
  else:
127
  latent_shape = [cond_sqz.shape[0], self.latent_size, cond_sqz.shape[2]]
128
+ z_p = torch.randn(latent_shape).to(cond.device) * noise_scale
129
  if self.use_prior_flow:
130
  z_p = self.prior_flow(z_p, 1, cond_sqz, reverse=True)
131
  return z_p
modules/tts/portaspeech/portaspeech_flow.py CHANGED
@@ -70,6 +70,6 @@ class PortaSpeechFlow(PortaSpeech):
70
  ret['postflow'] = None
71
  else:
72
  nonpadding = torch.ones_like(x_recon[:, :1, :])
73
- z_post = prior_dist.sample(x_recon.shape).to(g.device) * self.hparams['noise_scale']
74
  x_recon, _ = self.post_flow(z_post, nonpadding, g, reverse=True)
75
  ret['mel_out'] = x_recon.transpose(1, 2)
 
70
  ret['postflow'] = None
71
  else:
72
  nonpadding = torch.ones_like(x_recon[:, :1, :])
73
+ z_post = torch.randn(x_recon.shape).to(g.device) * self.hparams['noise_scale']
74
  x_recon, _ = self.post_flow(z_post, nonpadding, g, reverse=True)
75
  ret['mel_out'] = x_recon.transpose(1, 2)