Text-to-Speech
English

The generate function can optionally accept pre-computed phonemes via a new `ps` argument (falls back to phonemize(text) when omitted)

#3
Files changed (1) hide show
  1. kokoro.py +3 -3
kokoro.py CHANGED
@@ -131,8 +131,8 @@ def forward(model, tokens, ref_s, speed):
131
  asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
132
  return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
133
 
134
- def generate(model, text, voicepack, speed=1):
135
- ps = phonemize(text)
136
  tokens = tokenize(ps)
137
  if not tokens:
138
  return None
@@ -142,4 +142,4 @@ def generate(model, text, voicepack, speed=1):
142
  ref_s = voicepack[len(tokens)]
143
  out = forward(model, tokens, ref_s, speed)
144
  ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
145
- return out, ps
 
131
  asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
132
  return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
133
 
134
+ def generate(model, text, voicepack, speed=1, ps=None):
135
+ ps = ps or phonemize(text)
136
  tokens = tokenize(ps)
137
  if not tokens:
138
  return None
 
142
  ref_s = voicepack[len(tokens)]
143
  out = forward(model, tokens, ref_s, speed)
144
  ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
145
+ return out, ps