jbetker committed on
Commit
cd2d422
1 Parent(s): b1fc2b1

Better error messages when inputs are out of bounds.

Browse files
Files changed (1) hide show
  1. tortoise/api.py +4 -1
tortoise/api.py CHANGED
@@ -107,7 +107,9 @@ def fix_autoregressive_output(codes, stop_token, complain=True):
107
  stop_token_indices = (codes == stop_token).nonzero()
108
  if len(stop_token_indices) == 0:
109
  if complain:
110
- print("No stop tokens found, enjoy that output of yours!")
 
 
111
  return codes
112
  else:
113
  codes[stop_token_indices] = 83
@@ -310,6 +312,7 @@ class TextToSpeech:
310
  """
311
  text = torch.IntTensor(self.tokenizer.encode(text)).unsqueeze(0).cuda()
312
  text = F.pad(text, (0, 1)) # This may not be necessary.
 
313
 
314
  if voice_samples is not None:
315
  auto_conditioning, diffusion_conditioning = self.get_conditioning_latents(voice_samples)
 
107
  stop_token_indices = (codes == stop_token).nonzero()
108
  if len(stop_token_indices) == 0:
109
  if complain:
110
+ print("No stop tokens found. This typically means the spoken audio is too long. In some cases, the output "
111
+ "will still be good, though. Listen to it and if it is missing words, try breaking up your input "
112
+ "text.")
113
  return codes
114
  else:
115
  codes[stop_token_indices] = 83
 
312
  """
313
  text = torch.IntTensor(self.tokenizer.encode(text)).unsqueeze(0).cuda()
314
  text = F.pad(text, (0, 1)) # This may not be necessary.
315
+ assert text.shape[-1] < 400, 'Too much text provided. Break the text up into separate segments and re-try inference.'
316
 
317
  if voice_samples is not None:
318
  auto_conditioning, diffusion_conditioning = self.get_conditioning_latents(voice_samples)