Better error messages when inputs are out of bounds.
Changed file: tortoise/api.py (+4 lines, −1 line)
@@ -107,7 +107,9 @@ def fix_autoregressive_output(codes, stop_token, complain=True):
     stop_token_indices = (codes == stop_token).nonzero()
     if len(stop_token_indices) == 0:
         if complain:
-            print("No stop tokens found, enjoy that output of yours!")
+            print("No stop tokens found. This typically means the spoken audio is too long. In some cases, the output "
+                  "will still be good, though. Listen to it and if it is missing words, try breaking up your input "
+                  "text.")
         return codes
     else:
         codes[stop_token_indices] = 83
@@ -310,6 +312,7 @@ class TextToSpeech:
         """
         text = torch.IntTensor(self.tokenizer.encode(text)).unsqueeze(0).cuda()
         text = F.pad(text, (0, 1))  # This may not be necessary.
+        assert text.shape[-1] < 400, 'Too much text provided. Break the text up into separate segments and re-try inference.'

         if voice_samples is not None:
             auto_conditioning, diffusion_conditioning = self.get_conditioning_latents(voice_samples)