ggoknar
committed on
Commit
·
3f2e1a8
1
Parent(s):
bd470e7
limit speech to 250 characters for now
Browse files
app.py
CHANGED
@@ -399,7 +399,13 @@ def generate_speech(history):
|
|
399 |
for sentence, history in get_sentence(history):
|
400 |
print(sentence)
|
401 |
# Sometimes the prompt's </s> token leaks into the output; remove it
|
|
|
402 |
sentence = sentence.replace("</s>", "")
|
|
|
|
|
|
|
|
|
|
|
403 |
# A quick fix for the last character; it may produce weird sounds if it is attached to text
|
404 |
if sentence[-1] in ["!", "?", ".", ","]:
|
405 |
# just add a space
|
@@ -410,49 +416,56 @@ def generate_speech(history):
|
|
410 |
# generate speech using precomputed latents
|
411 |
# This is not streaming but it will be fast
|
412 |
# wav = get_voice(sentence,language, latent_map["Female_Voice"], suffix=len(wav_list))
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
|
440 |
if not DIRECT_STREAM:
|
441 |
yield (
|
442 |
gr.Audio.update(value=None, autoplay=True),
|
443 |
history,
|
444 |
) # hack to switch autoplay
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
|
|
449 |
|
450 |
-
|
451 |
-
|
452 |
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
|
457 |
except RuntimeError as e:
|
458 |
if "device-side assert" in str(e):
|
@@ -480,7 +493,7 @@ def generate_speech(history):
|
|
480 |
# yield (combined_file_name, history
|
481 |
|
482 |
wav_bytestream = wave_header_chunk() + wav_bytestream
|
483 |
-
time.sleep(0.
|
484 |
yield (gr.Audio.update(value=None, autoplay=False), history)
|
485 |
yield (gr.Audio.update(value=wav_bytestream, autoplay=False), history)
|
486 |
|
|
|
399 |
for sentence, history in get_sentence(history):
|
400 |
print(sentence)
|
401 |
# Sometimes the prompt's </s> token leaks into the output; remove it
|
402 |
+
# Some post-processing for speech only
|
403 |
sentence = sentence.replace("</s>", "")
|
404 |
+
sentence = sentence.replace("```", "")
|
405 |
+
sentence = sentence.replace("```", "")
|
406 |
+
sentence = sentence.replace("(", " ")
|
407 |
+
sentence = sentence.replace(")", " ")
|
408 |
+
|
409 |
# A quick fix for the last character; it may produce weird sounds if it is attached to text
|
410 |
if sentence[-1] in ["!", "?", ".", ","]:
|
411 |
# just add a space
|
|
|
416 |
# generate speech using precomputed latents
|
417 |
# This is not streaming but it will be fast
|
418 |
# wav = get_voice(sentence,language, latent_map["Female_Voice"], suffix=len(wav_list))
|
419 |
+
if len(sentence) > 250:
|
420 |
+
# Should not generate voice: a sentence this long would hit the token limit
|
421 |
+
# It should not generate audio for it
|
422 |
+
audio_stream = None
|
423 |
+
else:
|
424 |
+
audio_stream = get_voice_streaming(
|
425 |
+
sentence, language, latent_map["Female_Voice"]
|
426 |
+
)
|
427 |
+
if audio_stream is not None:
|
428 |
+
wav_chunks = wave_header_chunk()
|
429 |
+
frame_length = 0
|
430 |
+
for chunk in audio_stream:
|
431 |
+
try:
|
432 |
+
wav_bytestream += chunk
|
433 |
+
if DIRECT_STREAM:
|
434 |
+
yield (
|
435 |
+
gr.Audio.update(
|
436 |
+
value=wave_header_chunk() + chunk, autoplay=True
|
437 |
+
),
|
438 |
+
history,
|
439 |
+
)
|
440 |
+
wait_time = len(chunk) / 2 / 24000
|
441 |
+
wait_time = AUDIO_WAIT_MODIFIER * wait_time
|
442 |
+
print("Sleeping till chunk end")
|
443 |
+
time.sleep(wait_time)
|
444 |
+
|
445 |
+
else:
|
446 |
+
wav_chunks += chunk
|
447 |
+
frame_length += len(chunk)
|
448 |
+
except:
|
449 |
+
# Hack to keep playing: sometimes the last chunk is empty; will be fixed in the next TTS version
|
450 |
+
continue
|
451 |
|
452 |
if not DIRECT_STREAM:
|
453 |
yield (
|
454 |
gr.Audio.update(value=None, autoplay=True),
|
455 |
history,
|
456 |
) # hack to switch autoplay
|
457 |
+
if audio_stream is not None:
|
458 |
+
yield (gr.Audio.update(value=wav_chunks, autoplay=True), history)
|
459 |
+
# Streaming wait time calculation
|
460 |
+
# audio_length = frame_length / sample_width/ frame_rate
|
461 |
+
wait_time = frame_length / 2 / 24000
|
462 |
|
463 |
+
# for non streaming
|
464 |
+
# wait_time= librosa.get_duration(path=wav)
|
465 |
|
466 |
+
wait_time = AUDIO_WAIT_MODIFIER * wait_time
|
467 |
+
print("Sleeping till audio end")
|
468 |
+
time.sleep(wait_time)
|
469 |
|
470 |
except RuntimeError as e:
|
471 |
if "device-side assert" in str(e):
|
|
|
493 |
# yield (combined_file_name, history
|
494 |
|
495 |
wav_bytestream = wave_header_chunk() + wav_bytestream
|
496 |
+
time.sleep(0.7)
|
497 |
yield (gr.Audio.update(value=None, autoplay=False), history)
|
498 |
yield (gr.Audio.update(value=wav_bytestream, autoplay=False), history)
|
499 |
|