mrfakename committed on
Commit
73dbaa9
1 Parent(s): 0675d4f

Add experimental long text w/ Tortoise

Browse files
Files changed (2) hide show
  1. app.py +21 -1
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,6 +3,8 @@ import styletts2importable
3
  import ljspeechimportable
4
  import torch
5
  import os
 
 
6
  import pickle
7
  theme = gr.themes.Base(
8
  font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
@@ -25,6 +27,15 @@ def synthesize(text, voice):
25
  raise gr.Error("Text must be under 300 characters")
26
  v = voice.lower()
27
  return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
 
 
 
 
 
 
 
 
 
28
  def clsynthesize(text, voice):
29
  if text.strip() == "":
30
  raise gr.Error("You must enter some text")
@@ -59,6 +70,15 @@ with gr.Blocks() as clone:
59
  clbtn = gr.Button("Synthesize", variant="primary")
60
  claudio = gr.Audio(interactive=False, label="Synthesized Audio")
61
  clbtn.click(clsynthesize, inputs=[clinp, clvoice], outputs=[claudio], concurrency_limit=4)
 
 
 
 
 
 
 
 
 
62
  with gr.Blocks() as lj:
63
  with gr.Row():
64
  with gr.Column(scale=1):
@@ -80,7 +100,7 @@ Is there a long queue on this space? Duplicate it and add a more powerful GPU to
80
 
81
  **NOTE: StyleTTS 2 does better on longer texts.** For example, making it say "hi" will produce a lower-quality result than making it say a longer phrase.""")
82
  gr.DuplicateButton("Duplicate Space")
83
- gr.TabbedInterface([vctk, clone, lj], ['Multi-Voice', 'Voice Cloning', 'LJSpeech'])
84
  gr.Markdown("""
85
  Demo by by [mrfakename](https://twitter.com/realmrfakename). I am not affiliated with the StyleTTS 2 authors.
86
 
 
3
  import ljspeechimportable
4
  import torch
5
  import os
6
+ from tortoise.utils.text import split_and_recombine_text
7
+ import numpy as np
8
  import pickle
9
  theme = gr.themes.Base(
10
  font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
 
27
  raise gr.Error("Text must be under 300 characters")
28
  v = voice.lower()
29
  return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
30
def longsynthesize(text, voice, progress=gr.Progress()):
    """Synthesize long text by splitting it into chunks and joining the audio.

    The input is split with ``split_and_recombine_text``; each chunk is
    synthesized separately with the selected preset voice and the resulting
    waveforms are concatenated in order.

    Args:
        text: The text to read aloud. Must contain non-whitespace characters.
        voice: Name of a preset voice; it is lower-cased and looked up in the
            module-level ``voices`` mapping.
        progress: Gradio progress tracker used to report per-chunk progress.

    Returns:
        A ``(sample_rate, waveform)`` tuple with a 24000 Hz sample rate and
        the concatenated audio of every chunk.

    Raises:
        gr.Error: If ``text`` is empty or only whitespace.
    """
    if text.strip() == "":
        raise gr.Error("You must enter some text")
    texts = split_and_recombine_text(text)
    v = voice.lower()
    audios = []
    for t in progress.tqdm(texts):
        # Synthesize the current chunk `t`, not the whole `text`: passing the
        # full input here would repeat the entire passage once per chunk.
        audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
    return (24000, np.concatenate(audios))
39
  def clsynthesize(text, voice):
40
  if text.strip() == "":
41
  raise gr.Error("You must enter some text")
 
70
  clbtn = gr.Button("Synthesize", variant="primary")
71
  claudio = gr.Audio(interactive=False, label="Synthesized Audio")
72
  clbtn.click(clsynthesize, inputs=[clinp, clvoice], outputs=[claudio], concurrency_limit=4)
73
# "Long Text" tab: reads arbitrarily long input with one of the preset voices.
with gr.Blocks() as longText:
    with gr.Row():
        with gr.Column(scale=1):
            clinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
            # longsynthesize() looks the voice up by name in `voices`, so this
            # must be a voice-name selector rather than an audio upload (a file
            # path is never a key of `voices`, and an empty upload is None).
            # NOTE(review): assumes `voices` is a module-level mapping keyed by
            # voice name and already populated at this point - confirm.
            _long_voice_names = sorted(voices)
            clvoice = gr.Dropdown(
                _long_voice_names,
                label="Voice",
                value=_long_voice_names[0] if _long_voice_names else None,
                interactive=True,
            )
        with gr.Column(scale=1):
            clbtn = gr.Button("Synthesize", variant="primary")
            claudio = gr.Audio(interactive=False, label="Synthesized Audio")
            clbtn.click(longsynthesize, inputs=[clinp, clvoice], outputs=[claudio], concurrency_limit=4)
82
  with gr.Blocks() as lj:
83
  with gr.Row():
84
  with gr.Column(scale=1):
 
100
 
101
  **NOTE: StyleTTS 2 does better on longer texts.** For example, making it say "hi" will produce a lower-quality result than making it say a longer phrase.""")
102
  gr.DuplicateButton("Duplicate Space")
103
+ gr.TabbedInterface([vctk, clone, lj, longText], ['Multi-Voice', 'Voice Cloning', 'LJSpeech', 'Long Text [Beta]'])
104
  gr.Markdown("""
105
  Demo by by [mrfakename](https://twitter.com/realmrfakename). I am not affiliated with the StyleTTS 2 authors.
106
 
requirements.txt CHANGED
@@ -19,4 +19,5 @@ scipy
19
  phonemizer
20
  cached-path
21
  gradio
22
- gruut
 
 
19
  phonemizer
20
  cached-path
21
  gradio
22
+ gruut
23
+ tortoise-tts