DHEIVER committed on
Commit
197bc3e
1 Parent(s): 76a0ee2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -47
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import spaces
2
  import gradio as gr
3
  import torch
4
  from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
@@ -46,27 +45,19 @@ examples = [
46
 
47
  number_normalizer = EnglishNumberNormalizer()
48
 
49
-
50
  def preprocess(text):
51
  text = number_normalizer(text).strip()
52
- if text[-1] not in punctuation:
53
- text = f"{text}."
54
 
55
  abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
56
 
57
- def separate_abb(chunk):
58
- chunk = chunk.replace(".", "")
59
- print(chunk)
60
- return " ".join(chunk)
61
 
62
- abbreviations = re.findall(abbreviations_pattern, text)
63
- for abv in abbreviations:
64
- if abv in text:
65
- text = text.replace(abv, separate_abb(abv))
66
  return text
67
 
68
-
69
- @spaces.GPU
70
  def gen_tts(text, description):
71
  inputs = tokenizer(description, return_tensors="pt").to(device)
72
  prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
@@ -77,7 +68,6 @@ def gen_tts(text, description):
77
 
78
  return SAMPLE_RATE, audio_arr
79
 
80
-
81
  css = """
82
  #share-btn-container {
83
  display: flex;
@@ -114,38 +104,40 @@ css = """
114
  display: none !important;
115
  }
116
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  with gr.Blocks(css=css) as block:
118
- gr.HTML(
119
- """
120
- <div style="text-align: center; max-width: 700px; margin: 0 auto;">
121
- <div
122
- style="
123
- display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;
124
- "
125
- >
126
- <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
127
- Parler-TTS: Expresso ☕️️
128
- </h1>
129
- </div>
130
- </div>
131
- """
132
- )
133
- gr.HTML(
134
- f"""
135
- <p><a href="https://huggingface.co/parler-tts/parler-tts-mini-expresso"> Parler-TTS Mini: Expresso</a>
136
- is a text-to-speech (TTS) model fine-tuned on the <a href="https://huggingface.co/datasets/ylacombe/expresso"> Expresso dataset</a>.
137
- It generates high-quality speech in a given <b>emotion</b> and <b>voice</b> that can be controlled through a simple text prompt.</p>
138
-
139
- <p>Tips for ensuring good generation:
140
- <ul>
141
- <li>Specify the name of a male speaker (Jerry, Thomas) or female speaker (Talia, Elisabeth) for consistent voices</li>
142
- <li>The model can generate in a range of emotions, including: "happy", "confused", "default" (meaning no particular emotion conveyed), "laughing", "sad", "whisper", "emphasis"</li>
143
- <li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
144
- <li>To emphasise particular words, wrap them in asterisk (e.g. *you* in the example above) and include "emphasis" in the prompt</li>
145
- </ul>
146
- </p>
147
- """
148
- )
149
  with gr.Row():
150
  with gr.Column():
151
  input_text = gr.Textbox(label="Input Text", lines=2, value=default_text, elem_id="input_text")
@@ -168,4 +160,4 @@ with gr.Blocks(css=css) as block:
168
  )
169
 
170
  block.queue()
171
- block.launch(share=True)
 
 
1
  import gradio as gr
2
  import torch
3
  from transformers.models.speecht5.number_normalizer import EnglishNumberNormalizer
 
45
 
46
  number_normalizer = EnglishNumberNormalizer()
47
 
 
48
  def preprocess(text):
49
  text = number_normalizer(text).strip()
50
+ if not text.endswith(punctuation):
51
+ text += "."
52
 
53
  abbreviations_pattern = r'\b[A-Z][A-Z\.]+\b'
54
 
55
+ def separate_abb(match):
56
+ return match.group(0).replace(".", " ")
 
 
57
 
58
+ text = re.sub(abbreviations_pattern, separate_abb, text)
 
 
 
59
  return text
60
 
 
 
61
  def gen_tts(text, description):
62
  inputs = tokenizer(description, return_tensors="pt").to(device)
63
  prompt = tokenizer(preprocess(text), return_tensors="pt").to(device)
 
68
 
69
  return SAMPLE_RATE, audio_arr
70
 
 
71
  css = """
72
  #share-btn-container {
73
  display: flex;
 
104
  display: none !important;
105
  }
106
  """
107
+
108
+ html_blocks = [
109
+ """
110
+ <div style="text-align: center; max-width: 700px; margin: 0 auto;">
111
+ <div
112
+ style="
113
+ display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;
114
+ "
115
+ >
116
+ <h1 style="font-weight: 900; margin-bottom: 7px; line-height: normal;">
117
+ Parler-TTS: Expresso ☕️️
118
+ </h1>
119
+ </div>
120
+ </div>
121
+ """,
122
+ f"""
123
+ <p><a href="https://huggingface.co/parler-tts/parler-tts-mini-expresso"> Parler-TTS Mini: Expresso</a>
124
+ is a text-to-speech (TTS) model fine-tuned on the <a href="https://huggingface.co/datasets/ylacombe/expresso"> Expresso dataset</a>.
125
+ It generates high-quality speech in a given <b>emotion</b> and <b>voice</b> that can be controlled through a simple text prompt.</p>
126
+ <p>Tips for ensuring good generation:
127
+ <ul>
128
+ <li>Specify the name of a male speaker (Jerry, Thomas) or female speaker (Talia, Elisabeth) for consistent voices</li>
129
+ <li>The model can generate in a range of emotions, including: "happy", "confused", "default" (meaning no particular emotion conveyed), "laughing", "sad", "whisper", "emphasis"</li>
130
+ <li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
131
+ <li>To emphasise particular words, wrap them in asterisk (e.g. *you* in the example above) and include "emphasis" in the prompt</li>
132
+ </ul>
133
+ </p>
134
+ """
135
+ ]
136
+
137
  with gr.Blocks(css=css) as block:
138
+ for html_block in html_blocks:
139
+ gr.HTML(html_block)
140
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  with gr.Row():
142
  with gr.Column():
143
  input_text = gr.Textbox(label="Input Text", lines=2, value=default_text, elem_id="input_text")
 
160
  )
161
 
162
  block.queue()
163
+ block.launch(share=True)