Remsky committed on
Commit
c6b98ce
·
1 Parent(s): 3d786a0

Refactor app.py to improve UI structure and modularize header content; add ui_content.py for HTML and Markdown components

Browse files
Files changed (2) hide show
  1. app.py +19 -24
  2. lib/ui_content.py +36 -0
app.py CHANGED
@@ -6,6 +6,7 @@ import matplotlib.pyplot as plt
6
  import numpy as np
7
  from tts_model import TTSModel
8
  from lib import format_audio_output
 
9
 
10
  # Set HF_HOME for faster restarts with cached models/voices
11
  os.environ["HF_HOME"] = "/data/.huggingface"
@@ -123,26 +124,18 @@ def generate_speech_from_ui(text, voice_name, speed, progress=gr.Progress(track_
123
  raise gr.Error(f"Generation failed: {str(e)}")
124
 
125
  # Create Gradio interface
126
- with gr.Blocks(title="Kokoro TTS Demo") as demo:
127
- gr.HTML(
128
- """
129
- <div style="display: flex; justify-content: flex-end; padding: 5px; gap: 5px;">
130
- <a class="github-button" href="https://github.com/remsky/Kokoro-FastAPI" data-color-scheme="no-preference: light; light: light; dark: dark;" data-size="large" data-show-count="true" aria-label="Star remsky/Kokoro-FastAPI on GitHub">Kokoro-FastAPI Repo</a>
131
- <a href="https://huggingface.co/hexgrad/Kokoro-82M" target="_blank">
132
- <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/model-on-hf-lg-dark.svg" alt="Model on HF">
133
- </a>
134
- </div>
135
- <div style="text-align: center; max-width: 800px; margin: 0 auto;">
136
- <h1>Kokoro TTS Demo</h1>
137
- <p>Convert text to natural-sounding speech using various voices.</p>
138
- </div>
139
- <script async defer src="https://buttons.github.io/buttons.js"></script>
140
- """
141
- )
142
 
143
  with gr.Row():
144
  # Column 1: Text Input
145
- with gr.Column():
146
  text_input = gr.TextArea(
147
  label="Text to speak",
148
  placeholder="Enter text here or upload a .txt file",
@@ -151,7 +144,7 @@ with gr.Blocks(title="Kokoro TTS Demo") as demo:
151
  )
152
 
153
  # Column 2: Controls
154
- with gr.Column():
155
  file_input = gr.File(
156
  label="Upload .txt file",
157
  file_types=[".txt"],
@@ -173,10 +166,15 @@ with gr.Blocks(title="Kokoro TTS Demo") as demo:
173
  )
174
 
175
  with gr.Group():
 
 
 
 
 
176
  voice_dropdown = gr.Dropdown(
177
  label="Voice",
178
  choices=voice_list,
179
- value=voice_list[0] if voice_list else None,
180
  allow_custom_value=True
181
  )
182
  speed_slider = gr.Slider(
@@ -189,7 +187,7 @@ with gr.Blocks(title="Kokoro TTS Demo") as demo:
189
  submit_btn = gr.Button("Generate Speech", variant="primary")
190
 
191
  # Column 3: Output
192
- with gr.Column():
193
  audio_output = gr.Audio(
194
  label="Generated Speech",
195
  type="numpy",
@@ -219,10 +217,7 @@ with gr.Blocks(title="Kokoro TTS Demo") as demo:
219
  # Add text analysis info
220
  with gr.Row():
221
  with gr.Column():
222
- gr.Markdown("""
223
- ### Demo Text Info
224
- The demo text is loaded from H.G. Wells' "The Time Machine". This classic text demonstrates the system's ability to handle long-form content through chunking.
225
- """)
226
 
227
  # Launch the app
228
  if __name__ == "__main__":
 
6
  import numpy as np
7
  from tts_model import TTSModel
8
  from lib import format_audio_output
9
+ from lib.ui_content import header_html, demo_text_info
10
 
11
  # Set HF_HOME for faster restarts with cached models/voices
12
  os.environ["HF_HOME"] = "/data/.huggingface"
 
124
  raise gr.Error(f"Generation failed: {str(e)}")
125
 
126
  # Create Gradio interface
127
+ with gr.Blocks(title="Kokoro TTS Demo", css="""
128
+ .equal-height {
129
+ min-height: 400px;
130
+ display: flex;
131
+ flex-direction: column;
132
+ }
133
+ """) as demo:
134
+ gr.HTML(header_html)
 
 
 
 
 
 
 
 
135
 
136
  with gr.Row():
137
  # Column 1: Text Input
138
+ with gr.Column(elem_classes="equal-height"):
139
  text_input = gr.TextArea(
140
  label="Text to speak",
141
  placeholder="Enter text here or upload a .txt file",
 
144
  )
145
 
146
  # Column 2: Controls
147
+ with gr.Column(elem_classes="equal-height"):
148
  file_input = gr.File(
149
  label="Upload .txt file",
150
  file_types=[".txt"],
 
166
  )
167
 
168
  with gr.Group():
169
+ default_voice = 'af_sky' if 'af_sky' in voice_list \
170
+ else voice_list[0] \
171
+ if voice_list else \
172
+ None
173
+
174
  voice_dropdown = gr.Dropdown(
175
  label="Voice",
176
  choices=voice_list,
177
+ value=default_voice,
178
  allow_custom_value=True
179
  )
180
  speed_slider = gr.Slider(
 
187
  submit_btn = gr.Button("Generate Speech", variant="primary")
188
 
189
  # Column 3: Output
190
+ with gr.Column(elem_classes="equal-height"):
191
  audio_output = gr.Audio(
192
  label="Generated Speech",
193
  type="numpy",
 
217
  # Add text analysis info
218
  with gr.Row():
219
  with gr.Column():
220
+ gr.Markdown(demo_text_info)
 
 
 
221
 
222
  # Launch the app
223
  if __name__ == "__main__":
lib/ui_content.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HTML content for the header section
2
+ header_html = """
3
+ <div>
4
+ <!-- Top badges bar -->
5
+ <div style="display: flex; justify-content: flex-end; padding: 4px; gap: 8px; height: 32px; align-items: center;">
6
+ <div style="height: 28px; display: flex; align-items: center; margin-top: 3px;">
7
+ <a class="github-button" href="https://github.com/remsky/Kokoro-FastAPI" data-color-scheme="no-preference: dark; light: dark; dark: dark;" data-size="large" data-show-count="true" aria-label="Star remsky/Kokoro-FastAPI on GitHub">Kokoro-FastAPI Repo</a>
8
+ </div>
9
+ <a href="https://huggingface.co/hexgrad/Kokoro-82M" target="_blank" style="height: 28px; display: flex; align-items: center;">
10
+ <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/model-on-hf-lg-dark.svg" alt="Model on HF" style="height: 100%;">
11
+ </a>
12
+ </div>
13
+
14
+ <div style="text-align: center; margin-bottom: 1rem;">
15
+ <h1 style="font-size: 1.75rem; font-weight: bold; color: #ffffff; margin-bottom: 0.5rem;">Kokoro TTS Demo</h1>
16
+ <p style="color: #d1d5db;">Convert text to natural-sounding speech using various voices.</p>
17
+ </div>
18
+
19
+ <div style="display: flex; gap: 1rem;">
20
+ <div style="flex: 1; background: rgba(30, 58, 138, 0.3); border: 1px solid rgba(59, 130, 246, 0.3); padding: 0.5rem 1rem; border-radius: 6px; display: flex; align-items: center; justify-content: center;">
21
+ <span style="font-weight: 500; color: #60a5fa; text-align: center;">⏱️ Small requests/Initial chunks can be slower due to warm-up</span>
22
+ </div>
23
+
24
+ <div style="flex: 1; background: rgba(147, 51, 234, 0.3); border: 1px solid rgba(168, 85, 247, 0.3); padding: 0.5rem 1rem; border-radius: 6px; display: flex; align-items: center; justify-content: center;">
25
+ <span style="font-weight: 500; color: #e879f9; text-align: center;">⚠️ 120-second timeout per request (~1+ hour of output audio)</span>
26
+ </div>
27
+ </div>
28
+ </div>
29
+ <script async defer src="https://buttons.github.io/buttons.js"></script>
30
+ """
31
+
32
+ # Markdown content for demo text info
33
+ demo_text_info = """
34
+ ### Demo Text Info
35
+ The demo text is loaded from H.G. Wells' "The Time Machine". This classic text demonstrates the system's ability to handle long-form content through chunking.
36
+ """