Spaces: Running on Zero
Add max audio length handling
Browse files
- app.py +6 -4
- configs/generator.json +3 -0
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -91,18 +91,20 @@ def main():
|
|
| 91 |
prompt_audio = gr.Audio(
|
| 92 |
sources=["microphone", "upload"],
|
| 93 |
type="filepath",
|
| 94 |
-
label="Prompt audio (3-5 sec of target voice)",
|
| 95 |
)
|
| 96 |
prompt_text = gr.Textbox(
|
| 97 |
lines=3,
|
| 98 |
-
|
| 99 |
-
|
|
|
|
| 100 |
)
|
| 101 |
|
| 102 |
with gr.Column(scale=1, elem_id="right-col"):
|
| 103 |
target_text = gr.Textbox(
|
| 104 |
lines=3,
|
| 105 |
-
|
|
|
|
| 106 |
placeholder="What you want the model to say",
|
| 107 |
)
|
| 108 |
output_audio = gr.Audio(
|
|
|
|
| 91 |
prompt_audio = gr.Audio(
|
| 92 |
sources=["microphone", "upload"],
|
| 93 |
type="filepath",
|
| 94 |
+
label="Prompt audio (3-5 sec of target voice. Max 10 sec)",
|
| 95 |
)
|
| 96 |
prompt_text = gr.Textbox(
|
| 97 |
lines=3,
|
| 98 |
+
max_length=config.max_prompt_chars,
|
| 99 |
+
label=f"Prompt transcript. Max characters: {config.max_prompt_chars} (Required)",
|
| 100 |
+
placeholder="Text that matches the prompt audio",
|
| 101 |
)
|
| 102 |
|
| 103 |
with gr.Column(scale=1, elem_id="right-col"):
|
| 104 |
target_text = gr.Textbox(
|
| 105 |
lines=3,
|
| 106 |
+
max_length=config.max_phone_tokens,
|
| 107 |
+
label=f"Target text. Max characters: {config.max_phone_tokens}",
|
| 108 |
placeholder="What you want the model to say",
|
| 109 |
)
|
| 110 |
output_audio = gr.Audio(
|
configs/generator.json
CHANGED
|
@@ -26,6 +26,9 @@
|
|
| 26 |
"phoneme_dict_name": "phoneme_to_token.json",
|
| 27 |
"nltk_resource": "taggers/averaged_perceptron_tagger_eng",
|
| 28 |
"aligner": "charsiu/en_w2v2_fc_10ms",
|
|
|
|
|
|
|
|
|
|
| 29 |
"cache_prompt": false,
|
| 30 |
"phoneme_index_map": {
|
| 31 |
"0": [
|
|
|
|
| 26 |
"phoneme_dict_name": "phoneme_to_token.json",
|
| 27 |
"nltk_resource": "taggers/averaged_perceptron_tagger_eng",
|
| 28 |
"aligner": "charsiu/en_w2v2_fc_10ms",
|
| 29 |
+
"max_prompt_sec": 10,
|
| 30 |
+
"max_prompt_chars": 250,
|
| 31 |
+
"max_phone_tokens": 1000,
|
| 32 |
"cache_prompt": false,
|
| 33 |
"phoneme_index_map": {
|
| 34 |
"0": [
|
requirements.txt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
-
voxtream==0.1.
|
| 2 |
gradio_client==1.3.0
|
| 3 |
pydantic==2.10.6
|
|
|
|
| 1 |
+
voxtream==0.1.4
|
| 2 |
gradio_client==1.3.0
|
| 3 |
pydantic==2.10.6
|