herimor committed on
Commit
0d0d952
·
1 Parent(s): ae1f9c4

Add max audio length handling

Browse files
Files changed (3) hide show
  1. app.py +6 -4
  2. configs/generator.json +3 -0
  3. requirements.txt +1 -1
app.py CHANGED
@@ -91,18 +91,20 @@ def main():
91
  prompt_audio = gr.Audio(
92
  sources=["microphone", "upload"],
93
  type="filepath",
94
- label="Prompt audio (3-5 sec of target voice)",
95
  )
96
  prompt_text = gr.Textbox(
97
  lines=3,
98
- label="Prompt transcript",
99
- placeholder="Text that matches the prompt audio (Required)",
 
100
  )
101
 
102
  with gr.Column(scale=1, elem_id="right-col"):
103
  target_text = gr.Textbox(
104
  lines=3,
105
- label="Target text",
 
106
  placeholder="What you want the model to say",
107
  )
108
  output_audio = gr.Audio(
 
91
  prompt_audio = gr.Audio(
92
  sources=["microphone", "upload"],
93
  type="filepath",
94
+ label="Prompt audio (3-5 sec of target voice. Max 10 sec)",
95
  )
96
  prompt_text = gr.Textbox(
97
  lines=3,
98
+ max_length=config.max_prompt_chars,
99
+ label=f"Prompt transcript. Max characters: {config.max_prompt_chars} (Required)",
100
+ placeholder="Text that matches the prompt audio",
101
  )
102
 
103
  with gr.Column(scale=1, elem_id="right-col"):
104
  target_text = gr.Textbox(
105
  lines=3,
106
+ max_length=config.max_phone_tokens,
107
+ label=f"Target text. Max characters: {config.max_phone_tokens}",
108
  placeholder="What you want the model to say",
109
  )
110
  output_audio = gr.Audio(
configs/generator.json CHANGED
@@ -26,6 +26,9 @@
26
  "phoneme_dict_name": "phoneme_to_token.json",
27
  "nltk_resource": "taggers/averaged_perceptron_tagger_eng",
28
  "aligner": "charsiu/en_w2v2_fc_10ms",
 
 
 
29
  "cache_prompt": false,
30
  "phoneme_index_map": {
31
  "0": [
 
26
  "phoneme_dict_name": "phoneme_to_token.json",
27
  "nltk_resource": "taggers/averaged_perceptron_tagger_eng",
28
  "aligner": "charsiu/en_w2v2_fc_10ms",
29
+ "max_prompt_sec": 10,
30
+ "max_prompt_chars": 250,
31
+ "max_phone_tokens": 1000,
32
  "cache_prompt": false,
33
  "phoneme_index_map": {
34
  "0": [
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- voxtream==0.1.3
2
  gradio_client==1.3.0
3
  pydantic==2.10.6
 
1
+ voxtream==0.1.4
2
  gradio_client==1.3.0
3
  pydantic==2.10.6