rookie9 committed on
Commit
0160e18
·
verified ·
1 Parent(s): 8ae83f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -9
app.py CHANGED
@@ -99,7 +99,7 @@ demo = gr.Interface(
99
  fn=infer,
100
  inputs=[
101
  gr.Textbox(label="TCC (necessary)", value="a dog barks"),
102
- gr.Textbox(label="TDC (optional, see format)", value="random"),
103
  gr.Textbox(label="Length (seconds, optional)", value="10.0"),
104
  gr.Checkbox(label="Enable Time Control", value=False),
105
  ],
@@ -108,14 +108,37 @@ demo = gr.Interface(
108
  gr.Textbox(label="Final TDC Used (input_onset)")
109
  ],
110
  title="PicoAudio2 Online Inference",
111
- description=(
112
- "TCC (temporal coarse caption) is necessary to generate audio. "
113
- "If you need time control, please enter TDC and length (temporal detailed caption, in seconds). "
114
- "Alternatively, you can let the LLM generate TDC, but API quota limits may affect availability. "
115
- "TDC format: \"event1(start1-end1, start2-end2); event2(start1-end1, start2-end2...)\", for example: "
116
- "\"a dog barks(1.0-2.0, 3.0-4.0); a man speaks(5.0-6.0)\""
117
- "If the format of TDC is wrong or no input length, the model will generate audio without temporal control. Sorry!"
118
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  )
120
  if __name__ == "__main__":
121
  demo.launch()
 
99
  fn=infer,
100
  inputs=[
101
  gr.Textbox(label="TCC (necessary)", value="a dog barks"),
102
+ gr.Textbox(label="TDC (optional, see format)", value="a dog barks(3.0-4.0, 6.0-7.0)"),
103
  gr.Textbox(label="Length (seconds, optional)", value="10.0"),
104
  gr.Checkbox(label="Enable Time Control", value=False),
105
  ],
 
108
  gr.Textbox(label="Final TDC Used (input_onset)")
109
  ],
110
  title="PicoAudio2 Online Inference",
111
+ description="""
112
+ ## Definition
113
+
114
+ **TCC (Temporal Coarse Caption):**
115
+ A brief text description for the overall audio scene.
116
+ *Example*: `a dog barks`
117
+
118
+ **TDC (Temporal Detailed Caption):**
119
+ A **caption with timestamp information** for each event.
120
+ It allows precise temporal control over when events happen in the generated audio.
121
+ *Example*: `a dog barks(1.0-2.0, 3.0-4.0); a man speaks(5.0-6.0)`
122
+
123
+ ---
124
+
125
+ ## Input Requirements & Format
126
+
127
+ - **TCC** is **required** for audio generation.
128
+ - **TDC** is **optional**. If provided, it should follow the format: event1(start1-end1, start2-end2); event2(start1-end1, ...)
129
+ *Example*: a dog barks(1.0-2.0, 3.0-4.0); a man speaks(5.0-6.0)
130
+ - **Length** (in seconds) is optional, but recommended for temporal control.
131
+ - **Enable Time Control**: Tick to use TDC and length for precise event timing.
132
+
133
+ ---
134
+
135
+ ## Notes
136
+
137
+ - If TDC format is incorrect or length is missing, the model will generate audio **without precise temporal control**.
138
+ - For general audio generation, it is recommended to input '"random"' for TDC.
139
+ - You may leave TDC blank to let the LLM generate timestamps automatically (subject to API quota).
140
+ ---
141
+ """
142
  )
143
  if __name__ == "__main__":
144
  demo.launch()