linoyts HF staff committed on
Commit
8684377
1 Parent(s): e73da9c
Files changed (2) hide show
  1. app.py +15 -11
  2. style.css +1 -1
app.py CHANGED
@@ -153,6 +153,10 @@ Demo for the text-based editing method introduced in:
153
  <a href="https://arxiv.org/abs/2402.10009" style="text-decoration: underline;" target="_blank"> Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion </a>
154
  </p>
155
  <p style="font-size:larger">
 
 
 
 
156
  <b>Instructions:</b><br>
157
  Provide an input audio and a target prompt to edit the audio. <br>
158
  T<sub>start</sub> is used to control the tradeoff between fidelity to the original signal and text-adhearance.
@@ -169,7 +173,6 @@ For faster inference without waiting in queue, you may duplicate the space and u
169
  <a href="https://huggingface.co/spaces/hilamanor/audioEditing?duplicate=true">
170
  <img style="margin-top: 0em; margin-bottom: 0em; display:inline" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" ></a>
171
  </p>
172
-
173
  """
174
 
175
  with gr.Blocks(css='style.css') as demo:
@@ -187,16 +190,14 @@ with gr.Blocks(css='style.css') as demo:
187
  do_inversion = gr.State(value=True) # To save some runtime when editing the same thing over and over
188
 
189
  with gr.Row():
190
- with gr.Column():
191
- src_prompt = gr.Textbox(label="OPTIONAL: Source Prompt", lines=2, interactive=True,
192
- placeholder="Optional: Describe the original audio input",)
193
- input_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio",
194
- interactive=True, scale=1)
195
 
196
- with gr.Column():
197
- tar_prompt = gr.Textbox(label="Target Prompt", placeholder="Describe your desired edited output",
198
  lines=2, interactive=True)
199
- output_audio = gr.Audio(label="Edited Audio", interactive=False, scale=1)
200
 
201
  with gr.Row():
202
  with gr.Column():
@@ -204,13 +205,16 @@ with gr.Blocks(css='style.css') as demo:
204
 
205
  with gr.Row():
206
  t_start = gr.Slider(minimum=30, maximum=160, value=110, step=1, label="T-start", interactive=True, scale=3,
207
- info="Higher T-start -> stronger edit. Lower T-start -> more similar to original audio.")
208
  model_id = gr.Dropdown(label="AudioLDM2 Version", choices=["cvssp/audioldm2",
209
  "cvssp/audioldm2-large",
210
  "cvssp/audioldm2-music"],
211
- info="Choose a checkpoint suitable for your intended audio and edit.",
212
  value="cvssp/audioldm2-music", interactive=True, type="value", scale=2)
213
  with gr.Accordion("More Options", open=False):
 
 
 
214
 
215
  with gr.Row():
216
  cfg_scale_src = gr.Number(value=3, minimum=0.5, maximum=25, precision=None,
 
153
  <a href="https://arxiv.org/abs/2402.10009" style="text-decoration: underline;" target="_blank"> Zero-Shot Unsupervised and Text-Based Audio Editing Using DDPM Inversion </a>
154
  </p>
155
  <p style="font-size:larger">
156
+
157
+ """
158
+
159
+ help = """
160
  <b>Instructions:</b><br>
161
  Provide an input audio and a target prompt to edit the audio. <br>
162
  T<sub>start</sub> is used to control the tradeoff between fidelity to the original signal and text-adhearance.
 
173
  <a href="https://huggingface.co/spaces/hilamanor/audioEditing?duplicate=true">
174
  <img style="margin-top: 0em; margin-bottom: 0em; display:inline" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" ></a>
175
  </p>
 
176
  """
177
 
178
  with gr.Blocks(css='style.css') as demo:
 
190
  do_inversion = gr.State(value=True) # To save some runtime when editing the same thing over and over
191
 
192
  with gr.Row():
193
+ input_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio",
194
+ interactive=True, scale=1)
195
+ output_audio = gr.Audio(label="Edited Audio", interactive=False, scale=1)
 
 
196
 
197
+ with gr.Row():
198
+ tar_prompt = gr.Textbox(label="Prompt", info="Describe your desired edited output", placeholder="a recording of a happy upbeat arcade game soundtrack",
199
  lines=2, interactive=True)
200
+
201
 
202
  with gr.Row():
203
  with gr.Column():
 
205
 
206
  with gr.Row():
207
  t_start = gr.Slider(minimum=30, maximum=160, value=110, step=1, label="T-start", interactive=True, scale=3,
208
+ info="Higher T-start -> stronger edit. Lower T-start -> closer to original audio")
209
  model_id = gr.Dropdown(label="AudioLDM2 Version", choices=["cvssp/audioldm2",
210
  "cvssp/audioldm2-large",
211
  "cvssp/audioldm2-music"],
212
+ info="Choose a checkpoint suitable for your intended audio and edit",
213
  value="cvssp/audioldm2-music", interactive=True, type="value", scale=2)
214
  with gr.Accordion("More Options", open=False):
215
+ with gr.Row():
216
+ src_prompt = gr.Textbox(label="Source Prompt", lines=2, interactive=True, info= "Optional: Describe the original audio input",
217
+ placeholder="A recording of a happy upbeat classical music piece",)
218
 
219
  with gr.Row():
220
  cfg_scale_src = gr.Number(value=3, minimum=0.5, maximum=25, precision=None,
style.css CHANGED
@@ -1,4 +1,4 @@
1
  .gradio-container {
2
- max-width: 1050px !important;
3
  padding-top: 1.5rem !important;
4
  }
 
1
  .gradio-container {
2
+ max-width: 700px !important;
3
  padding-top: 1.5rem !important;
4
  }