lauraibnz committed on
Commit
91aeeb9
1 Parent(s): 2342079

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -21,7 +21,7 @@ pipe = pipe.to(device)
21
  generator = torch.Generator(device)
22
 
23
 
24
- def predict(midi_file=None, prompt="", negative_prompt="", audio_length_in_s=5, random_seed=0, controlnet_conditioning_scale=1, num_inference_steps=20, guess_mode=False):
25
  if isinstance(midi_file, _TemporaryFileWrapper):
26
  midi_file = midi_file.name
27
  midi = PrettyMIDI(midi_file)
@@ -34,6 +34,7 @@ def predict(midi_file=None, prompt="", negative_prompt="", audio_length_in_s=5,
34
  controlnet_conditioning_scale=float(controlnet_conditioning_scale),
35
  guess_mode=guess_mode,
36
  generator=generator.manual_seed(int(random_seed)),
 
37
  )
38
  return (16000, audio.audios.T)
39
 
@@ -48,19 +49,20 @@ with gr.Blocks(title="🎹 MIDI-AudioLDM", theme=gr.themes.Base(text_size=gr.the
48
  """)
49
  with gr.Row():
50
  with gr.Column(variant='panel'):
51
- midi = gr.File(label="midi file", file_types=[".mid"], info="Load the MIDI file that you want to use as conditioning.")
52
- prompt = gr.Textbox(label="prompt", info="Enter a descriptive text prompt.")
53
  with gr.Column(variant='panel'):
54
  audio = gr.Audio(label="audio")
55
  with gr.Accordion("Advanced Settings", open=False):
56
- neg_prompt = gr.Textbox(label="negative prompt", info="Enter a negative prompt not to guide the audio generation.")
57
  duration = gr.Slider(0, 30, value=5, step=5, label="duration (seconds)", info="Modify the duration of the output audio file.")
58
- seed = gr.Number(value=42, label="seed", info="Change the random seed for a different generation result.")
59
- cond = gr.Slider(0.0, 1.0, value=1.0, step=0.1, label="conditioning scale", info="Enter a value between 0 and 1. The larger the more it will take the conditioning into account.")
60
- inf = gr.Slider(0, 50, value=20, step=0.1, label="inference steps", info="Edit the number of denoising steps. More inference steps usually leads to better but slower results.")
61
- guess = gr.Checkbox(label="guess mode", info="If true, the model will try to recognize the content of the conditioning without the need of a text prompt.")
 
 
62
  btn = gr.Button("Generate")
63
- btn.click(predict, inputs=[midi, prompt, neg_prompt, duration, seed, cond, inf, guess], outputs=[audio])
64
- gr.Examples(examples=[["S00.mid", "piano", "", 10, 25, 1.0, 20, False], ["S00.mid", "violin", "", 10, 25, 1.0, 20, False], ["S00.mid", "woman singing", "", 10, 25, 0.8, 20, False]], inputs=[midi, prompt, neg_prompt, duration, seed, cond, inf, guess], fn=predict, outputs=audio, cache_examples=True)
65
 
66
  demo.launch()
 
21
  generator = torch.Generator(device)
22
 
23
 
24
+ def predict(midi_file=None, prompt="", negative_prompt="", audio_length_in_s=5, random_seed=0, controlnet_conditioning_scale=1, num_inference_steps=20, guidance_scale=2.5, guess_mode=False):
25
  if isinstance(midi_file, _TemporaryFileWrapper):
26
  midi_file = midi_file.name
27
  midi = PrettyMIDI(midi_file)
 
34
  controlnet_conditioning_scale=float(controlnet_conditioning_scale),
35
  guess_mode=guess_mode,
36
  generator=generator.manual_seed(int(random_seed)),
37
+ guidance_scale=float(guidance_scale),
38
  )
39
  return (16000, audio.audios.T)
40
 
 
49
  """)
50
  with gr.Row():
51
  with gr.Column(variant='panel'):
52
+ midi = gr.File(label="midi", file_types=[".mid"])
53
+ prompt = gr.Textbox(label="prompt", info="Enter a descriptive text prompt to guide the audio generation.")
54
  with gr.Column(variant='panel'):
55
  audio = gr.Audio(label="audio")
56
  with gr.Accordion("Advanced Settings", open=False):
 
57
  duration = gr.Slider(0, 30, value=5, step=5, label="duration (seconds)", info="Modify the duration of the output audio file.")
58
+ inf = gr.Slider(0, 50, value=20, step=0.1, label="inference steps", info="Edit the number of denoising steps. More inference steps usually leads to higher quality but slower results.")
59
+ guidance_scale = gr.Slider(0, 4, value=2.5, step=0.5, label="guidance scale", info="Modify the guidance scale. The higher the value the more linked is the generated audio to the text prompt, sometimes at the expense of lower quality.")
60
+ neg_prompt = gr.Textbox(label="negative prompt", info="Optionally enter a negative text prompt not to guide the audio generation.")
61
+ seed = gr.Number(value=42, label="random seed", info="Change the random seed for a different generation result.")
62
+ cond = gr.Slider(0.0, 1.0, value=1.0, step=0.1, label="conditioning scale", info="Enter a value between 0 and 1. The larger the more it will take the conditioning into account. Lower values are recommended for more creative prompts.")
63
+ guess = gr.Checkbox(label="guess mode", info="If selected, the model will try to recognize the content of the MIDI without the need of a text prompt.")
64
  btn = gr.Button("Generate")
65
+ btn.click(predict, inputs=[midi, prompt, neg_prompt, duration, seed, cond, inf, guidance_scale, guess], outputs=[audio])
66
+ # gr.Examples(examples=[["S00.mid", "piano", "", 10, 25, 1.0, 20, 2.5, False], ["S00.mid", "violin", "", 10, 25, 1.0, 20, 2.5, False], ["S00.mid", "woman singing", "", 10, 25, 0.8, 20, 2.5, False]], inputs=[midi, prompt, neg_prompt, duration, seed, cond, inf, guidance_scale, guess], fn=predict, outputs=audio, cache_examples=True)
67
 
68
  demo.launch()