ylacombe committed
Commit a204cc2 · verified · 1 Parent(s): da23db0

Update app.py

Files changed (1):
  1. app.py (+6 -5)
app.py CHANGED
@@ -35,7 +35,6 @@ logger = logging.get_logger(__name__)
 
 class MusicgenMelodyForLongFormConditionalGeneration(MusicgenMelodyForConditionalGeneration):
     stride_longform = 500
-    max_longform_generation_length = 4000
 
 
     def _prepare_audio_encoder_kwargs_for_longform_generation(
@@ -61,6 +60,7 @@ class MusicgenMelodyForLongFormConditionalGeneration(MusicgenMelodyForConditiona
         logits_processor: Optional[LogitsProcessorList] = None,
         stopping_criteria: Optional[StoppingCriteriaList] = None,
         synced_gpus: Optional[bool] = None,
+        max_longform_generation_length: Optional[int] = 4000,
         streamer: Optional["BaseStreamer"] = None,
         **kwargs,
     ):
@@ -638,13 +638,14 @@ def generate_audio(text_prompt, audio, audio_length_in_s=10.0, play_steps_in_s=2
         return_tensors="pt",
     )
 
-    streamer = MusicgenStreamer(model, device=device, play_steps=play_steps, is_longform=True)
+    streamer = MusicgenStreamer(model, device=device, play_steps=play_steps, is_longform=True, )
 
     generation_kwargs = dict(
         **inputs.to(device),
         temperature=1.2,
         streamer=streamer,
-        max_new_tokens=max_new_tokens,
+        max_new_tokens=min(max_new_tokens, 1500),
+        max_longform_generation_length=max_new_tokens,
     )
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
@@ -666,9 +667,9 @@ demo = gr.Interface(
     inputs=[
         gr.Text(label="Prompt", value="80s pop track with synth and instrumentals"),
         gr.Audio(type="numpy", label="Conditioning audio"),
-        gr.Slider(10, 30, value=15, step=5, label="Audio length in seconds"),
+        gr.Slider(15, 60, value=45, step=5, label="Audio length in seconds"),
         gr.Slider(0.5, 2.5, value=1.5, step=0.5, label="Streaming interval in seconds", info="Lower = shorter chunks, lower latency, more codec steps"),
-        gr.Slider(0, 10, value=5, step=1, label="Seed for random generations"),
+        gr.Number(value=5, precision=0, step=1, minimum=0, label="Seed for random generations"),
     ],
     outputs=[
         gr.Audio(label="Generated Music", autoplay=True, interactive=False, streaming=True)
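
Taken together, the commit turns the long-form length cap from a fixed class attribute into a generate() keyword argument (still defaulting to 4000 tokens), caps the per-call max_new_tokens at 1500, and widens the UI's audio-length slider to 60 seconds. The sketch below is a minimal, self-contained illustration of how those length kwargs relate; it is not code from the Space, and the ~50 codec frames per second rate and the build_longform_kwargs helper are assumptions used only for illustration.

# Minimal sketch (not code from the Space) of how the updated length handling
# fits together. Assumptions: MusicGen's codec emits roughly 50 frames per
# second, and the app derives max_new_tokens from the requested audio length.

FRAME_RATE = 50        # approx. codec frames per second (assumption)
PER_WINDOW_CAP = 1500  # per-call cap on max_new_tokens, taken from the diff


def build_longform_kwargs(audio_length_in_s: float) -> dict:
    """Length-related kwargs that generate_audio would pass to model.generate()."""
    max_new_tokens = int(FRAME_RATE * audio_length_in_s)
    return dict(
        # Single-window budget: requests above 1500 tokens are finished by the
        # long-form continuation loop (stride_longform = 500) instead of one pass.
        max_new_tokens=min(max_new_tokens, PER_WINDOW_CAP),
        # Overall target handed to the long-form generate() override; it now
        # defaults to 4000 tokens (~80 s at the assumed frame rate) when omitted.
        max_longform_generation_length=max_new_tokens,
    )


if __name__ == "__main__":
    # 45 s is the new slider default: roughly 2250 tokens overall, produced as a
    # 1500-token first window plus continuation windows.
    print(build_longform_kwargs(45.0))

Capping a single window at 1500 tokens presumably keeps the latency of each streamed chunk bounded, while max_longform_generation_length controls the total duration; the gr.Number seed input that replaces the 0-10 slider likewise lets users enter any non-negative integer seed.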