Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -35,7 +35,6 @@ logger = logging.get_logger(__name__)
|
|
35 |
|
36 |
class MusicgenMelodyForLongFormConditionalGeneration(MusicgenMelodyForConditionalGeneration):
|
37 |
stride_longform = 500
|
38 |
-
max_longform_generation_length = 4000
|
39 |
|
40 |
|
41 |
def _prepare_audio_encoder_kwargs_for_longform_generation(
|
@@ -61,6 +60,7 @@ class MusicgenMelodyForLongFormConditionalGeneration(MusicgenMelodyForConditiona
|
|
61 |
logits_processor: Optional[LogitsProcessorList] = None,
|
62 |
stopping_criteria: Optional[StoppingCriteriaList] = None,
|
63 |
synced_gpus: Optional[bool] = None,
|
|
|
64 |
streamer: Optional["BaseStreamer"] = None,
|
65 |
**kwargs,
|
66 |
):
|
@@ -638,13 +638,14 @@ def generate_audio(text_prompt, audio, audio_length_in_s=10.0, play_steps_in_s=2
|
|
638 |
return_tensors="pt",
|
639 |
)
|
640 |
|
641 |
-
streamer = MusicgenStreamer(model, device=device, play_steps=play_steps, is_longform=True)
|
642 |
|
643 |
generation_kwargs = dict(
|
644 |
**inputs.to(device),
|
645 |
temperature=1.2,
|
646 |
streamer=streamer,
|
647 |
-
max_new_tokens=max_new_tokens,
|
|
|
648 |
)
|
649 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
650 |
thread.start()
|
@@ -666,9 +667,9 @@ demo = gr.Interface(
|
|
666 |
inputs=[
|
667 |
gr.Text(label="Prompt", value="80s pop track with synth and instrumentals"),
|
668 |
gr.Audio(type="numpy", label="Conditioning audio"),
|
669 |
-
gr.Slider(
|
670 |
gr.Slider(0.5, 2.5, value=1.5, step=0.5, label="Streaming interval in seconds", info="Lower = shorter chunks, lower latency, more codec steps"),
|
671 |
-
gr.
|
672 |
],
|
673 |
outputs=[
|
674 |
gr.Audio(label="Generated Music", autoplay=True, interactive=False, streaming=True)
|
|
|
35 |
|
36 |
class MusicgenMelodyForLongFormConditionalGeneration(MusicgenMelodyForConditionalGeneration):
|
37 |
stride_longform = 500
|
|
|
38 |
|
39 |
|
40 |
def _prepare_audio_encoder_kwargs_for_longform_generation(
|
|
|
60 |
logits_processor: Optional[LogitsProcessorList] = None,
|
61 |
stopping_criteria: Optional[StoppingCriteriaList] = None,
|
62 |
synced_gpus: Optional[bool] = None,
|
63 |
+
max_longform_generation_length: Optional[int] = 4000,
|
64 |
streamer: Optional["BaseStreamer"] = None,
|
65 |
**kwargs,
|
66 |
):
|
|
|
638 |
return_tensors="pt",
|
639 |
)
|
640 |
|
641 |
+
streamer = MusicgenStreamer(model, device=device, play_steps=play_steps, is_longform=True, )
|
642 |
|
643 |
generation_kwargs = dict(
|
644 |
**inputs.to(device),
|
645 |
temperature=1.2,
|
646 |
streamer=streamer,
|
647 |
+
max_new_tokens=min(max_new_tokens, 1500),
|
648 |
+
max_longform_generation_length=max_new_tokens,
|
649 |
)
|
650 |
thread = Thread(target=model.generate, kwargs=generation_kwargs)
|
651 |
thread.start()
|
|
|
667 |
inputs=[
|
668 |
gr.Text(label="Prompt", value="80s pop track with synth and instrumentals"),
|
669 |
gr.Audio(type="numpy", label="Conditioning audio"),
|
670 |
+
gr.Slider(15, 60, value=45, step=5, label="Audio length in seconds"),
|
671 |
gr.Slider(0.5, 2.5, value=1.5, step=0.5, label="Streaming interval in seconds", info="Lower = shorter chunks, lower latency, more codec steps"),
|
672 |
+
gr.Number(value=5, precision=0, step=1, minimum=0, label="Seed for random generations"),
|
673 |
],
|
674 |
outputs=[
|
675 |
gr.Audio(label="Generated Music", autoplay=True, interactive=False, streaming=True)
|