Spaces:

descript
/

vampnet

Runtime error

App Files Community

Hugo Flores Garcia committed on Apr 12, 2023

Commit

03f09ee

1 Parent(s): f3f4634

better sampling defaults

Browse files

Files changed (2) hide show

demo.py +45 -53
vampnet/modules/base.py +2 -2

demo.py CHANGED Viewed

@@ -115,7 +115,7 @@ def vamp(
                 sig,
                 temperature=(init_temp, final_temp),
                 prefix_dur_s=prefix_s,
-                suffix_dur_s=suffix_s,
                 num_loops=num_vamps,
                 downsample_factor=mask_periodic_amt,
                 intensity=rand_mask_intensity,
@@ -199,9 +199,6 @@ with gr.Blocks() as demo:
             5. Listen to the generated audio
             6. If you noticed something you liked, write some notes, click the "save vamp" button, and copy the save code
             """)
             gr.Markdown("## Input Audio")
         with gr.Column():
@@ -211,12 +208,6 @@ with gr.Blocks() as demo:
             - mask hints are used to guide vampnet to generate audio that sounds like the original
             - the more hints you give, the more the generated audio will sound like the original
             """)
         with gr.Column():
             gr.Markdown("""
@@ -228,6 +219,7 @@ with gr.Blocks() as demo:
             - if you want a more "random" generation:
                 - uncheck the beat sync button (or reduce the beat unmask duration)
                 - increase the periodic unmasking to 16 or more
             """)
@@ -281,11 +273,11 @@ with gr.Blocks() as demo:
         with gr.Column():
             mask_periodic_amt = gr.Slider(
-                label="periodic unmasking factor (provides a rhythmic, periodic hint). 0.0 means no hint, 2 means one hint every 2 timesteps, etc, 4 means one hint every 4 timesteps, etc.",
                 minimum=0,
-                maximum=32,
                 step=1,
-                value=16,
             )
@@ -296,32 +288,33 @@ with gr.Blocks() as demo:
                 value=1.0
             )
-            prefix_s = gr.Slider(
-                label="prefix hint length (seconds)",
-                minimum=0.0,
-                maximum=10.0,
-                value=0.0
-            )
-            suffix_s = gr.Slider(
-                label="suffix hint length (seconds)",
-                minimum=0.0,
-                maximum=10.0,
-                value=0.0
-            )
-            init_temp = gr.Slider(
-                label="initial temperature (should probably stay between 0.6 and 1)",
-                minimum=0.0,
-                maximum=1.5,
-                value=0.8
-            )
-            final_temp = gr.Slider(
-                label="final temperature (should probably stay between 0.7 and 2)",
-                minimum=0.0,
-                maximum=2.0,
-                value=0.9
-            )
             use_beats = gr.Checkbox(
                 label="use beat hints",
@@ -333,10 +326,9 @@ with gr.Blocks() as demo:
                 minimum=4,
                 maximum=128,
                 step=1,
-                value=24
             )
             vamp_button = gr.Button("vamp!!!")
             output_audio = gr.Audio(
@@ -365,7 +357,7 @@ with gr.Blocks() as demo:
                     label="duration",
                     minimum=0.0,
                     maximum=3.0,
-                    value=0.1
                 )
                 with gr.Accordion("downbeat settings", open=False):
                     mask_dwn_chk = gr.Checkbox(
@@ -392,19 +384,19 @@ with gr.Blocks() as demo:
                         step=1
                     )
-                notes_text = gr.Textbox(
-                    label="type any notes about the generated audio here",
-                    value="",
-                    interactive=True
-                )
-                save_button = gr.Button("download vamp")
-                download_file = gr.File(
-                    label="vamp to download will appear here",
-                    interactive=False
-                )
-                thank_you = gr.Markdown("")
     # connect widgets

                 sig,
                 temperature=(init_temp, final_temp),
                 prefix_dur_s=prefix_s,
+                suffix_dur_s=prefix_s, # suffix should be same length as prefix
                 num_loops=num_vamps,
                 downsample_factor=mask_periodic_amt,
                 intensity=rand_mask_intensity,
             5. Listen to the generated audio
             6. If you noticed something you liked, write some notes, click the "save vamp" button, and copy the save code
             """)
             gr.Markdown("## Input Audio")
         with gr.Column():
             - mask hints are used to guide vampnet to generate audio that sounds like the original
             - the more hints you give, the more the generated audio will sound like the original
             """)
         with gr.Column():
             gr.Markdown("""
             - if you want a more "random" generation:
                 - uncheck the beat sync button (or reduce the beat unmask duration)
                 - increase the periodic unmasking to 16 or more
+                - increase the temperatures!
             """)
         with gr.Column():
             mask_periodic_amt = gr.Slider(
+                label="periodic hint  (0.0 means no hint, 2 means one hint every 2 timesteps, etc, 4 means one hint every 4 timesteps, etc)",
                 minimum=0,
+                maximum=64,
                 step=1,
+                value=19,
             )
                 value=1.0
             )
+            with gr.Accordion("prefix/suffix hints", open=False):
+                prefix_s = gr.Slider(
+                    label="prefix hint length (seconds)",
+                    minimum=0.0,
+                    maximum=10.0,
+                    value=0.0
+                )
+                suffix_s = gr.Slider(
+                    label="suffix hint length (seconds)",
+                    minimum=0.0,
+                    maximum=10.0,
+                    value=0.0
+                )
+            with gr.Accordion("temperature settings", open=False):
+                init_temp = gr.Slider(
+                    label="initial temperature (should probably stay between 0.6 and 1)",
+                    minimum=0.0,
+                    maximum=1.5,
+                    value=0.8
+                )
+                final_temp = gr.Slider(
+                    label="final temperature (should probably stay between 0.7 and 2)",
+                    minimum=0.0,
+                    maximum=2.0,
+                    value=1.0
+                )
             use_beats = gr.Checkbox(
                 label="use beat hints",
                 minimum=4,
                 maximum=128,
                 step=1,
+                value=36
             )
             vamp_button = gr.Button("vamp!!!")
             output_audio = gr.Audio(
                     label="duration",
                     minimum=0.0,
                     maximum=3.0,
+                    value=0.07
                 )
                 with gr.Accordion("downbeat settings", open=False):
                     mask_dwn_chk = gr.Checkbox(
                         step=1
                     )
+            notes_text = gr.Textbox(
+                label="type any notes about the generated audio here",
+                value="",
+                interactive=True
+            )
+            save_button = gr.Button("save vamp")
+            download_file = gr.File(
+                label="vamp to download will appear here",
+                interactive=False
+            )
+            thank_you = gr.Markdown("")
     # connect widgets

vampnet/modules/base.py CHANGED Viewed

@@ -181,7 +181,7 @@ class VampBase(at.ml.BaseModel):
         self,
         codec,
         time_steps: int = 400,
-        sampling_steps: int = 12,
         start_tokens: Optional[torch.Tensor] = None,
         mask: Optional[torch.Tensor] = None,
         temperature: Union[float, Tuple[float, float]] = 0.8,
@@ -290,7 +290,7 @@ class VampBase(at.ml.BaseModel):
         self,
         codec,
         time_steps: int = 300,
-        sampling_steps: int = 12,
         start_tokens: Optional[torch.Tensor] = None,
         mask: Optional[torch.Tensor] = None,
         temperature: Union[float, Tuple[float, float]] = 0.8,

         self,
         codec,
         time_steps: int = 400,
+        sampling_steps: int = 36,
         start_tokens: Optional[torch.Tensor] = None,
         mask: Optional[torch.Tensor] = None,
         temperature: Union[float, Tuple[float, float]] = 0.8,
         self,
         codec,
         time_steps: int = 300,
+        sampling_steps: int = 36,
         start_tokens: Optional[torch.Tensor] = None,
         mask: Optional[torch.Tensor] = None,
         temperature: Union[float, Tuple[float, float]] = 0.8,