audio-diffusion_style_transfer

Running

App Files Files Community

nakas committed on Nov 6, 2022

Commit

3d5dc51

•

1 Parent(s): f109f30

add step value slider

Browse files

Files changed (1) hide show

app.py +6 -5

app.py CHANGED Viewed

@@ -5,12 +5,12 @@ import gradio as gr
 from audiodiffusion import AudioDiffusion
-def generate_spectrogram_audio_and_loop(audio_file,model_id):
     print(audio_file)
     print(model_id)
     audio_diffusion = AudioDiffusion(model_id=model_id)
     image, (sample_rate,
-            audio) = audio_diffusion.generate_spectrogram_and_audio_from_audio(audio_file)
     loop = AudioDiffusion.loop_it(audio, sample_rate)
     if loop is None:
         loop = audio
@@ -20,10 +20,11 @@ def generate_spectrogram_audio_and_loop(audio_file,model_id):
 demo = gr.Interface(fn=generate_spectrogram_audio_and_loop,
                     title="Audio Diffusion",
                     description="Forked from https://huggingface.co/spaces/teticio/audio-diffusion Built to style transfer to audio using Huggingface diffusers.\
-        Outputs a 5 second audio clip with elements from the initial audio uploaded. This takes about 2 hours without a GPU, so why not bake a cake in the meantime? (Or try the teticio/audio-diffusion-ddim-256 \
-                model which is faster.) The code for doing style transfer method was already into teticio's repo and python notebooks this is just my attempt to hook it up in the hugging face space environment it up into a hugging face space. still need some more testing and such but would be cool to hook up step number and then also do inpainting and outpointing In this space and get the api working with the updated pipelines",
                     inputs=[
                         gr.Audio(source="upload",type="filepath"),
                         gr.Dropdown(label="Model",
                                     choices=[
                                         "teticio/audio-diffusion-256",
@@ -45,4 +46,4 @@ if __name__ == "__main__":
     parser.add_argument("--port", type=int)
     parser.add_argument("--server", type=int)
     args = parser.parse_args()
-    demo.launch(server_name=args.server or "0.0.0.0", server_port=args.port)

 from audiodiffusion import AudioDiffusion
+def generate_spectrogram_audio_and_loop(audio_file,steps,model_id):
     print(audio_file)
     print(model_id)
     audio_diffusion = AudioDiffusion(model_id=model_id)
     image, (sample_rate,
+            audio) = audio_diffusion.generate_spectrogram_and_audio_from_audio(audio_file,steps)
     loop = AudioDiffusion.loop_it(audio, sample_rate)
     if loop is None:
         loop = audio
 demo = gr.Interface(fn=generate_spectrogram_audio_and_loop,
                     title="Audio Diffusion",
                     description="Forked from https://huggingface.co/spaces/teticio/audio-diffusion Built to style transfer to audio using Huggingface diffusers.\
+        Outputs a 5 second audio clip with elements from the initial audio uploaded, steps is relative to the amount of style transfer from model to do. This takes about 2 hours without a GPU, so why not bake a cake in the meantime? (Or try the teticio/audio-diffusion-ddim-256 \
+                model which is faster.) The code for doing style transfer method was already in teticio's repo and python notebooks this is just my attempt to hook it up in the hugging face space. still need some more testing and such but would be cool to add more models, do inpainting, outpointing and get the api working with the updated pipelines",
                     inputs=[
                         gr.Audio(source="upload",type="filepath"),
+                        gr.Slider(minimum=0, maximum=1000,value=500, step=1, label="Steps counter between 0 and 1000, high means more style transfer from model"),
                         gr.Dropdown(label="Model",
                                     choices=[
                                         "teticio/audio-diffusion-256",
     parser.add_argument("--port", type=int)
     parser.add_argument("--server", type=int)
     args = parser.parse_args()
+    demo.launch(server_name=args.server or "0.0.0.0", server_port=args.port)