Upload with huggingface_hub · pngwn/5.0b-neural-instrument-coding at f8c9a3c

	@@ -0,0 +1,143 @@

+# A Blocks implementation of https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6
+import datetime
+import os
+import random
+import gradio as gr
+from gradio.components import Markdown as m
+# Return the current date and time as a string formatted "MM/DD/YYYY, HH:MM:SS".
+# datetime.now() is called without a tz argument, so the value is naive local time.
+def get_time():
+    now = datetime.datetime.now()
+    return now.strftime("%m/%d/%Y, %H:%M:%S")
+# Placeholder "generator": takes no input and returns one of two hard-coded
+# audio filenames, picked at random. No model is involved.
+def generate_recording():
+    return random.choice(["new-sax-1.mp3", "new-sax-1.wav"])
+# Placeholder "reconstruction": the `audio` argument is ignored and one of two
+# hard-coded audio filenames is returned, picked at random.
+def reconstruct(audio):
+    return random.choice(["new-sax-1.mp3", "new-sax-1.wav"])
+# Stand-in interface: whatever the pitch, loudness and base-audio inputs are,
+# the lambda returns the path of "sax.wav" located next to this file.
+io1 = gr.Interface(
+    lambda x, y, z: os.path.join(os.path.dirname(__file__),"sax.wav"),
+    [
+        gr.Slider(label="pitch"),
+        gr.Slider(label="loudness"),
+        gr.Audio(label="base audio file (optional)"),
+    ],
+    gr.Audio(),
+)
+# Stand-in interface: whatever the pitch, loudness and base-audio inputs are,
+# the lambda returns the path of "flute.wav" located next to this file.
+io2 = gr.Interface(
+    lambda x, y, z: os.path.join(os.path.dirname(__file__),"flute.wav"),
+    [
+        gr.Slider(label="pitch"),
+        gr.Slider(label="loudness"),
+        gr.Audio(label="base audio file (optional)"),
+    ],
+    gr.Audio(),
+)
+# Stand-in interface: whatever the pitch, loudness and base-audio inputs are,
+# the lambda returns the path of "trombone.wav" located next to this file.
+io3 = gr.Interface(
+    lambda x, y, z: os.path.join(os.path.dirname(__file__),"trombone.wav"),
+    [
+        gr.Slider(label="pitch"),
+        gr.Slider(label="loudness"),
+        gr.Audio(label="base audio file (optional)"),
+    ],
+    gr.Audio(),
+)
+# Stand-in interface: whatever the pitch, loudness and base-audio inputs are,
+# the lambda returns the path of "sax2.wav" located next to this file.
+io4 = gr.Interface(
+    lambda x, y, z: os.path.join(os.path.dirname(__file__),"sax2.wav"),
+    [
+        gr.Slider(label="pitch"),
+        gr.Slider(label="loudness"),
+        gr.Audio(label="base audio file (optional)"),
+    ],
+    gr.Audio(),
+)
+# Page layout: components created inside the `with` block below are rendered
+# top to bottom, interleaving Markdown text (`m`) with audio players and demos.
+demo = gr.Blocks(title="Neural Instrument Cloning")
+# NOTE(review): presumably clear() empties the Blocks and returns it so it can
+# serve as the context manager here - confirm against the pinned Gradio version.
+with demo.clear():
+    # Title and header image.
+    m(
+        """
+    ## Neural Instrument Cloning from Very Few Samples
+    <center><img src="https://media.istockphoto.com/photos/brass-trombone-picture-id490455809?k=20&m=490455809&s=612x612&w=0&h=l9KJvH_25z0QTLggHrcH_MsR4gPLH7uXwDPUAZ_C5zk=" width="400px"></center>"""
+    )
+    # Introduction and lead-in to the audio examples.
+    m(
+        """
+    This Blocks implementation is an adaptation [a report written](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6) by Nicolas Jonason and Bob L.T. Sturm.
+    I've implemented it in Blocks to show off some cool features, such as embedding live ML demos. More on that ahead...
+    ### What does this machine learning model do?
+    It combines techniques from neural voice cloning with musical instrument synthesis. This makes it possible to produce neural instrument synthesisers from just seconds of target instrument audio.
+    ### Audio Examples
+    Here are some **real** 16 second saxophone recordings:
+    """
+    )
+    # Two players for the "real" examples; both load the same file, sax.wav.
+    # NOTE(review): the second player may have been meant for another file - confirm.
+    gr.Audio(os.path.join(os.path.dirname(__file__),"sax.wav"), label="Here is a real 16 second saxophone recording:")
+    gr.Audio(os.path.join(os.path.dirname(__file__),"sax.wav"))
+    m(
+        """\n
+        Here is a **generated** saxophone recordings:"""
+    )
+    # Player preloaded with new-sax.wav. `a` is not referenced again in this block.
+    a = gr.Audio(os.path.join(os.path.dirname(__file__),"new-sax.wav"))
+    # The button is created without a click handler, so pressing it does nothing here.
+    # NOTE(review): generate_recording() looks like the intended handler with `a`
+    # as its output - confirm before wiring it up.
+    gr.Button("Generate a new saxophone recording")
+    m(
+        """
+    ### Inputs to the model
+    The inputs to the model are:
+    * pitch
+    * loudness
+    * base audio file
+    """
+    )
+    m(
+        """
+    Try the model live!
+    """
+    )
+    # Embed the four stand-in interfaces as tabs, one tab name per interface.
+    gr.TabbedInterface(
+        [io1, io2, io3, io4], ["Saxophone", "Flute", "Trombone", "Another Saxophone"]
+    )
+    m(
+        """
+    ### Using the model for cloning
+    You can also use this model a different way, to simply clone the audio file and reconstruct it
+    using machine learning. Here, we'll show a demo of that below:
+    """
+    )
+    # Cloning demo: a2 is both the input and the output of its own change event,
+    # with reconstruct() as the handler.
+    # NOTE(review): writing the output back into a2 may fire the change event
+    # again - verify this does not loop in the Gradio version in use.
+    a2 = gr.Audio()
+    a2.change(reconstruct, a2, a2)
+    m(
+        """
+    Thanks for reading this! As you may have realized, all of the "models" in this demo are fake. They are just designed to show you what is possible using Blocks 🤗.
+    For details of the model, read the [original report here](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6).
+    *Details for nerds*: this report was "launched" on:
+    """
+    )
+    # Register get_time() (no inputs) on the load event, with the textbox as output.
+    t = gr.Textbox(label="timestamp")
+    demo.load(get_time, [], t)
+# Start the app only when this file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    demo.launch()

@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+sax.wav filter=lfs diff=lfs merge=lfs -text

@@ -1,12 +1,12 @@
 ---
-title: Blocks Neural Instrument Coding
-emoji: 📈
-colorFrom: red
-colorTo: purple
 sdk: gradio
 sdk_version: 3.3.1
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: blocks_neural_instrument_coding
+emoji: 🔥
+colorFrom: indigo
+colorTo: indigo
 sdk: gradio
 sdk_version: 3.3.1
 app_file: app.py
 pinned: false
 ---

+version https://git-lfs.github.com/spec/v1
+oid sha256:12ee32c66257e1c98ed0f2f7b708a1eab638ec09f4c69dda3ec1d78047a7be4d
+size 1536044

	@@ -0,0 +1,3 @@