Spaces:
Sleeping
Sleeping
Upload with huggingface_hub
Browse files
- .gitattributes +1 -0
- README.md +6 -6
- app.py +143 -0
- flute.wav +0 -0
- new-sax-1.mp3 +0 -0
- new-sax-1.wav +0 -0
- new-sax.wav +0 -0
- sax.wav +3 -0
- sax2.wav +0 -0
- trombone.wav +0 -0
.gitattributes
CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
29 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
30 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
31 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
32 |
+
sax.wav filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.3.1
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
|
2 |
---
|
3 |
+
title: blocks_neural_instrument_coding
|
4 |
+
emoji: 🔥
|
5 |
+
colorFrom: indigo
|
6 |
+
colorTo: indigo
|
7 |
sdk: gradio
|
8 |
sdk_version: 3.3.1
|
9 |
+
|
10 |
app_file: app.py
|
11 |
pinned: false
|
12 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# A Blocks implementation of https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6
|
2 |
+
|
3 |
+
import datetime
|
4 |
+
import os
|
5 |
+
import random
|
6 |
+
|
7 |
+
import gradio as gr
|
8 |
+
from gradio.components import Markdown as m
|
9 |
+
|
10 |
+
|
11 |
+
def get_time():
    """Return the current local time as a ``MM/DD/YYYY, HH:MM:SS`` string."""
    # datetime's __format__ delegates to strftime, so the format spec below
    # yields exactly the same text as an explicit strftime call.
    return f"{datetime.datetime.now():%m/%d/%Y, %H:%M:%S}"
|
14 |
+
|
15 |
+
|
16 |
+
def generate_recording():
    """Return the file name of a randomly chosen "generated" saxophone clip.

    No synthesis takes place: the result is always one of two fixed file
    names, picked with ``random.choice``.
    """
    candidate_clips = ("new-sax-1.mp3", "new-sax-1.wav")
    return random.choice(candidate_clips)
|
18 |
+
|
19 |
+
|
20 |
+
def reconstruct(audio):
    """Return a stand-in "reconstruction" for the supplied audio.

    ``audio`` is accepted so the function can be used as an event handler but
    is never inspected; the result is one of two fixed file names chosen at
    random.
    """
    stand_ins = ["new-sax-1.mp3", "new-sax-1.wav"]
    picked = random.choice(stand_ins)
    return picked
|
22 |
+
|
23 |
+
|
24 |
+
def _make_instrument_interface(sample_name):
    """Build the placeholder synthesis interface for one instrument.

    The interface exposes a pitch slider, a loudness slider and an optional
    base audio input. All three inputs are ignored: the handler always returns
    the path of ``sample_name`` resolved next to this file.

    Args:
        sample_name: File name of the canned recording to return.

    Returns:
        A ``gr.Interface`` with three inputs and one audio output.
    """
    sample_path = os.path.join(os.path.dirname(__file__), sample_name)
    return gr.Interface(
        # Kept as a three-argument lambda so the handler matches the three
        # input components declared below.
        lambda x, y, z: sample_path,
        [
            gr.Slider(label="pitch"),
            gr.Slider(label="loudness"),
            gr.Audio(label="base audio file (optional)"),
        ],
        gr.Audio(),
    )


# One interface per instrument; they differ only in the clip that is returned.
io1 = _make_instrument_interface("sax.wav")
io2 = _make_instrument_interface("flute.wav")
io3 = _make_instrument_interface("trombone.wav")
io4 = _make_instrument_interface("sax2.wav")
|
63 |
+
|
64 |
+
# Top-level page: a written report with live audio players and embedded demos.
demo = gr.Blocks(title="Neural Instrument Cloning")

with demo.clear():
    # Page heading and banner image.
    m(
        """
## Neural Instrument Cloning from Very Few Samples
<center><img src="https://media.istockphoto.com/photos/brass-trombone-picture-id490455809?k=20&m=490455809&s=612x612&w=0&h=l9KJvH_25z0QTLggHrcH_MsR4gPLH7uXwDPUAZ_C5zk=" width="400px"></center>"""
    )
    # Introduction and lead-in to the audio examples.
    m(
        """
This Blocks implementation is an adaptation [a report written](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6) by Nicolas Jonason and Bob L.T. Sturm.

I've implemented it in Blocks to show off some cool features, such as embedding live ML demos. More on that ahead...

### What does this machine learning model do?
It combines techniques from neural voice cloning with musical instrument synthesis. This makes it possible to produce neural instrument synthesisers from just seconds of target instrument audio.

### Audio Examples
Here are some **real** 16 second saxophone recordings:
"""
    )
    # Two players for the "real" recording (both point at sax.wav).
    gr.Audio(os.path.join(os.path.dirname(__file__),"sax.wav"), label="Here is a real 16 second saxophone recording:")
    gr.Audio(os.path.join(os.path.dirname(__file__),"sax.wav"))

    m(
        """\n
Here is a **generated** saxophone recordings:"""
    )
    a = gr.Audio(os.path.join(os.path.dirname(__file__),"new-sax.wav"))

    # Connect the button to the placeholder generator so that a click replaces
    # the clip shown in the player above; previously the button did nothing.
    # NOTE(review): generate_recording returns bare file names, so they are
    # resolved against the working directory - confirm this matches deployment.
    generate_button = gr.Button("Generate a new saxophone recording")
    generate_button.click(generate_recording, [], a)

    m(
        """
### Inputs to the model
The inputs to the model are:
* pitch
* loudness
* base audio file
"""
    )

    m(
        """
Try the model live!
"""
    )

    # Embed the four per-instrument interfaces as tabs inside the report.
    gr.TabbedInterface(
        [io1, io2, io3, io4], ["Saxophone", "Flute", "Trombone", "Another Saxophone"]
    )

    m(
        """
### Using the model for cloning
You can also use this model a different way, to simply clone the audio file and reconstruct it
using machine learning. Here, we'll show a demo of that below:
"""
    )

    # The same component is both input and output: whenever its value changes,
    # reconstruct() supplies a replacement clip.
    a2 = gr.Audio()
    a2.change(reconstruct, a2, a2)

    m(
        """
Thanks for reading this! As you may have realized, all of the "models" in this demo are fake. They are just designed to show you what is possible using Blocks 🤗.

For details of the model, read the [original report here](https://erlj.notion.site/Neural-Instrument-Cloning-from-very-few-samples-2cf41d8b630842ee8c7eb55036a1bfd6).

*Details for nerds*: this report was "launched" on:
"""
    )

    t = gr.Textbox(label="timestamp")

    # Fill the timestamp box each time the page is loaded.
    demo.load(get_time, [], t)


if __name__ == "__main__":
    demo.launch()
|
flute.wav
ADDED
Binary file (223 kB). View file
|
|
new-sax-1.mp3
ADDED
Binary file (95.1 kB). View file
|
|
new-sax-1.wav
ADDED
Binary file (154 kB). View file
|
|
new-sax.wav
ADDED
Binary file (384 kB). View file
|
|
sax.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12ee32c66257e1c98ed0f2f7b708a1eab638ec09f4c69dda3ec1d78047a7be4d
|
3 |
+
size 1536044
|
sax2.wav
ADDED
Binary file (384 kB). View file
|
|
trombone.wav
ADDED
Binary file (384 kB). View file
|
|