teticio committed
Commit 9fca2a2
1 Parent(s): 9e07e24

added style transfer example

Files changed (1)
1. notebooks/test_model.ipynb +52 -4
notebooks/test_model.ipynb CHANGED
@@ -128,7 +128,7 @@
   "id": "80e5b5fa",
   "metadata": {},
   "source": [
-  "Try playing around with `start_steps`. Values closer to zero will produce new samples, while values closer to `steps` will produce samples more faithful to the original. You can also try generating variations of a `slice` of an `audio_file` instead of passing in a `raw_audio`, which results in a kind of style transfer."
+  "Try playing around with `start_steps`. Values closer to zero will produce new samples, while values closer to `steps` will produce samples more faithful to the original."
   ]
  },
  {
@@ -156,7 +156,7 @@
   "outputs": [],
   "source": [
   "start_steps = 500 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
-  "loop = AudioDiffusion.loop_it(audio, sample_rate, loops=1)\n",
+  "track = AudioDiffusion.loop_it(audio, sample_rate, loops=1)\n",
   "for variation in range(12):\n",
   "    image2, (\n",
   "        sample_rate, audio2\n",
@@ -166,8 +166,56 @@
   "        steps=1000)\n",
   "    display(image2)\n",
   "    display(Audio(audio2, rate=sample_rate))\n",
-  "    loop = np.concatenate([loop, AudioDiffusion.loop_it(audio2, sample_rate, loops=1)])\n",
-  "display(Audio(loop, rate=sample_rate))"
+  "    track = np.concatenate([track, AudioDiffusion.loop_it(audio2, sample_rate, loops=1)])\n",
+  "display(Audio(track, rate=sample_rate))"
+  ]
+ },
+ {
+  "cell_type": "markdown",
+  "id": "15a39884",
+  "metadata": {},
+  "source": [
+   "### Remix (style transfer)"
+  ]
+ },
+ {
+  "cell_type": "markdown",
+  "id": "116bd8c3",
+  "metadata": {},
+  "source": [
+   "Alternatively, you can start from another audio altogether, resulting in a kind of style transfer."
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "id": "c2a9c85c",
+  "metadata": {
+   "scrolled": true
+  },
+  "outputs": [],
+  "source": [
+   "start_steps = 700 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
+   "audio_file = \"track.mp3\" #@param {type:\"string\"}\n",
+   "audio_diffusion.mel.load_audio(audio_file)\n",
+   "track = np.array([])\n",
+   "generator = torch.Generator().manual_seed(seed)\n",
+   "seed = generator.seed()\n",
+   "for slice in range(audio_diffusion.mel.get_number_of_slices()):\n",
+   "    generator.manual_seed(seed)\n",
+   "    audio = audio_diffusion.mel.get_audio_slice(slice)\n",
+   "    _, (\n",
+   "        sample_rate, audio2\n",
+   "    ) = audio_diffusion.generate_spectrogram_and_audio_from_audio(\n",
+   "        audio_file=audio_file,\n",
+   "        slice=slice,\n",
+   "        start_step=start_steps,\n",
+   "        steps=1000,\n",
+   "        generator=generator)\n",
+   "    display(Audio(audio, rate=sample_rate))\n",
+   "    display(Audio(audio2, rate=sample_rate))\n",
+   "    track = np.concatenate([track, audio2])\n",
+   "display(Audio(track, rate=sample_rate))"
   ]
  },
  {
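
For reference, the new remix cell can also be run end to end outside the notebook. The sketch below mirrors the calls added in this commit; the `AudioDiffusion(model_id=...)` constructor, the model id `teticio/audio-diffusion-256`, and saving the result with `soundfile` instead of IPython's `display` are assumptions, not part of this commit.

# Minimal sketch of the remix (style transfer) loop as a standalone script.
# ASSUMPTIONS not taken from this commit: the AudioDiffusion(model_id=...)
# constructor, the model id, and saving with soundfile instead of display().
import numpy as np
import soundfile as sf
import torch

from audiodiffusion import AudioDiffusion

audio_diffusion = AudioDiffusion(model_id="teticio/audio-diffusion-256")  # assumed model id

audio_file = "track.mp3"
start_steps = 700  # closer to 1000 keeps the remix more faithful to the input track

audio_diffusion.mel.load_audio(audio_file)
track = np.array([])
generator = torch.Generator()
seed = generator.seed()
for slice in range(audio_diffusion.mel.get_number_of_slices()):
    # Re-seeding with the same value gives every slice the same starting noise,
    # which keeps the remixed slices stylistically consistent with each other.
    generator.manual_seed(seed)
    _, (sample_rate, audio2) = audio_diffusion.generate_spectrogram_and_audio_from_audio(
        audio_file=audio_file,
        slice=slice,
        start_step=start_steps,
        steps=1000,
        generator=generator)
    track = np.concatenate([track, audio2])
sf.write("remix.wav", track, sample_rate)  # sample_rate is set by the last slice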