marcop committed on
Commit
0895906
1 Parent(s): f5031e6

update demo

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. utils.py +20 -84
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Musika
3
- emoji: 🚀
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
 
1
  ---
2
  title: Musika
3
+ emoji: 🎵
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
utils.py CHANGED
@@ -34,11 +34,7 @@ class Utils_functions:
34
  )
35
  mel_f = tf.convert_to_tensor(librosa.mel_frequencies(n_mels=args.mel_bins + 2, fmin=0.0, fmax=args.sr // 2))
36
  enorm = tf.cast(
37
- tf.expand_dims(
38
- tf.constant(2.0 / (mel_f[2 : args.mel_bins + 2] - mel_f[: args.mel_bins])),
39
- 0,
40
- ),
41
- tf.float32,
42
  )
43
  melmat = tf.multiply(melmat, enorm)
44
  melmat = tf.divide(melmat, tf.reduce_sum(melmat, axis=0))
@@ -165,8 +161,9 @@ class Utils_functions:
165
  outls.append(model(x[i * bs : i * bs + bs], training=False))
166
 
167
  if dual_out:
168
- return np.concatenate([outls[k][0] for k in range(len(outls))], 0), np.concatenate(
169
- [outls[k][1] for k in range(len(outls))], 0
 
170
  )
171
  else:
172
  return np.concatenate(outls, 0)
@@ -199,8 +196,9 @@ class Utils_functions:
199
  inp = tf.concat(inpls, 0)
200
  res = model(inp, training=False)
201
  outls.append(res)
202
- return np.concatenate([outls[k][0] for k in range(len(outls))], 0), np.concatenate(
203
- [outls[k][1] for k in range(len(outls))], 0
 
204
  )
205
 
206
  def distribute_dec2(self, x, model, bs=64):
@@ -228,17 +226,7 @@ class Utils_functions:
228
  return tf.image.random_crop(noisetot, [1, self.args.latlen, 64 + 64])
229
 
230
  def generate_example_stereo(self, models_ls):
231
- (
232
- critic,
233
- gen,
234
- enc,
235
- dec,
236
- enc2,
237
- dec2,
238
- critic_rec,
239
- gen_ema,
240
- [opt_dec, opt_disc],
241
- ) = models_ls
242
  abb = gen_ema(self.get_noise_interp(), training=False)
243
  abbls = tf.split(abb, abb.shape[-2] // 16, -2)
244
  abb = tf.concat(abbls, 0)
@@ -247,13 +235,7 @@ class Utils_functions:
247
  for channel in range(2):
248
 
249
  ab = self.distribute_dec2(
250
- abb[
251
- :,
252
- :,
253
- :,
254
- channel * self.args.latdepth : channel * self.args.latdepth + self.args.latdepth,
255
- ],
256
- dec2,
257
  )
258
  abls = tf.split(ab, ab.shape[-2] // self.args.shape, -2)
259
  ab = tf.concat(abls, 0)
@@ -291,28 +273,14 @@ class Utils_functions:
291
 
292
  fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(20, 20))
293
  axs[0].imshow(
294
- np.flip(
295
- np.array(
296
- tf.transpose(
297
- self.wv2spec_hop((abwv[:, 0] + abwv[:, 1]) / 2.0, 80.0, 256),
298
- [1, 0],
299
- )
300
- ),
301
- -2,
302
- ),
303
  cmap=None,
304
  )
305
  axs[0].axis("off")
306
  axs[0].set_title("Generated1")
307
  axs[1].imshow(
308
  np.flip(
309
- np.array(
310
- tf.transpose(
311
- self.wv2spec_hop((abwv2[:, 0] + abwv2[:, 1]) / 2.0, 80.0, 256),
312
- [1, 0],
313
- )
314
- ),
315
- -2,
316
  ),
317
  cmap=None,
318
  )
@@ -320,13 +288,7 @@ class Utils_functions:
320
  axs[1].set_title("Generated2")
321
  axs[2].imshow(
322
  np.flip(
323
- np.array(
324
- tf.transpose(
325
- self.wv2spec_hop((abwv3[:, 0] + abwv3[:, 1]) / 2.0, 80.0, 256),
326
- [1, 0],
327
- )
328
- ),
329
- -2,
330
  ),
331
  cmap=None,
332
  )
@@ -334,13 +296,7 @@ class Utils_functions:
334
  axs[2].set_title("Generated3")
335
  axs[3].imshow(
336
  np.flip(
337
- np.array(
338
- tf.transpose(
339
- self.wv2spec_hop((abwv4[:, 0] + abwv4[:, 1]) / 2.0, 80.0, 256),
340
- [1, 0],
341
- )
342
- ),
343
- -2,
344
  ),
345
  cmap=None,
346
  )
@@ -351,26 +307,9 @@ class Utils_functions:
351
 
352
  # Save in training loop
353
  def save_end(
354
- self,
355
- epoch,
356
- gloss,
357
- closs,
358
- mloss,
359
- models_ls=None,
360
- n_save=3,
361
- save_path="checkpoints",
362
  ):
363
- (
364
- critic,
365
- gen,
366
- enc,
367
- dec,
368
- enc2,
369
- dec2,
370
- critic_rec,
371
- gen_ema,
372
- [opt_dec, opt_disc],
373
- ) = models_ls
374
  if epoch % n_save == 0:
375
  print("Saving...")
376
  path = f"{save_path}/MUSIKA!_-{str(gloss)[:9]}-{str(closs)[:9]}-{str(mloss)[:9]}"
@@ -502,7 +441,7 @@ class Utils_functions:
502
  )
503
 
504
  def render_gradio(self, models_ls_techno, models_ls_classical, train=True):
505
- article_text = "Original work by Marco Pasini ([Twitter](https://twitter.com/marco_ppasini)) at Johannes Kepler Universität Linz. Supervised by Jan Schlüter."
506
 
507
  def gradio_func(x, y, z):
508
  return self.stfunc(x, y, z, models_ls_techno, models_ls_classical)
@@ -514,13 +453,10 @@ class Utils_functions:
514
  choices=["Techno/Experimental", "Classical"],
515
  type="index",
516
  default="Classical",
517
- label="Music Genre to Generate (Brace yourself for very high levels of weirdness!)",
518
  ),
519
  gr.inputs.Radio(
520
- choices=["23 s", "115 s", "230 s"],
521
- type="index",
522
- default="115 s",
523
- label="Generated Music Length",
524
  ),
525
  gr.inputs.Slider(
526
  minimum=0,
@@ -536,7 +472,7 @@ class Utils_functions:
536
  ],
537
  allow_screenshot=False,
538
  title="musika!",
539
- description="Blazingly Fast Stereo Waveform Music Generation of Arbitrary Length",
540
  article=article_text,
541
  layout="vertical",
542
  theme="huggingface",
@@ -551,7 +487,7 @@ class Utils_functions:
551
  if train:
552
  iface.launch(prevent_thread_lock=True)
553
  else:
554
- iface.launch()
555
  # iface.launch(share=True, enable_queue=True)
556
  print("--------------------------------")
557
  print("--------------------------------")
 
34
  )
35
  mel_f = tf.convert_to_tensor(librosa.mel_frequencies(n_mels=args.mel_bins + 2, fmin=0.0, fmax=args.sr // 2))
36
  enorm = tf.cast(
37
+ tf.expand_dims(tf.constant(2.0 / (mel_f[2 : args.mel_bins + 2] - mel_f[: args.mel_bins])), 0,), tf.float32,
 
 
 
 
38
  )
39
  melmat = tf.multiply(melmat, enorm)
40
  melmat = tf.divide(melmat, tf.reduce_sum(melmat, axis=0))
 
161
  outls.append(model(x[i * bs : i * bs + bs], training=False))
162
 
163
  if dual_out:
164
+ return (
165
+ np.concatenate([outls[k][0] for k in range(len(outls))], 0),
166
+ np.concatenate([outls[k][1] for k in range(len(outls))], 0),
167
  )
168
  else:
169
  return np.concatenate(outls, 0)
 
196
  inp = tf.concat(inpls, 0)
197
  res = model(inp, training=False)
198
  outls.append(res)
199
+ return (
200
+ np.concatenate([outls[k][0] for k in range(len(outls))], 0),
201
+ np.concatenate([outls[k][1] for k in range(len(outls))], 0),
202
  )
203
 
204
  def distribute_dec2(self, x, model, bs=64):
 
226
  return tf.image.random_crop(noisetot, [1, self.args.latlen, 64 + 64])
227
 
228
  def generate_example_stereo(self, models_ls):
229
+ (critic, gen, enc, dec, enc2, dec2, critic_rec, gen_ema, [opt_dec, opt_disc],) = models_ls
 
 
 
 
 
 
 
 
 
 
230
  abb = gen_ema(self.get_noise_interp(), training=False)
231
  abbls = tf.split(abb, abb.shape[-2] // 16, -2)
232
  abb = tf.concat(abbls, 0)
 
235
  for channel in range(2):
236
 
237
  ab = self.distribute_dec2(
238
+ abb[:, :, :, channel * self.args.latdepth : channel * self.args.latdepth + self.args.latdepth,], dec2,
 
 
 
 
 
 
239
  )
240
  abls = tf.split(ab, ab.shape[-2] // self.args.shape, -2)
241
  ab = tf.concat(abls, 0)
 
273
 
274
  fig, axs = plt.subplots(nrows=4, ncols=1, figsize=(20, 20))
275
  axs[0].imshow(
276
+ np.flip(np.array(tf.transpose(self.wv2spec_hop((abwv[:, 0] + abwv[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,),
 
 
 
 
 
 
 
 
277
  cmap=None,
278
  )
279
  axs[0].axis("off")
280
  axs[0].set_title("Generated1")
281
  axs[1].imshow(
282
  np.flip(
283
+ np.array(tf.transpose(self.wv2spec_hop((abwv2[:, 0] + abwv2[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
 
 
 
 
 
 
284
  ),
285
  cmap=None,
286
  )
 
288
  axs[1].set_title("Generated2")
289
  axs[2].imshow(
290
  np.flip(
291
+ np.array(tf.transpose(self.wv2spec_hop((abwv3[:, 0] + abwv3[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
 
 
 
 
 
 
292
  ),
293
  cmap=None,
294
  )
 
296
  axs[2].set_title("Generated3")
297
  axs[3].imshow(
298
  np.flip(
299
+ np.array(tf.transpose(self.wv2spec_hop((abwv4[:, 0] + abwv4[:, 1]) / 2.0, 80.0, 256), [1, 0],)), -2,
 
 
 
 
 
 
300
  ),
301
  cmap=None,
302
  )
 
307
 
308
  # Save in training loop
309
  def save_end(
310
+ self, epoch, gloss, closs, mloss, models_ls=None, n_save=3, save_path="checkpoints",
 
 
 
 
 
 
 
311
  ):
312
+ (critic, gen, enc, dec, enc2, dec2, critic_rec, gen_ema, [opt_dec, opt_disc],) = models_ls
 
 
 
 
 
 
 
 
 
 
313
  if epoch % n_save == 0:
314
  print("Saving...")
315
  path = f"{save_path}/MUSIKA!_-{str(gloss)[:9]}-{str(closs)[:9]}-{str(mloss)[:9]}"
 
441
  )
442
 
443
  def render_gradio(self, models_ls_techno, models_ls_classical, train=True):
444
+ article_text = "Original work by Marco Pasini ([Twitter](https://twitter.com/marco_ppasini)) and Jan Schlüter at Johannes Kepler Universität Linz."
445
 
446
  def gradio_func(x, y, z):
447
  return self.stfunc(x, y, z, models_ls_techno, models_ls_classical)
 
453
  choices=["Techno/Experimental", "Classical"],
454
  type="index",
455
  default="Classical",
456
+ label="Music Genre to Generate",
457
  ),
458
  gr.inputs.Radio(
459
+ choices=["23s", "1m 58s", "3m 57s"], type="index", default="1m 58s", label="Generated Music Length",
 
 
 
460
  ),
461
  gr.inputs.Slider(
462
  minimum=0,
 
472
  ],
473
  allow_screenshot=False,
474
  title="musika!",
475
+ description="Blazingly Fast Stereo Waveform Music Generation of Arbitrary Length. Be patient and enjoy the weirdness!",
476
  article=article_text,
477
  layout="vertical",
478
  theme="huggingface",
 
487
  if train:
488
  iface.launch(prevent_thread_lock=True)
489
  else:
490
+ iface.launch(enable_queue=True)
491
  # iface.launch(share=True, enable_queue=True)
492
  print("--------------------------------")
493
  print("--------------------------------")