from typing import Optional, Sequence

import torch

from cvae import CVAE

# Conditioning labels: every instrument family paired with a source type.
# The position of a label in this list is the class id passed to the model.
instruments = [
    'bass_acoustic', 'brass_acoustic', 'flute_acoustic', 'guitar_acoustic',
    'keyboard_acoustic', 'mallet_acoustic', 'organ_acoustic', 'reed_acoustic',
    'string_acoustic', 'synth_lead_acoustic', 'vocal_acoustic',
    'bass_synthetic', 'brass_synthetic', 'flute_synthetic', 'guitar_synthetic',
    'keyboard_synthetic', 'mallet_synthetic', 'organ_synthetic', 'reed_synthetic',
    'string_synthetic', 'synth_lead_synthetic', 'vocal_synthetic',
    'bass_electronic', 'brass_electronic', 'flute_electronic', 'guitar_electronic',
    'keyboard_electronic', 'mallet_electronic', 'organ_electronic', 'reed_electronic',
    'string_electronic', 'synth_lead_electronic', 'vocal_electronic',
]

# Load the trained conditional VAE from its Lightning checkpoint; the
# hyperparameters must match the ones used when the checkpoint was trained.
model = CVAE.load_from_checkpoint(
    'epoch=17-step=650718.ckpt',
    io_channels=1,                    # mono audio
    io_features=16000 * 4,            # 64 000 samples per example (4 s at 16 kHz)
    latent_features=5,
    channels=[32, 64, 128, 256, 512],
    num_classes=len(instruments),
    learning_rate=1e-5,
)
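
# Inference-only setup. Moving to GPU when one is available is an assumption
# about the deployment target (the original snippet hard-coded 'cuda' for the
# inputs only); eval() disables dropout and similar training-time behaviour.
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
model.eval()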


def format(text: str) -> str:
    """Keep only the last space-separated word of a UI label, lower-cased."""
    return text.split(' ')[-1].lower()


def choice_to_tensor(choice: Sequence[str]) -> torch.Tensor:
    """Map a selection such as ['Bass', 'Acoustic'] to its class id ('bass_acoustic')."""
    label = '_'.join(format(part) for part in choice)
    return torch.tensor(instruments.index(label))


def generate(choice: Sequence[str], params: Optional[Sequence[float]] = None):
    # Sample one waveform for the chosen class; `params`, if given, fixes the latent vector.
    device = model.device  # keep the inputs on the same device as the model
    noise = (torch.tensor(params, dtype=torch.float32).unsqueeze(0) if params is not None
             else torch.randn(1, 5)).to(device)  # latent_features = 5
    with torch.no_grad():
        return model.sample(eps=noise, c=choice_to_tensor(choice).to(device)).cpu().numpy()[0]
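

# Minimal usage sketch. The choice strings and the use of the `soundfile`
# package to write the result are illustrative assumptions; the only hard
# requirement is that the formatted, '_'-joined choice matches an entry in
# `instruments`. The 16 kHz rate is inferred from io_features = 16000 * 4.
if __name__ == '__main__':
    import soundfile as sf

    audio = generate(['Bass', 'Acoustic'])  # random latent vector
    # audio = generate(['Bass', 'Acoustic'], params=[0.0, 0.5, -1.0, 0.2, 0.8])  # fixed latent
    sf.write('bass_acoustic.wav', audio.squeeze(), 16000)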