# butterfly-gan/assets/code_snippets/latent_walk_music.py
# Code Author: Jonathan Whitaker 😎
import librosa
import numpy as np
import soundfile as sf
import torch
from scipy.signal import savgol_filter
from tqdm import tqdm
# The driving audio file
audio_file = './sounds/bensound-cute.wav' #@param
# How many points in the base latent walk loop
n_points = 6 #@param
# Smooths the animation effect, smaller=jerkier, must be odd
filter_window_size = 301 #@param
# How much should we scale position based on music vs the base path?
chr_scale = 0.5 #@param
base_scale = 0.3 #@param
# Load the file
X, sample_rate = sf.read(audio_file, dtype='float32')
X = X[:int(len(X) * 0.5)]  # Keep only the first half of the track
# Remove percussive elements (use just one channel)
harmonic = librosa.effects.harmonic(X[:, 0])
# Get chroma_stft (power in each of the 12 pitch classes)
chroma = librosa.feature.chroma_stft(y=harmonic, sr=sample_rate)
# Smooth these out
chroma = savgol_filter(chroma, filter_window_size, 3)
# Calculate how many frames we want
fps = 25
duration = X.shape[0] / sample_rate
print('Duration:', duration)
n_steps = int(fps * duration)
print('N frames:', n_steps, fps * duration)
# Random base points for the latent walk loop (256 = the generator's latent size)
latents = torch.randn(n_points, 256) * base_scale
# One latent direction per chroma bin (12 pitch classes)
chroma_latents = torch.randn(12, 256) * chr_scale
frames = []
for i in tqdm(range(n_steps)):
    # Which two base points are we between, and how far along?
    p1 = max(0, int(n_points * i / n_steps))
    p2 = min(n_points, int(n_points * i / n_steps) + 1) % n_points  # wraps back to 0
    frac = (i - (p1 * (n_steps / n_points))) / (n_steps / n_points)
    l = latents[p1] * (1 - frac) + latents[p2] * frac
    # Add the music influence: push the latent along each chroma direction,
    # weighted by that note's current (smoothed) power. Note chr_scale is
    # applied again here, on top of the scaling baked into chroma_latents.
    for c in range(12):
        scale_factor = chroma[c, int(i * chroma.shape[1] / n_steps)]
        l += chroma_latents[c] * chr_scale * scale_factor
    # Generate a frame (model.G is the pretrained generator, assumed loaded elsewhere)
    im = model.G(l.unsqueeze(0)).clamp_(0., 1.)
    frame = (im[0].permute(1, 2, 0).detach().cpu().numpy() * 255).astype(np.uint8)
    frames.append(frame)
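
# --- Not part of the original snippet: a minimal sketch of muxing the rendered
# frames with the driving audio into a video. Assumes moviepy 1.x is installed;
# the output filename 'latent_walk.mp4' is arbitrary.
from moviepy.editor import ImageSequenceClip, AudioFileClip

clip = ImageSequenceClip(frames, fps=fps)
# Trim the soundtrack to the rendered portion (we kept half the file above)
clip = clip.set_audio(AudioFileClip(audio_file).subclip(0, duration))
clip.write_videofile('latent_walk.mp4', fps=fps)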