"""NIPS2017 "Time Domain Neural Audio Style Transfer" code repository
Parag K. Mital
"""
import os
import glob
import numpy as np
from audio_style_transfer.models import timedomain, uylanov


def get_path(model, output_path, content_filename, style_filename):
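    """Create the output directory for `model` (if needed) and return an
    output filename that joins the content and style file basenames."""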
output_dir = os.path.join(output_path, model)
if not os.path.exists(output_dir):
os.makedirs(output_dir)
    output_filename = os.path.join(
        output_dir, '{}+{}'.format(os.path.basename(content_filename),
                                   os.path.basename(style_filename)))
    return output_filename


def params():
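    """Hyperparameter search space for the batch runs.

    Each name maps to a list of candidate values; `return locals()` packs
    them all into a single dict keyed by parameter name.
    """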
n_fft = [2048, 4096, 8196]
n_layers = [1, 2, 4]
n_filters = [128, 2048, 4096]
hop_length = [128, 256, 512]
alpha = [0.1, 0.01, 0.005]
k_w = [4, 8, 12]
norm = [True, False]
input_features = [['mags'], ['mags', 'phase'], ['real', 'imag'], ['real', 'imag', 'mags']]
    return locals()


def batch(content_path, style_path, output_path,
          run_timedomain=True, run_uylanov=False):
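    """Pick a random content/style .wav pair and a random hyperparameter
    setting, then run style transfer with the selected models."""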
content_files = glob.glob('{}/*.wav'.format(content_path))
style_files = glob.glob('{}/*.wav'.format(style_path))
content_filename = np.random.choice(content_files)
style_filename = np.random.choice(style_files)
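    # Sample one value for each hyperparameter from the grid in params().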
    p = params()
    alpha = np.random.choice(p['alpha'])
    n_fft = np.random.choice(p['n_fft'])
    n_layers = np.random.choice(p['n_layers'])
    n_filters = np.random.choice(p['n_filters'])
    hop_length = np.random.choice(p['hop_length'])
    norm = np.random.choice(p['norm'])
    k_w = np.random.choice(p['k_w'])
# Run the Time Domain Model
if run_timedomain:
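        # Sweep every input-feature combination with the sampled settings.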
        for f in p['input_features']:
fname = get_path('timedomain/input_features={}'.format(",".join(f)),
output_path, content_filename, style_filename)
output_filename = ('{},n_fft={},n_layers={},n_filters={},norm={},'
'hop_length={},alpha={},k_w={}.wav'.format(
fname, n_fft, n_layers, n_filters, norm,
hop_length, alpha, k_w))
print(output_filename)
if not os.path.exists(output_filename):
timedomain.run(content_fname=content_filename,
style_fname=style_filename,
output_fname=output_filename,
n_fft=n_fft,
n_layers=n_layers,
n_filters=n_filters,
hop_length=hop_length,
alpha=alpha,
norm=norm,
k_w=k_w)
if run_uylanov:
# Run Original Uylanov Model
fname = get_path('uylanov', output_path, content_filename, style_filename)
output_filename = ('{},n_fft={},n_layers={},n_filters={},'
'hop_length={},alpha={},k_w={}.wav'.format(
fname, n_fft, n_layers, n_filters, hop_length, alpha,
k_w))
print(output_filename)
if not os.path.exists(output_filename):
uylanov.run(content_filename,
style_filename,
output_filename,
n_fft=n_fft,
n_layers=n_layers,
n_filters=n_filters,
hop_length=hop_length,
alpha=alpha,
k_w=k_w)
    # These only produce noise, so they are commented out.
# # Run NSynth Encoder Model
    # fname = get_path('nsynth-encoder', output_path, content_filename,
    #                   style_filename)
# output_filename = ('{},n_fft={},n_layers={},n_filters={},'
# 'hop_length={},alpha={},k_w={}.wav'.format(
# fname, n_fft, n_layers, n_filters, hop_length, alpha, k_w))
# if not os.path.exists(output_filename):
# nsynth.run(content_filename,
# style_filename,
# output_filename,
# model='encoder',
# n_fft=n_fft,
# n_layers=n_layers,
# n_filters=n_filters,
# hop_length=hop_length,
# alpha=alpha,
# k_w=k_w)
# # Run NSynth Decoder Model
    # fname = get_path('wavenet-decoder', output_path, content_filename,
    #                   style_filename)
# output_filename = ('{},n_fft={},n_layers={},n_filters={},'
# 'hop_length={},alpha={},k_w={}.wav'.format(
# fname, n_fft, n_layers, n_filters, hop_length, alpha, k_w))
# if not os.path.exists(output_filename):
# nsynth.run(content_filename,
# style_filename,
# output_filename,
# model='decoder',
# n_fft=n_fft,
# n_layers=n_layers,
# n_filters=n_filters,
# hop_length=hop_length,
# alpha=alpha,
# k_w=k_w)


if __name__ == '__main__':
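    # Content .wav files are read from ./target, style .wav files from
    # ./corpus, and results are written under ./results/<model>/.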
content_path = './target'
style_path = './corpus'
output_path = './results'
batch(content_path, style_path, output_path)