Spaces:

nakas
/

Time-Domain-Audio-Style-Transfer

Runtime error

App Files Community

nakas committed on Nov 13, 2022

Commit

31a2fce

1 Parent(s): 534ccdb

upgraded using tf2 upgrade notebook

Browse files

Files changed (1) hide show

audio_style_transfer/models/timedomain.py +31 -31

audio_style_transfer/models/timedomain.py CHANGED Viewed

@@ -1,8 +1,7 @@
 """NIPS2017 "Time Domain Neural Audio Style Transfer" code repository
 Parag K. Mital
 """
-import tensorflow.compat.v1 as tf
-tf.disable_v2_behavior()
 import librosa
 import numpy as np
 from scipy.signal import hann
@@ -75,17 +74,17 @@ def instance_norm(x, epsilon=1e-5):
     epsilon : float, optional
         Description
     """
-    with tf.variable_scope('instance_norm'):
-        mean, var = tf.nn.moments(x, [1, 2], keep_dims=True)
-        scale = tf.get_variable(
             name='scale',
             shape=[x.get_shape()[-1]],
-            initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02))
-        offset = tf.get_variable(
             name='offset',
             shape=[x.get_shape()[-1]],
-            initializer=tf.constant_initializer(0.0))
-        out = scale * tf.div(x - mean, tf.sqrt(var + epsilon)) + offset
         return out
@@ -97,23 +96,23 @@ def compute_inputs(x, freqs, n_fft, n_frames, input_features, norm=False):
             return x
     freqs_tf = tf.constant(freqs, name="freqs", dtype='float32')
     inputs = {}
-    with tf.variable_scope('real'):
         inputs['real'] = norm_fn(tf.reshape(
             tf.matmul(x, tf.cos(freqs_tf)), [1, 1, n_frames, n_fft // 2]))
-    with tf.variable_scope('imag'):
         inputs['imag'] = norm_fn(tf.reshape(
             tf.matmul(x, tf.sin(freqs_tf)), [1, 1, n_frames, n_fft // 2]))
-    with tf.variable_scope('mags'):
         inputs['mags'] = norm_fn(tf.reshape(
             tf.sqrt(
                 tf.maximum(1e-15, inputs['real'] * inputs['real'] + inputs[
                     'imag'] * inputs['imag'])), [1, 1, n_frames, n_fft // 2]))
-    with tf.variable_scope('phase'):
         inputs['phase'] = norm_fn(tf.atan2(inputs['imag'], inputs['real']))
-    with tf.variable_scope('unwrapped'):
-        inputs['unwrapped'] = tf.py_func(
             unwrap, [inputs['phase']], tf.float32)
-    with tf.variable_scope('unwrapped_difference'):
         inputs['unwrapped_difference'] = (tf.slice(
                 inputs['unwrapped'],
                 [0, 0, 0, 1], [-1, -1, -1, n_fft // 2 - 1]) -
@@ -147,9 +146,10 @@ def compute_features(content,
     kernels = []
     content_features = []
     style_features = []
-    config_proto = tf.ConfigProto
-    with g.as_default(), g.device('/cpu:0'), tf.Session(config=config_proto) as sess:
-        x = tf.placeholder('float32', [n_frames, n_samples], name="x")
         p = np.reshape(
             np.linspace(0.0, n_samples - 1, n_samples), [n_samples, 1])
         k = np.reshape(
@@ -157,7 +157,7 @@ def compute_features(content,
             [1, n_fft // 2])
         freqs = np.dot(p, k)
         inputs, net = compute_inputs(x, freqs, n_fft, n_frames, input_features, norm)
-        sess.run(tf.initialize_all_variables())
         content_feature = net.eval(feed_dict={x: content_tf})
         content_features.append(content_feature)
         style_feature = inputs['mags'].eval(feed_dict={x: style_tf})
@@ -177,8 +177,8 @@ def compute_features(content,
             kernel_tf = tf.constant(
                 kernel, name="kernel{}".format(layer_i), dtype='float32')
             conv = tf.nn.conv2d(
-                net,
-                kernel_tf,
                 strides=[1, stride, stride, 1],
                 padding="VALID",
                 name="conv{}".format(layer_i))
@@ -215,7 +215,7 @@ def compute_stylization(kernels,
         inputs, net = compute_inputs(x, freqs, n_fft, n_frames, input_features, norm)
         content_loss = alpha * 2 * tf.nn.l2_loss(net - content_features[0])
         feats = tf.reshape(inputs['mags'], (-1, n_fft // 2))
-        gram = tf.matmul(tf.transpose(feats), feats) / (n_frames)
         style_loss = 2 * tf.nn.l2_loss(gram - style_gram[0])
         for layer_i in range(n_layers):
             kernel_tf = tf.constant(
@@ -223,8 +223,8 @@ def compute_stylization(kernels,
                 name="kernel{}".format(layer_i),
                 dtype='float32')
             conv = tf.nn.conv2d(
-                net,
-                kernel_tf,
                 strides=[1, stride, stride, 1],
                 padding="VALID",
                 name="conv{}".format(layer_i))
@@ -233,7 +233,7 @@ def compute_stylization(kernels,
                 alpha * 2 * tf.nn.l2_loss(net - content_features[layer_i + 1])
             _, height, width, number = map(lambda i: i.value, net.get_shape())
             feats = tf.reshape(net, (-1, number))
-            gram = tf.matmul(tf.transpose(feats), feats) / (n_frames)
             style_loss = style_loss + 2 * tf.nn.l2_loss(gram - style_gram[
                 layer_i + 1])
         loss = content_loss + style_loss
@@ -241,17 +241,17 @@ def compute_stylization(kernels,
             opt = tf.contrib.opt.ScipyOptimizerInterface(
                 loss, method='L-BFGS-B', options={'maxiter': iterations})
             # Optimization
-            with tf.Session() as sess:
-                sess.run(tf.initialize_all_variables())
                 print('Started optimization.')
                 opt.minimize(sess)
                 result = x.eval()
         else:
-            opt = tf.train.AdamOptimizer(
                 learning_rate=learning_rate).minimize(loss)
             # Optimization
-            with tf.Session() as sess:
-                sess.run(tf.initialize_all_variables())
                 print('Started optimization.')
                 for i in range(iterations):
                     s, c, l, _ = sess.run([style_loss, content_loss, loss, opt])

 """NIPS2017 "Time Domain Neural Audio Style Transfer" code repository
 Parag K. Mital
 """
+import tensorflow as tf
 import librosa
 import numpy as np
 from scipy.signal import hann
     epsilon : float, optional
         Description
     """
+    with tf.compat.v1.variable_scope('instance_norm'):
+        mean, var = tf.nn.moments(x=x, axes=[1, 2], keepdims=True)
+        scale = tf.compat.v1.get_variable(
             name='scale',
             shape=[x.get_shape()[-1]],
+            initializer=tf.compat.v1.truncated_normal_initializer(mean=1.0, stddev=0.02))
+        offset = tf.compat.v1.get_variable(
             name='offset',
             shape=[x.get_shape()[-1]],
+            initializer=tf.compat.v1.constant_initializer(0.0))
+        out = scale * tf.compat.v1.div(x - mean, tf.sqrt(var + epsilon)) + offset
         return out
             return x
     freqs_tf = tf.constant(freqs, name="freqs", dtype='float32')
     inputs = {}
+    with tf.compat.v1.variable_scope('real'):
         inputs['real'] = norm_fn(tf.reshape(
             tf.matmul(x, tf.cos(freqs_tf)), [1, 1, n_frames, n_fft // 2]))
+    with tf.compat.v1.variable_scope('imag'):
         inputs['imag'] = norm_fn(tf.reshape(
             tf.matmul(x, tf.sin(freqs_tf)), [1, 1, n_frames, n_fft // 2]))
+    with tf.compat.v1.variable_scope('mags'):
         inputs['mags'] = norm_fn(tf.reshape(
             tf.sqrt(
                 tf.maximum(1e-15, inputs['real'] * inputs['real'] + inputs[
                     'imag'] * inputs['imag'])), [1, 1, n_frames, n_fft // 2]))
+    with tf.compat.v1.variable_scope('phase'):
         inputs['phase'] = norm_fn(tf.atan2(inputs['imag'], inputs['real']))
+    with tf.compat.v1.variable_scope('unwrapped'):
+        inputs['unwrapped'] = tf.compat.v1.py_func(
             unwrap, [inputs['phase']], tf.float32)
+    with tf.compat.v1.variable_scope('unwrapped_difference'):
         inputs['unwrapped_difference'] = (tf.slice(
                 inputs['unwrapped'],
                 [0, 0, 0, 1], [-1, -1, -1, n_fft // 2 - 1]) -
     kernels = []
     content_features = []
     style_features = []
+    config_proto = tf.compat.v1.ConfigProto()
+    config_proto.gpu_options.allow_growth = True
+    with g.as_default(), g.device('/cpu:0'), tf.compat.v1.Session(config=config_proto) as sess:
+        x = tf.compat.v1.placeholder('float32', [n_frames, n_samples], name="x")
         p = np.reshape(
             np.linspace(0.0, n_samples - 1, n_samples), [n_samples, 1])
         k = np.reshape(
             [1, n_fft // 2])
         freqs = np.dot(p, k)
         inputs, net = compute_inputs(x, freqs, n_fft, n_frames, input_features, norm)
+        sess.run(tf.compat.v1.initialize_all_variables())
         content_feature = net.eval(feed_dict={x: content_tf})
         content_features.append(content_feature)
         style_feature = inputs['mags'].eval(feed_dict={x: style_tf})
             kernel_tf = tf.constant(
                 kernel, name="kernel{}".format(layer_i), dtype='float32')
             conv = tf.nn.conv2d(
+                input=net,
+                filters=kernel_tf,
                 strides=[1, stride, stride, 1],
                 padding="VALID",
                 name="conv{}".format(layer_i))
         inputs, net = compute_inputs(x, freqs, n_fft, n_frames, input_features, norm)
         content_loss = alpha * 2 * tf.nn.l2_loss(net - content_features[0])
         feats = tf.reshape(inputs['mags'], (-1, n_fft // 2))
+        gram = tf.matmul(tf.transpose(a=feats), feats) / (n_frames)
         style_loss = 2 * tf.nn.l2_loss(gram - style_gram[0])
         for layer_i in range(n_layers):
             kernel_tf = tf.constant(
                 name="kernel{}".format(layer_i),
                 dtype='float32')
             conv = tf.nn.conv2d(
+                input=net,
+                filters=kernel_tf,
                 strides=[1, stride, stride, 1],
                 padding="VALID",
                 name="conv{}".format(layer_i))
                 alpha * 2 * tf.nn.l2_loss(net - content_features[layer_i + 1])
             _, height, width, number = map(lambda i: i.value, net.get_shape())
             feats = tf.reshape(net, (-1, number))
+            gram = tf.matmul(tf.transpose(a=feats), feats) / (n_frames)
             style_loss = style_loss + 2 * tf.nn.l2_loss(gram - style_gram[
                 layer_i + 1])
         loss = content_loss + style_loss
             opt = tf.contrib.opt.ScipyOptimizerInterface(
                 loss, method='L-BFGS-B', options={'maxiter': iterations})
             # Optimization
+            with tf.compat.v1.Session() as sess:
+                sess.run(tf.compat.v1.initialize_all_variables())
                 print('Started optimization.')
                 opt.minimize(sess)
                 result = x.eval()
         else:
+            opt = tf.compat.v1.train.AdamOptimizer(
                 learning_rate=learning_rate).minimize(loss)
             # Optimization
+            with tf.compat.v1.Session() as sess:
+                sess.run(tf.compat.v1.initialize_all_variables())
                 print('Started optimization.')
                 for i in range(iterations):
                     s, c, l, _ = sess.run([style_loss, content_loss, loss, opt])