nakas committed
Commit 31a2fce · 1 Parent(s): 534ccdb

upgraded using tf2 upgrade notebook

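The commit message refers to the TF2 upgrade notebook, which drives TensorFlow's `tf_upgrade_v2` converter over the source files. A minimal sketch of that conversion step for this module, assuming it is launched from Python; the output and report file names are illustrative, not taken from the commit:

import subprocess

# Run TensorFlow's v2 upgrade converter on the module touched by this commit.
# "timedomain_v2.py" and "upgrade_report.txt" are hypothetical names; the
# converter can also rewrite whole directories via --intree/--outtree.
subprocess.run(
    [
        "tf_upgrade_v2",
        "--infile", "audio_style_transfer/models/timedomain.py",
        "--outfile", "audio_style_transfer/models/timedomain_v2.py",
        "--reportfile", "upgrade_report.txt",
    ],
    check=True,
)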
audio_style_transfer/models/timedomain.py CHANGED
@@ -1,8 +1,7 @@
 """NIPS2017 "Time Domain Neural Audio Style Transfer" code repository
 Parag K. Mital
 """
-import tensorflow.compat.v1 as tf
-tf.disable_v2_behavior()
+import tensorflow as tf
 import librosa
 import numpy as np
 from scipy.signal import hann
@@ -75,17 +74,17 @@ def instance_norm(x, epsilon=1e-5):
     epsilon : float, optional
         Description
     """
-    with tf.variable_scope('instance_norm'):
-        mean, var = tf.nn.moments(x, [1, 2], keep_dims=True)
-        scale = tf.get_variable(
+    with tf.compat.v1.variable_scope('instance_norm'):
+        mean, var = tf.nn.moments(x=x, axes=[1, 2], keepdims=True)
+        scale = tf.compat.v1.get_variable(
             name='scale',
             shape=[x.get_shape()[-1]],
-            initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02))
-        offset = tf.get_variable(
+            initializer=tf.compat.v1.truncated_normal_initializer(mean=1.0, stddev=0.02))
+        offset = tf.compat.v1.get_variable(
             name='offset',
             shape=[x.get_shape()[-1]],
-            initializer=tf.constant_initializer(0.0))
-        out = scale * tf.div(x - mean, tf.sqrt(var + epsilon)) + offset
+            initializer=tf.compat.v1.constant_initializer(0.0))
+        out = scale * tf.compat.v1.div(x - mean, tf.sqrt(var + epsilon)) + offset
         return out
 
 
@@ -97,23 +96,23 @@ def compute_inputs(x, freqs, n_fft, n_frames, input_features, norm=False):
             return x
     freqs_tf = tf.constant(freqs, name="freqs", dtype='float32')
     inputs = {}
-    with tf.variable_scope('real'):
+    with tf.compat.v1.variable_scope('real'):
         inputs['real'] = norm_fn(tf.reshape(
             tf.matmul(x, tf.cos(freqs_tf)), [1, 1, n_frames, n_fft // 2]))
-    with tf.variable_scope('imag'):
+    with tf.compat.v1.variable_scope('imag'):
         inputs['imag'] = norm_fn(tf.reshape(
             tf.matmul(x, tf.sin(freqs_tf)), [1, 1, n_frames, n_fft // 2]))
-    with tf.variable_scope('mags'):
+    with tf.compat.v1.variable_scope('mags'):
         inputs['mags'] = norm_fn(tf.reshape(
             tf.sqrt(
                 tf.maximum(1e-15, inputs['real'] * inputs['real'] + inputs[
                     'imag'] * inputs['imag'])), [1, 1, n_frames, n_fft // 2]))
-    with tf.variable_scope('phase'):
+    with tf.compat.v1.variable_scope('phase'):
         inputs['phase'] = norm_fn(tf.atan2(inputs['imag'], inputs['real']))
-    with tf.variable_scope('unwrapped'):
-        inputs['unwrapped'] = tf.py_func(
+    with tf.compat.v1.variable_scope('unwrapped'):
+        inputs['unwrapped'] = tf.compat.v1.py_func(
             unwrap, [inputs['phase']], tf.float32)
-    with tf.variable_scope('unwrapped_difference'):
+    with tf.compat.v1.variable_scope('unwrapped_difference'):
         inputs['unwrapped_difference'] = (tf.slice(
             inputs['unwrapped'],
             [0, 0, 0, 1], [-1, -1, -1, n_fft // 2 - 1]) -
@@ -147,9 +146,10 @@ def compute_features(content,
     kernels = []
     content_features = []
     style_features = []
-    config_proto = tf.ConfigProto
-    with g.as_default(), g.device('/cpu:0'), tf.Session(config=config_proto) as sess:
-        x = tf.placeholder('float32', [n_frames, n_samples], name="x")
+    config_proto = tf.compat.v1.ConfigProto()
+    config_proto.gpu_options.allow_growth = True
+    with g.as_default(), g.device('/cpu:0'), tf.compat.v1.Session(config=config_proto) as sess:
+        x = tf.compat.v1.placeholder('float32', [n_frames, n_samples], name="x")
         p = np.reshape(
             np.linspace(0.0, n_samples - 1, n_samples), [n_samples, 1])
         k = np.reshape(
@@ -157,7 +157,7 @@ def compute_features(content,
             [1, n_fft // 2])
         freqs = np.dot(p, k)
         inputs, net = compute_inputs(x, freqs, n_fft, n_frames, input_features, norm)
-        sess.run(tf.initialize_all_variables())
+        sess.run(tf.compat.v1.initialize_all_variables())
         content_feature = net.eval(feed_dict={x: content_tf})
         content_features.append(content_feature)
         style_feature = inputs['mags'].eval(feed_dict={x: style_tf})
@@ -177,8 +177,8 @@ def compute_features(content,
             kernel_tf = tf.constant(
                 kernel, name="kernel{}".format(layer_i), dtype='float32')
             conv = tf.nn.conv2d(
-                net,
-                kernel_tf,
+                input=net,
+                filters=kernel_tf,
                 strides=[1, stride, stride, 1],
                 padding="VALID",
                 name="conv{}".format(layer_i))
@@ -215,7 +215,7 @@ def compute_stylization(kernels,
         inputs, net = compute_inputs(x, freqs, n_fft, n_frames, input_features, norm)
         content_loss = alpha * 2 * tf.nn.l2_loss(net - content_features[0])
         feats = tf.reshape(inputs['mags'], (-1, n_fft // 2))
-        gram = tf.matmul(tf.transpose(feats), feats) / (n_frames)
+        gram = tf.matmul(tf.transpose(a=feats), feats) / (n_frames)
         style_loss = 2 * tf.nn.l2_loss(gram - style_gram[0])
         for layer_i in range(n_layers):
             kernel_tf = tf.constant(
@@ -223,8 +223,8 @@ def compute_stylization(kernels,
                 name="kernel{}".format(layer_i),
                 dtype='float32')
             conv = tf.nn.conv2d(
-                net,
-                kernel_tf,
+                input=net,
+                filters=kernel_tf,
                 strides=[1, stride, stride, 1],
                 padding="VALID",
                 name="conv{}".format(layer_i))
@@ -233,7 +233,7 @@ def compute_stylization(kernels,
                 alpha * 2 * tf.nn.l2_loss(net - content_features[layer_i + 1])
             _, height, width, number = map(lambda i: i.value, net.get_shape())
             feats = tf.reshape(net, (-1, number))
-            gram = tf.matmul(tf.transpose(feats), feats) / (n_frames)
+            gram = tf.matmul(tf.transpose(a=feats), feats) / (n_frames)
             style_loss = style_loss + 2 * tf.nn.l2_loss(gram - style_gram[
                 layer_i + 1])
             loss = content_loss + style_loss
@@ -241,17 +241,17 @@ def compute_stylization(kernels,
             opt = tf.contrib.opt.ScipyOptimizerInterface(
                 loss, method='L-BFGS-B', options={'maxiter': iterations})
             # Optimization
-            with tf.Session() as sess:
-                sess.run(tf.initialize_all_variables())
+            with tf.compat.v1.Session() as sess:
+                sess.run(tf.compat.v1.initialize_all_variables())
                 print('Started optimization.')
                 opt.minimize(sess)
                 result = x.eval()
         else:
-            opt = tf.train.AdamOptimizer(
+            opt = tf.compat.v1.train.AdamOptimizer(
                 learning_rate=learning_rate).minimize(loss)
             # Optimization
-            with tf.Session() as sess:
-                sess.run(tf.initialize_all_variables())
+            with tf.compat.v1.Session() as sess:
+                sess.run(tf.compat.v1.initialize_all_variables())
                 print('Started optimization.')
                 for i in range(iterations):
                     s, c, l, _ = sess.run([style_loss, content_loss, loss, opt])
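The upgraded module keeps TF1-style graph code (`tf.compat.v1.placeholder`, `tf.compat.v1.Session`) but no longer calls `tf.disable_v2_behavior()`, so a quick smoke test under TensorFlow 2 has to disable eager execution first. A minimal sketch, not part of the commit; the input shape and initializer call are assumptions, and the repository root is assumed to be on the import path:

import numpy as np
import tensorflow as tf

# The rewritten code still builds a TF1-style graph, so turn off eager execution
# before creating any compat.v1 placeholder or Session.
tf.compat.v1.disable_eager_execution()

from audio_style_transfer.models.timedomain import instance_norm

# Hypothetical 4D input [batch, height, frames, bins]; instance_norm normalizes over axes [1, 2].
x = tf.compat.v1.placeholder('float32', [1, 1, 64, 128], name='x')
y = instance_norm(x)

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    out = sess.run(y, feed_dict={x: np.random.randn(1, 1, 64, 128).astype('float32')})
    print(out.shape)  # expected: (1, 1, 64, 128)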