# -*- coding: utf-8 -*-
# Copyright 2020 Minh Nguyen (@dathudeptrai)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import time

import numpy as np
import pytest
import tensorflow as tf
import yaml

from tensorflow_tts.configs import Tacotron2Config
from tensorflow_tts.models import TFTacotron2
from tensorflow_tts.utils import return_strategy

from examples.tacotron2.train_tacotron2 import Tacotron2Trainer
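
# Hide all GPUs from TensorFlow so the test runs on CPU-only machines.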
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

logging.basicConfig(
    level=logging.WARNING,
    format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
)
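

# `var_train_expr` limits training to variables whose names match the given
# pattern; None leaves every variable trainable (see the asserts below).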
@pytest.mark.parametrize(
    "var_train_expr, config_path",
    [
        ("embeddings|decoder_cell", "./examples/tacotron2/conf/tacotron2.v1.yaml"),
        (None, "./examples/tacotron2/conf/tacotron2.v1.yaml"),
        (
            "embeddings|decoder_cell",
            "./examples/tacotron2/conf/tacotron2.baker.v1.yaml",
        ),
        ("embeddings|decoder_cell", "./examples/tacotron2/conf/tacotron2.kss.v1.yaml"),
    ],
)
def test_tacotron2_train_some_layers(var_train_expr, config_path):
    config = Tacotron2Config(n_speakers=5, reduction_factor=1)
    model = TFTacotron2(config, name="tacotron2")
    model._build()
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

    # NOTE: `config` is rebound here from the model config object to the
    # training configuration dict loaded from YAML.
    with open(config_path) as f:
        config = yaml.load(f, Loader=yaml.Loader)
    config.update({"outdir": "./"})
    config.update({"var_train_expr": var_train_expr})

    STRATEGY = return_strategy()

    trainer = Tacotron2Trainer(
        config=config, strategy=STRATEGY, steps=0, epochs=0, is_mixed_precision=False,
    )
    trainer.compile(model, optimizer)

    len_trainable_vars = len(trainer._trainable_variables)
    all_trainable_vars = len(model.trainable_variables)

    if var_train_expr is None:
        # No filter: every model variable should be trained.
        tf.debugging.assert_equal(len_trainable_vars, all_trainable_vars)
    else:
        # A filter is set: only a strict subset should be trained.
        tf.debugging.assert_less(len_trainable_vars, all_trainable_vars)
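

# A single compiled training step on synthetic data: checks that the model,
# losses, and optimizer wire together and that gradients reach the variables.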
@pytest.mark.parametrize(
    "n_speakers, n_chars, max_input_length, max_mel_length, batch_size",
    [(2, 15, 25, 50, 2)],
)
def test_tacotron2_trainable(
    n_speakers, n_chars, max_input_length, max_mel_length, batch_size
):
    config = Tacotron2Config(n_speakers=n_speakers, reduction_factor=1)
    model = TFTacotron2(config, name="tacotron2")
    model._build()

    # Fake inputs: random character ids, one speaker id per sample, random mels.
    input_ids = tf.random.uniform(
        [batch_size, max_input_length], maxval=n_chars, dtype=tf.int32
    )
    speaker_ids = tf.convert_to_tensor([0] * batch_size, tf.int32)
    mel_gts = tf.random.uniform(shape=[batch_size, max_mel_length, 80])
    # randint with low == max_mel_length and high == max_mel_length + 1 always
    # yields max_mel_length, so every sequence spans the full mel length.
    mel_lengths = np.random.randint(
        max_mel_length, high=max_mel_length + 1, size=[batch_size]
    )
    mel_lengths[-1] = max_mel_length
    mel_lengths = tf.convert_to_tensor(mel_lengths, dtype=tf.int32)

    # Unused below: the training step derives its own stop targets from mel_lengths.
    stop_tokens = np.zeros((batch_size, max_mel_length), np.float32)
    stop_tokens = tf.convert_to_tensor(stop_tokens)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    # from_logits=True because stop_preds are raw logits, not probabilities.
    binary_crossentropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
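
    # Compile one optimizer step; experimental_relax_shapes=True relaxes shape
    # specialization so tf.function avoids retracing on minor shape changes.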
    @tf.function(experimental_relax_shapes=True)
    def one_step_training(input_ids, speaker_ids, mel_gts, mel_lengths):
        with tf.GradientTape() as tape:
            mel_preds, post_mel_preds, stop_preds, alignment_history = model(
                input_ids,
                # input lengths, built from batch_size rather than hard-coded
                # for a batch of two
                tf.constant([max_input_length] * batch_size),
                speaker_ids,
                mel_gts,
                mel_lengths,
                training=True,
            )
            loss_before = tf.keras.losses.MeanSquaredError()(mel_gts, mel_preds)
            loss_after = tf.keras.losses.MeanSquaredError()(mel_gts, post_mel_preds)

            # Stop targets: 0 before the last valid frame, 1 from it onward.
            stop_gts = tf.expand_dims(
                tf.range(tf.reduce_max(mel_lengths), dtype=tf.int32), 0
            )  # [1, max_len]
            stop_gts = tf.tile(stop_gts, [tf.shape(mel_lengths)[0], 1])  # [B, max_len]
            stop_gts = tf.cast(
                tf.math.greater_equal(stop_gts, tf.expand_dims(mel_lengths, 1) - 1),
                tf.float32,
            )

            # calculate stop_token loss
            stop_token_loss = binary_crossentropy(stop_gts, stop_preds)
            loss = stop_token_loss + loss_before + loss_after

        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return loss, alignment_history
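
    # The first call pays the tf.function tracing cost, so time only the second.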
    for i in range(2):
        if i == 1:
            start = time.time()
        loss, alignment_history = one_step_training(
            input_ids, speaker_ids, mel_gts, mel_lengths
        )
        print(f" > loss: {loss}")

    total_runtime = time.time() - start
    # Only one warmed-up step was timed, so the total is the per-step time.
    print(f" > Run-time per training step: {total_runtime}")