Spaces:

Flux9665
/

MassivelyMultilingualTTS

Running on T4

App Files Files

MassivelyMultilingualTTS / Architectures /Vocoder /AMP.py

Flux9665

use explicit code instead of relying on release download

9e275b8 9 months ago

raw

history blame

2.87 kB

	# Copyright (c) 2022 NVIDIA CORPORATION.
	# Licensed under the MIT license.

	# Adapted from https://github.com/jik876/hifi-gan under the MIT license.
	# LICENSE is in incl_licenses directory.


	import torch
	from alias_free_torch import *
	from alias_free_torch import Activation1d
	from torch import nn
	from torch.nn import Conv1d
	from torch.nn.utils import remove_weight_norm
	from torch.nn.utils import weight_norm

	from Architectures.Vocoder.Snake import SnakeBeta

	LRELU_SLOPE = 0.1


	class AMPBlock1(torch.nn.Module):
	    """Residual block of weight-normalised 1-D convolutions with SnakeBeta activations.

	    For every entry in ``dilation`` the block owns one dilated convolution
	    (in ``convs1``) followed by one non-dilated convolution (in ``convs2``).
	    Each convolution is preceded by its own ``Activation1d``-wrapped
	    ``SnakeBeta`` activation, and the output of every conv pair is added back
	    onto that pair's input.

	    Args:
	        channels: Number of input and output channels of every convolution.
	        kernel_size: Kernel size shared by all convolutions.
	        dilation: Iterable of dilation factors for ``convs1``. One conv pair
	            is created per entry, so the default ``(1, 3, 5)`` yields three
	            pairs (six convolutions, six activations).
	    """

	    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
	        super().__init__()

	        def make_conv(rate):
	            # Stride 1 with padding from get_padding; the residual add in
	            # forward() needs the conv output to keep the input's length.
	            return weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=rate,
	                                      padding=get_padding(kernel_size, rate)))

	        # One dilated convolution per requested dilation factor.
	        self.convs1 = nn.ModuleList([make_conv(rate) for rate in dilation])
	        self.convs1.apply(init_weights)

	        # One non-dilated convolution paired with each entry of convs1.
	        self.convs2 = nn.ModuleList([make_conv(1) for _ in dilation])
	        self.convs2.apply(init_weights)

	        self.num_layers = len(self.convs1) + len(self.convs2)  # total number of conv layers

	        # Every convolution gets its own activation module.
	        self.activations = nn.ModuleList([
	            Activation1d(activation=SnakeBeta(channels, alpha_logscale=True))
	            for _ in range(self.num_layers)
	        ])

	    def forward(self, x):
	        """Run ``x`` through every activation/conv pair with residual connections.

	        Args:
	            x: Input tensor accepted by the ``Conv1d`` layers.

	        Returns:
	            The tensor after all residual conv pairs have been applied.
	        """
	        # Activations are stored interleaved: even indices feed convs1,
	        # odd indices feed convs2.
	        acts1, acts2 = self.activations[::2], self.activations[1::2]
	        for conv_a, conv_b, act_a, act_b in zip(self.convs1, self.convs2, acts1, acts2):
	            xt = conv_b(act_b(conv_a(act_a(x))))
	            x = xt + x

	        return x

	    def remove_weight_norm(self):
	        """Remove the weight-norm reparametrisation from every convolution."""
	        # Inside the method body the bare name resolves to the module-level
	        # function imported from torch.nn.utils, not to this method.
	        for conv in (*self.convs1, *self.convs2):
	            remove_weight_norm(conv)


	def init_weights(m, mean=0.0, std=0.01):
	    """Re-draw the weights of convolution modules from a normal distribution.

	    A module counts as a convolution when its class name contains ``"Conv"``;
	    any other module is left untouched.

	    Args:
	        m: Module to (possibly) initialise.
	        mean: Mean passed to ``normal_``.
	        std: Standard deviation passed to ``normal_``.
	    """
	    if "Conv" not in type(m).__name__:
	        return
	    # In-place fill of the existing weight storage.
	    m.weight.data.normal_(mean, std)


	def apply_weight_norm(m):
	    """Wrap convolution modules with weight normalisation.

	    A module counts as a convolution when its class name contains ``"Conv"``;
	    any other module is left untouched.

	    Args:
	        m: Module to (possibly) apply ``weight_norm`` to.
	    """
	    is_conv = "Conv" in type(m).__name__
	    if is_conv:
	        weight_norm(m)


	def get_padding(kernel_size, dilation=1):
	    """Return the per-side padding for a convolution with the given geometry.

	    Args:
	        kernel_size: Size of the convolution kernel.
	        dilation: Dilation factor of the convolution.

	    Returns:
	        ``(kernel_size * dilation - dilation) / 2`` truncated to an ``int``.
	    """
	    dilated_extent = kernel_size * dilation - dilation
	    half_extent = dilated_extent / 2
	    return int(half_extent)