import torch.nn as nn
import torch.nn.functional as F
import torch as th
from torch.nn.parameter import Parameter
import numpy as np
import os
class UniDeepFsmn(nn.Module):
    """
    UniDeepFsmn is a neural network module implementing a deep feedforward sequential memory network (FSMN) block.

    Attributes:
        input_dim (int): Dimension of the input features.
        output_dim (int): Dimension of the output features.
        lorder (int): Order of the FSMN memory; the memory convolution spans 2 * lorder - 1 frames.
        hidden_size (int): Number of hidden units in the linear layer.
        linear (nn.Linear): Linear layer that projects input features to the hidden size.
        project (nn.Linear): Linear layer that projects hidden features to the output dimension.
        conv1 (nn.Conv2d): Depthwise (grouped) convolution implementing the memory block.
    """

    def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None):
        super(UniDeepFsmn, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        if lorder is None:
            return
        self.lorder = lorder
        self.hidden_size = hidden_size
        # Initialize the layers
        self.linear = nn.Linear(input_dim, hidden_size)  # Linear transformation to hidden size
        self.project = nn.Linear(hidden_size, output_dim, bias=False)  # Project hidden size to output dimension
        self.conv1 = nn.Conv2d(output_dim, output_dim, [lorder + lorder - 1, 1], [1, 1],
                               groups=output_dim, bias=False)  # Depthwise memory convolution

    def forward(self, input):
        """
        Forward pass for the UniDeepFsmn model.

        Args:
            input (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_dim).

        Returns:
            torch.Tensor: Output tensor of the same shape as the input, enhanced by the network.
        """
        f1 = F.relu(self.linear(input))  # Linear layer followed by ReLU activation
        p1 = self.project(f1)  # Project to the output dimension
        x = th.unsqueeze(p1, 1)  # Add a channel dimension for Conv2d: (B, 1, T, D)
        x_per = x.permute(0, 3, 2, 1)  # Rearrange to (B, D, T, 1) for the convolution
        y = F.pad(x_per, [0, 0, self.lorder - 1, self.lorder - 1])  # Pad the time axis so the convolution preserves length
        out = x_per + self.conv1(y)  # Residual connection around the memory convolution
        out1 = out.permute(0, 3, 2, 1)  # Permute back to (B, 1, T, D)
        return input + out1.squeeze()  # Residual connection with the block input
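
# Illustrative usage sketch (added for documentation; the dimensions below are
# placeholders, not values taken from this repository). Because the block ends with
# a residual connection onto the input, input_dim and output_dim are chosen equal.
#
#     fsmn = UniDeepFsmn(input_dim=257, output_dim=257, lorder=20, hidden_size=512)
#     x = th.randn(4, 100, 257)   # (batch, frames, features)
#     y = fsmn(x)                 # same shape as x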
class UniDeepFsmn_dual(nn.Module):
    """
    UniDeepFsmn_dual is a neural network module implementing a dual deep feedforward sequential memory network (FSMN) block.

    This class extends UniDeepFsmn by adding a second memory convolution for richer feature extraction.

    Attributes:
        input_dim (int): Dimension of the input features.
        output_dim (int): Dimension of the output features.
        lorder (int): Order of the FSMN memory; each memory convolution spans 2 * lorder - 1 frames.
        hidden_size (int): Number of hidden units in the linear layer.
        linear (nn.Linear): Linear layer that projects input features to the hidden size.
        project (nn.Linear): Linear layer that projects hidden features to the output dimension.
        conv1 (nn.Conv2d): First memory convolution (depthwise, groups=output_dim).
        conv2 (nn.Conv2d): Second memory convolution (grouped, groups=output_dim // 4).
    """

    def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None):
        super(UniDeepFsmn_dual, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        if lorder is None:
            return
        self.lorder = lorder
        self.hidden_size = hidden_size
        # Initialize the layers
        self.linear = nn.Linear(input_dim, hidden_size)  # Linear transformation to hidden size
        self.project = nn.Linear(hidden_size, output_dim, bias=False)  # Project hidden size to output dimension
        self.conv1 = nn.Conv2d(output_dim, output_dim, [lorder + lorder - 1, 1], [1, 1],
                               groups=output_dim, bias=False)  # First memory convolution (depthwise)
        self.conv2 = nn.Conv2d(output_dim, output_dim, [lorder + lorder - 1, 1], [1, 1],
                               groups=output_dim // 4, bias=False)  # Second memory convolution (output_dim must be divisible by 4)

    def forward(self, input):
        """
        Forward pass for the UniDeepFsmn_dual model.

        Args:
            input (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_dim).

        Returns:
            torch.Tensor: Output tensor of the same shape as the input, enhanced by the network.
        """
        f1 = F.relu(self.linear(input))  # Linear layer followed by ReLU activation
        p1 = self.project(f1)  # Project to the output dimension
        x = th.unsqueeze(p1, 1)  # Add a channel dimension for Conv2d: (B, 1, T, D)
        x_per = x.permute(0, 3, 2, 1)  # Rearrange to (B, D, T, 1) for the convolutions
        y = F.pad(x_per, [0, 0, self.lorder - 1, self.lorder - 1])  # Pad the time axis for the first convolution
        conv1_out = x_per + self.conv1(y)  # Residual connection around the first memory convolution
        z = F.pad(conv1_out, [0, 0, self.lorder - 1, self.lorder - 1])  # Pad the time axis for the second convolution
        out = conv1_out + self.conv2(z)  # Residual connection around the second memory convolution
        out1 = out.permute(0, 3, 2, 1)  # Permute back to (B, 1, T, D)
        return input + out1.squeeze()  # Residual connection with the block input
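
# Illustrative usage sketch (placeholder dimensions, not from this repository). Since
# conv2 uses groups=output_dim // 4, output_dim should be divisible by 4.
#
#     fsmn_dual = UniDeepFsmn_dual(input_dim=256, output_dim=256, lorder=20, hidden_size=512)
#     x = th.randn(4, 100, 256)   # (batch, frames, features)
#     y = fsmn_dual(x)            # same shape as x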
class DilatedDenseNet(nn.Module):
    """
    DilatedDenseNet implements a densely connected network of dilated convolutions.

    The dense connections and the dilation rates, which double at each layer, give the
    network a large receptive field with relatively few parameters.

    Attributes:
        depth (int): Number of convolutional layers in the network.
        in_channels (int): Number of input channels for the first layer.
        pad (nn.ConstantPad2d): Padding layer (kept for compatibility; not used in forward).
        twidth (int): Temporal width of the convolution kernel (2 * lorder - 1).
        kernel_size (tuple): Kernel size for the convolution operations.
    """

    def __init__(self, depth=4, lorder=20, in_channels=64):
        super(DilatedDenseNet, self).__init__()
        self.depth = depth
        self.in_channels = in_channels
        self.pad = nn.ConstantPad2d((1, 1, 1, 0), value=0.)  # Padding layer (not used in forward)
        self.twidth = lorder * 2 - 1  # Temporal width of the kernel
        self.kernel_size = (self.twidth, 1)  # Kernel size for convolutions
        # Initialize layers dynamically based on depth
        for i in range(self.depth):
            dil = 2 ** i  # Dilation rate doubles at each layer
            pad_length = lorder + (dil - 1) * (lorder - 1) - 1  # Padding that keeps the temporal length unchanged
            setattr(self, 'pad{}'.format(i + 1), nn.ConstantPad2d((0, 0, pad_length, pad_length), value=0.))  # Padding for dilation
            setattr(self, 'conv{}'.format(i + 1),
                    nn.Conv2d(self.in_channels * (i + 1), self.in_channels, kernel_size=self.kernel_size,
                              dilation=(dil, 1), groups=self.in_channels, bias=False))  # Dilated grouped convolution
            setattr(self, 'norm{}'.format(i + 1), nn.InstanceNorm2d(in_channels, affine=True))  # Normalization layer
            setattr(self, 'prelu{}'.format(i + 1), nn.PReLU(self.in_channels))  # Activation layer

    def forward(self, x):
        """
        Forward pass for the DilatedDenseNet model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, in_channels, height, width);
                in this file the time axis lies along height and width is 1.

        Returns:
            torch.Tensor: Output tensor of shape (batch_size, in_channels, height, width).
        """
        skip = x  # Running concatenation of all previous outputs (dense connectivity)
        for i in range(self.depth):
            out = getattr(self, 'pad{}'.format(i + 1))(skip)  # Apply padding
            out = getattr(self, 'conv{}'.format(i + 1))(out)  # Apply dilated convolution
            out = getattr(self, 'norm{}'.format(i + 1))(out)  # Apply instance normalization
            out = getattr(self, 'prelu{}'.format(i + 1))(out)  # Apply PReLU activation
            skip = th.cat([out, skip], dim=1)  # Concatenate the output with all previous features
        return out  # Return the output of the last layer
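
# Illustrative usage sketch (placeholder dimensions, not from this repository). The
# network preserves the temporal length, and the output has in_channels channels.
#
#     dense = DilatedDenseNet(depth=4, lorder=20, in_channels=64)
#     x = th.randn(2, 64, 100, 1)   # (batch, channels, frames, 1)
#     y = dense(x)                  # (2, 64, 100, 1)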
class UniDeepFsmn_dilated(nn.Module):
    """
    UniDeepFsmn_dilated combines the UniDeepFsmn architecture with a dilated dense network
    to enhance feature extraction while keeping computation efficient.

    Attributes:
        input_dim (int): Dimension of the input features.
        output_dim (int): Dimension of the output features.
        depth (int): Depth of the dilated dense network.
        lorder (int): Order of the FSMN memory; determines the convolution kernel size.
        hidden_size (int): Number of hidden units in the linear layer.
        linear (nn.Linear): Linear layer that projects input features to the hidden size.
        project (nn.Linear): Linear layer that projects hidden features to the output dimension.
        conv (DilatedDenseNet): Dilated dense network used for feature extraction.
    """

    def __init__(self, input_dim, output_dim, lorder=None, hidden_size=None, depth=2):
        super(UniDeepFsmn_dilated, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.depth = depth
        if lorder is None:
            return
        self.lorder = lorder
        self.hidden_size = hidden_size
        # Initialize the layers
        self.linear = nn.Linear(input_dim, hidden_size)  # Linear transformation to hidden size
        self.project = nn.Linear(hidden_size, output_dim, bias=False)  # Project hidden size to output dimension
        self.conv = DilatedDenseNet(depth=self.depth, lorder=lorder, in_channels=output_dim)  # Dilated dense network for feature extraction

    def forward(self, input):
        """
        Forward pass for the UniDeepFsmn_dilated model.

        Args:
            input (torch.Tensor): Input tensor of shape (batch_size, seq_len, input_dim).

        Returns:
            torch.Tensor: Output tensor of the same shape as the input, enhanced by the network.
        """
        f1 = F.relu(self.linear(input))  # Linear layer followed by ReLU activation
        p1 = self.project(f1)  # Project to the output dimension
        x = th.unsqueeze(p1, 1)  # Add a channel dimension for Conv2d: (B, 1, T, D)
        x_per = x.permute(0, 3, 2, 1)  # Rearrange to (B, D, T, 1) for the convolutions
        out = self.conv(x_per)  # Pass through the dilated dense network
        out1 = out.permute(0, 3, 2, 1)  # Permute back to (B, 1, T, D)
        return input + out1.squeeze()  # Residual connection with the block input
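

if __name__ == "__main__":
    # Minimal smoke test, added for illustration only; the sizes below are arbitrary
    # placeholders and do not correspond to any particular configuration of this code.
    batch, frames, feats = 2, 50, 64  # feats is divisible by 4, as required by UniDeepFsmn_dual
    x = th.randn(batch, frames, feats)

    blocks = {
        "UniDeepFsmn": UniDeepFsmn(feats, feats, lorder=20, hidden_size=128),
        "UniDeepFsmn_dual": UniDeepFsmn_dual(feats, feats, lorder=20, hidden_size=128),
        "UniDeepFsmn_dilated": UniDeepFsmn_dilated(feats, feats, lorder=20, hidden_size=128, depth=2),
    }
    for name, block in blocks.items():
        y = block(x)
        print(name, tuple(y.shape))  # each block preserves the (batch, frames, feats) shape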