|
|
import torch |
|
|
import torch.nn as nn |
|
|
import torch.nn.functional as F |
|
|
import json |
|
|
import os |
|
|
|
|
|
class ResidualBlock(nn.Module):
    """Stack of residual sub-blocks, each a dilated conv followed by a plain conv.

    For every entry in ``dilation`` the block applies
    ``leaky_relu -> dilated Conv1d -> leaky_relu -> Conv1d`` and adds the
    result back onto its input. Both convolutions keep the channel count and,
    thanks to "same" padding, the sequence length, so the residual addition is
    always shape-compatible.

    Args:
        channels: Number of input and output channels of every convolution.
        kernel_size: Kernel size shared by all convolutions. Must be odd so
            that symmetric padding can preserve the sequence length.
        dilation: Iterable of dilation factors; one sub-block is built per
            entry.

    Raises:
        ValueError: If ``kernel_size`` is even or smaller than 1.
    """

    @staticmethod
    def _as_int(value):
        """Unwrap the 1-element tuple/list form that ``nn.Conv1d`` also accepts."""
        return value[0] if isinstance(value, (tuple, list)) else value

    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
        super().__init__()

        k = self._as_int(kernel_size)
        if k < 1 or k % 2 == 0:
            raise ValueError(
                f"kernel_size must be a positive odd integer, got {kernel_size!r}"
            )

        # Materialise once so a one-shot iterable yields the same number of
        # layers for both lists.
        dilations = tuple(dilation)

        # "Same" padding for stride 1 is dilation * (kernel_size - 1) // 2.
        # For the default kernel_size=3 this equals the dilation itself.
        self.convs1 = nn.ModuleList([
            nn.Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=d,
                padding=self._as_int(d) * (k - 1) // 2,
            )
            for d in dilations
        ])
        # Undilated convolutions: padding is (kernel_size - 1) // 2
        # (1 for the default kernel_size=3).
        self.convs2 = nn.ModuleList([
            nn.Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                padding=(k - 1) // 2,
            )
            for _ in dilations
        ])

    def forward(self, x):
        """Run ``x`` through every residual sub-block and return the result.

        The output has the same shape as ``x``.
        """
        for c1, c2 in zip(self.convs1, self.convs2):
            xt = F.leaky_relu(x, 0.1)
            xt = c1(xt)
            xt = F.leaky_relu(xt, 0.1)
            xt = c2(xt)
            # Residual connection; relies on both convs preserving the shape.
            x = xt + x
        return x
|
|
|
|
|
class RVCModel(nn.Module):
    """Convolutional encoder/decoder built from a configuration dictionary.

    The encoder is a ``Conv1d`` taking 128 input channels up to
    ``config["model"]["upsample_initial_channel"]`` channels, followed by three
    ``ResidualBlock`` modules. The decoder is a single ``Conv1d`` mapping back
    to 128 channels. Both convolutions use kernel size 7, stride 1 and
    padding 3.

    Args:
        config: Mapping with a ``"model"`` entry that provides
            ``"upsample_initial_channel"``. It is stored unchanged on
            ``self.config``.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        model_cfg = config["model"]
        hidden_channels = model_cfg["upsample_initial_channel"]

        # Module order inside each Sequential determines the state-dict keys,
        # so it must stay stable for existing checkpoints to load.
        self.encoder = nn.Sequential(
            nn.Conv1d(128, hidden_channels, 7, 1, 3),
            *[ResidualBlock(hidden_channels) for _ in range(3)]
        )

        self.decoder = nn.Sequential(
            nn.Conv1d(hidden_channels, 128, 7, 1, 3),
        )

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Args:
            x: Input tensor with 128 channels (as required by the first
                ``Conv1d``).

        Returns:
            The decoder output, which again has 128 channels.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

    def convert_voice(self, audio_path):
        """Placeholder: return ``audio_path`` unchanged.

        No audio is read and the network is not invoked.
        """
        return audio_path

    @classmethod
    def from_pretrained(cls, model_path):
        """Build a model from a directory containing ``config.json``.

        If ``model.pth`` exists in the same directory, its state dict is
        loaded onto the CPU and applied to the model. If it is missing, the
        model keeps its freshly initialised weights and a ``RuntimeWarning``
        is emitted so the situation does not go unnoticed.

        Args:
            model_path: Directory holding ``config.json`` and, optionally,
                ``model.pth``.

        Returns:
            The constructed model instance.

        Raises:
            OSError: If ``config.json`` cannot be opened.
            json.JSONDecodeError: If ``config.json`` is not valid JSON.
        """
        import warnings

        config_path = os.path.join(model_path, "config.json")
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        model = cls(config)

        model_file = os.path.join(model_path, "model.pth")
        if os.path.exists(model_file):
            # NOTE(security): torch.load unpickles the file, which can execute
            # arbitrary code. Only load checkpoints from trusted sources;
            # consider weights_only=True on PyTorch versions that support it.
            model.load_state_dict(torch.load(model_file, map_location="cpu"))
        else:
            warnings.warn(
                f"No weights found at {model_file}; "
                "returning a model with randomly initialised parameters.",
                RuntimeWarning,
                stacklevel=2,
            )

        return model
|
|
|