flavioschneider committed
Commit: b8272e6
Parent: 778b3c8

Upload Vocoder

Files changed (3)
  1. config.json +6 -1
  2. model.py +35 -0
  3. pytorch_model.bin +3 -0
config.json CHANGED
@@ -1,7 +1,12 @@
 {
+  "architectures": [
+    "Vocoder"
+  ],
   "auto_map": {
-    "AutoConfig": "config.VocoderConfig"
+    "AutoConfig": "config.VocoderConfig",
+    "AutoModel": "model.Vocoder"
   },
   "model_type": "archinetai/vocoder-v1",
+  "torch_dtype": "float32",
   "transformers_version": "4.24.0"
 }
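
The auto_map additions register the repo's own Python classes with transformers: AutoConfig resolves to config.VocoderConfig and AutoModel to model.Vocoder (added in this commit). A minimal loading sketch, assuming the repo id matches the "model_type" field above; trust_remote_code=True is needed because these classes live in the repo itself, not in transformers:

    from transformers import AutoModel

    # Assumed repo id, taken from "model_type" above; adjust to the actual repo path.
    vocoder = AutoModel.from_pretrained(
        "archinetai/vocoder-v1",
        trust_remote_code=True,  # allow transformers to import config.py / model.py from the repo
    )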
model.py ADDED
@@ -0,0 +1,35 @@
+import torch
+from torch import Tensor
+from transformers import PreTrainedModel
+from audio_diffusion_pytorch import DiffusionVocoder, UNetV0, VDiffusion, VSampler
+from .config import VocoderConfig
+
+
+class Vocoder(PreTrainedModel):
+
+    config_class = VocoderConfig
+
+    def __init__(self, config: VocoderConfig):
+        super().__init__(config)
+
+        self.model = DiffusionVocoder(
+            net_t=UNetV0,
+            mel_channels=80,
+            mel_n_fft=1024,
+            mel_sample_rate=48000,
+            mel_normalize_log=True,
+            channels=[8, 32, 64, 256, 256, 512, 512, 1024, 1024],
+            factors=[1, 4, 4, 4, 2, 2, 2, 2, 2],
+            items=[1, 2, 2, 2, 2, 2, 2, 4, 4],
+            diffusion_t=VDiffusion,
+            sampler_t=VSampler
+        )
+
+    def to_spectrogram(self, *args, **kwargs):
+        return self.model.to_spectrogram(*args, **kwargs)
+
+    @torch.no_grad()
+    def sample(self, *args, **kwargs):
+        return self.model.sample(*args, **kwargs)
+
+
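
model.py wraps DiffusionVocoder from audio_diffusion_pytorch in a PreTrainedModel so the vocoder can be shipped through the Hub. Continuing from the loading sketch above, a usage sketch assuming that library's API (to_spectrogram converts a waveform to a mel spectrogram; sample runs diffusion vocoding and accepts a num_steps argument); tensor shapes here are illustrative, not taken from this commit:

    import torch

    waveform = torch.randn(1, 1, 2**18)             # [batch, channels, length], dummy audio
    spectrogram = vocoder.to_spectrogram(waveform)  # mel spectrogram (80 mel channels, per the config)
    audio = vocoder.sample(spectrogram, num_steps=10)  # vocode the spectrogram back to a waveform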
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78faa8d5c179f6391b4076db3585d1175d7bcf621bc648fa1b55988f02a01a39
+size 712909241
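
pytorch_model.bin is committed as a Git LFS pointer (spec version, SHA-256 object id, and payload size in bytes, about 713 MB); the weights themselves live in LFS storage. A fetch sketch using huggingface_hub, again under the assumed repo id:

    from huggingface_hub import hf_hub_download

    # Resolves the LFS pointer and downloads the actual ~713 MB weights file.
    weights_path = hf_hub_download(
        repo_id="archinetai/vocoder-v1",  # assumed repo id
        filename="pytorch_model.bin",
    )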