apsys committed on
Commit
a16af14
1 Parent(s): 8315872

Upload folder using huggingface_hub

Browse files
.ipynb_checkpoints/speechtokenizer_hubert_avg_config-checkpoint.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 3,
4
+ "batch_size": 60,
5
+ "learning_rate": 0.0001,
6
+ "adam_b1": 0.5,
7
+ "adam_b2": 0.9,
8
+ "lr_decay": 0.98,
9
+ "seed": 1234,
10
+ "lambda_distill": 0.15,
11
+
12
+ "n_filters": 64,
13
+ "strides": [8,5,4,2],
14
+ "dimension": 1024,
15
+ "semantic_dimension": 768,
16
+ "bidirectional": true,
17
+ "dilation_base": 2,
18
+ "residual_kernel_size": 3,
19
+ "n_residual_layers": 1,
20
+ "lstm_layers": 2,
21
+ "activation": "ELU",
22
+
23
+
24
+ "segment_size": 48000,
25
+ "num_mels": 80,
26
+ "num_freq": 1025,
27
+ "n_fft": 1024,
28
+ "hop_size": 240,
29
+ "win_size": 1024,
30
+
31
+ "sampling_rate": 16000,
32
+ "sample_rate": 16000,
33
+
34
+ "codebook_size": 1024,
35
+ "n_q": 8,
36
+
37
+ "fmin": 0,
38
+ "fmax": 8000,
39
+ "fmax_for_loss": null,
40
+
41
+ "num_workers": 12,
42
+
43
+ "dist_config": {
44
+ "dist_backend": "nccl",
45
+ "dist_url": "tcp://localhost:54322",
46
+ "world_size": 1
47
+ }
48
+ }
SpeechTokenizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d04593b6c9a4b475f91ca481141a6ef5b23e6ac112f347dd2b2717f193c1c728
3
+ size 481906997
WavTokenizer_small_600_24k_4096.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44c40fbb83d2d42329ac098e252a31b5708fb7b3bf864d108dd3ed26911d004
3
+ size 1589082492
speechtokenizer_hubert_avg_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 3,
4
+ "batch_size": 60,
5
+ "learning_rate": 0.0001,
6
+ "adam_b1": 0.5,
7
+ "adam_b2": 0.9,
8
+ "lr_decay": 0.98,
9
+ "seed": 1234,
10
+ "lambda_distill": 0.15,
11
+
12
+ "n_filters": 64,
13
+ "strides": [8,5,4,2],
14
+ "dimension": 1024,
15
+ "semantic_dimension": 768,
16
+ "bidirectional": true,
17
+ "dilation_base": 2,
18
+ "residual_kernel_size": 3,
19
+ "n_residual_layers": 1,
20
+ "lstm_layers": 2,
21
+ "activation": "ELU",
22
+
23
+
24
+ "segment_size": 48000,
25
+ "num_mels": 80,
26
+ "num_freq": 1025,
27
+ "n_fft": 1024,
28
+ "hop_size": 240,
29
+ "win_size": 1024,
30
+
31
+ "sampling_rate": 16000,
32
+ "sample_rate": 16000,
33
+
34
+ "codebook_size": 1024,
35
+ "n_q": 8,
36
+
37
+ "fmin": 0,
38
+ "fmax": 8000,
39
+ "fmax_for_loss": null,
40
+
41
+ "num_workers": 12,
42
+
43
+ "dist_config": {
44
+ "dist_backend": "nccl",
45
+ "dist_url": "tcp://localhost:54322",
46
+ "world_size": 1
47
+ }
48
+ }
wavtokenizer_large_speech_320_24k.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7450020c154f6aba033cb8651466cb79cb1b1cdd10ea64eaba68e7871cabcc5a
3
+ size 1754880958
wavtokenizer_large_unify_600_24k.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72182c1b6bd5ea7f84cf3ec78a0a3244cf42daa660b2e9bce23f5d74064d8205
3
+ size 1759224573