cndn
committed on
Commit
•
bc174ff
1
Parent(s):
0ffb909
adjust
Browse files
- README.md +9 -0
- config.yaml +7 -0
- example_config.yaml +0 -40
- example_readme.md +0 -12
README.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
license: cc-by-4.0
|
3 |
---
|
|
|
|
|
|
1 |
---
|
2 |
+
library_name: fairseq
|
3 |
+
task: audio-to-audio
|
4 |
+
tags:
|
5 |
+
- fairseq
|
6 |
+
- audio
|
7 |
+
- audio-to-audio
|
8 |
+
- speech-to-speech-translation
|
9 |
license: cc-by-4.0
|
10 |
---
|
11 |
+
You can try out the model on the right of the page by uploading or recording.
|
12 |
+
For model usage, please refer to https://huggingface.co/facebook/textless_sm_cs_en
|
config.yaml
CHANGED
@@ -31,3 +31,10 @@ vocoder:
|
|
31 |
model_path: N/A
|
32 |
speaker: false
|
33 |
type: code_hifigan
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
model_path: N/A
|
32 |
speaker: false
|
33 |
type: code_hifigan
|
34 |
+
hub:
|
35 |
+
input_type: fbank80_w_utt_cmvn
|
36 |
+
tts_model_id: pytorch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur
|
37 |
+
unit_vocoder: true
|
38 |
+
generation_args:
|
39 |
+
beam: 10
|
40 |
+
max_len_a: 1
|
example_config.yaml
DELETED
@@ -1,40 +0,0 @@
|
|
1 |
-
data_root: N/A
|
2 |
-
input_channels: 1
|
3 |
-
input_feat_per_channel: 80
|
4 |
-
multitask:
|
5 |
-
source_unit:
|
6 |
-
data: N/A
|
7 |
-
decoder_type: transformer
|
8 |
-
dict: N/A
|
9 |
-
encoder_layer: 6
|
10 |
-
loss_weight: 8.0
|
11 |
-
target_type: text
|
12 |
-
output_channels: 1
|
13 |
-
output_feat_per_channel: 1
|
14 |
-
output_feat_reduction_rate: 0
|
15 |
-
output_sample_rate: 16000
|
16 |
-
specaugment:
|
17 |
-
freq_mask_F: 27
|
18 |
-
freq_mask_N: 1
|
19 |
-
time_mask_N: 1
|
20 |
-
time_mask_T: 100
|
21 |
-
time_mask_p: 1.0
|
22 |
-
time_wrap_W: 0
|
23 |
-
transforms:
|
24 |
-
_eval:
|
25 |
-
- utterance_cmvn
|
26 |
-
_train:
|
27 |
-
- utterance_cmvn
|
28 |
-
- specaugment
|
29 |
-
vocoder:
|
30 |
-
dur_prediction: true
|
31 |
-
model_path: N/A
|
32 |
-
speaker: false
|
33 |
-
type: code_hifigan
|
34 |
-
hub:
|
35 |
-
input_type: fbank80_w_utt_cmvn
|
36 |
-
tts_model_id: pytorch/fairseq:ust:unit_hifigan_mhubert_vp_en_es_fr_it3_400k_layer11_km1000_lj_dur
|
37 |
-
unit_vocoder: true
|
38 |
-
generation_args:
|
39 |
-
beam: 10
|
40 |
-
max_len_a: 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
example_readme.md
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
---
|
2 |
-
library_name: fairseq
|
3 |
-
task: audio-to-audio
|
4 |
-
tags:
|
5 |
-
- fairseq
|
6 |
-
- audio
|
7 |
-
- audio-to-audio
|
8 |
-
- speech-to-speech-translation
|
9 |
-
license: cc-by-4.0
|
10 |
-
---
|
11 |
-
You can try out the model on the right of the page by uploading or recording.
|
12 |
-
For model usage, please refer to https://huggingface.co/facebook/textless_sm_cs_en
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|