yamathcy commited on
Commit
0ca977d
1 Parent(s): 769d57d

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - m-a-p/MERT-v0-public
4
+ - facebook/hubert-base-ls960
5
+ library_name: transformers
6
+ tags:
7
+ - mergekit
8
+ - merge
9
+
10
+ ---
11
+ # merged_model
12
+
13
+ This is a merge of pre-trained audio representation models (speech and music) created using [mergekit](https://github.com/cg123/mergekit).
14
+
15
+ ## Merge Details
16
+ ### Merge Method
17
+
18
+ This model was merged using the SLERP merge method.
19
+
20
+ ### Models Merged
21
+
22
+ The following models were included in the merge:
23
+ * [m-a-p/MERT-v0-public](https://huggingface.co/m-a-p/MERT-v0-public)
24
+ * [facebook/hubert-base-ls960](https://huggingface.co/facebook/hubert-base-ls960)
25
+
26
+ ### Configuration
27
+
28
+ The following YAML configuration was used to produce this model:
29
+
30
+ ```yaml
31
+ slices:
32
+ - sources:
33
+ - model: facebook/hubert-base-ls960
34
+ layer_range: [0, 12]
35
+ - model: m-a-p/MERT-v0-public
36
+ layer_range: [0, 12]
37
+ trust_remote_code: true
38
+ merge_method: slerp
39
+ base_model:
40
+ model: facebook/hubert-base-ls960
41
+ override_architecture: HubertModel
42
+ parameters:
43
+ t:
44
+ - filter: self_attn
45
+ value: [0, 0.5, 0.3, 0.7, 1]
46
+ - filter: mlp
47
+ value: [1, 0.5, 0.7, 0.3, 0]
48
+ - value: 0.5
49
+ ```
config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/hubert-base-ls960",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "HubertModel"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_dropout": 0.0,
45
+ "feat_extract_norm": "group",
46
+ "feat_proj_dropout": 0.1,
47
+ "feat_proj_layer_norm": true,
48
+ "final_dropout": 0.1,
49
+ "gradient_checkpointing": false,
50
+ "hidden_act": "gelu",
51
+ "hidden_dropout": 0.1,
52
+ "hidden_dropout_prob": 0.1,
53
+ "hidden_size": 768,
54
+ "initializer_range": 0.02,
55
+ "intermediate_size": 3072,
56
+ "layer_norm_eps": 1e-05,
57
+ "layerdrop": 0.1,
58
+ "mask_feature_length": 10,
59
+ "mask_feature_min_masks": 0,
60
+ "mask_feature_prob": 0.0,
61
+ "mask_time_length": 10,
62
+ "mask_time_min_masks": 2,
63
+ "mask_time_prob": 0.05,
64
+ "model_type": "hubert",
65
+ "num_attention_heads": 12,
66
+ "num_conv_pos_embedding_groups": 16,
67
+ "num_conv_pos_embeddings": 128,
68
+ "num_feat_extract_layers": 7,
69
+ "num_hidden_layers": 12,
70
+ "pad_token_id": 0,
71
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
72
+ "transformers_version": "4.37.1",
73
+ "use_weighted_layer_sum": false,
74
+ "vocab_size": 32
75
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ slices:
2
+ - sources:
3
+ - model: facebook/hubert-base-ls960
4
+ layer_range: [0, 12]
5
+ - model: m-a-p/MERT-v0-public
6
+ layer_range: [0, 12]
7
+ trust_remote_code: true
8
+ merge_method: slerp
9
+ base_model:
10
+ model: facebook/hubert-base-ls960
11
+ override_architecture: HubertModel
12
+ parameters:
13
+ t:
14
+ - filter: self_attn
15
+ value: [0, 0.5, 0.3, 0.7, 1]
16
+ - filter: mlp
17
+ value: [1, 0.5, 0.7, 0.3, 0]
18
+ - value: 0.5
model-00001-of-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f385f64327eb629a567738a5b7c44b905da97ddea246173497be346290e122fb
3
+ size 341819976
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.4.2", "total_size": 341797888}, "weight_map": {"encoder.layers.0.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.0.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.0.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.0.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.0.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.0.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.0.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.0.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.0.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.0.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.0.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.0.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.0.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.0.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.0.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.0.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.1.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.1.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.1.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.1.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.1.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.1.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.1.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.1.attention.v_proj.weight": 
"model-00001-of-00001.safetensors", "encoder.layers.1.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.1.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.1.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.1.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.1.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.1.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.1.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.1.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.10.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.10.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.10.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.10.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.10.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.10.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.10.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.10.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.10.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.10.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.10.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.10.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.10.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.10.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.10.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.10.layer_norm.weight": 
"model-00001-of-00001.safetensors", "encoder.layers.11.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.11.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.11.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.11.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.11.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.11.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.11.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.11.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.11.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.11.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.11.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.11.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.11.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.11.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.11.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.11.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.2.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.2.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.2.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.2.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.2.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.2.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.2.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.2.attention.v_proj.weight": "model-00001-of-00001.safetensors", 
"encoder.layers.2.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.2.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.2.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.2.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.2.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.2.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.2.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.2.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.3.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.3.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.3.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.3.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.3.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.3.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.3.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.3.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.3.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.3.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.3.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.3.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.3.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.3.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.3.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.3.layer_norm.weight": "model-00001-of-00001.safetensors", 
"encoder.layers.4.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.4.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.4.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.4.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.4.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.4.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.4.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.4.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.4.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.4.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.4.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.4.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.4.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.4.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.4.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.4.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.5.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.5.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.5.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.5.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.5.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.5.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.5.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.5.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.5.feed_forward.intermediate_dense.bias": 
"model-00001-of-00001.safetensors", "encoder.layers.5.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.5.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.5.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.5.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.5.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.5.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.5.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.6.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.6.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.6.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.6.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.6.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.6.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.6.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.6.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.6.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.6.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.6.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.6.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.6.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.6.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.6.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.6.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.7.attention.k_proj.bias": "model-00001-of-00001.safetensors", 
"encoder.layers.7.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.7.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.7.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.7.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.7.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.7.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.7.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.7.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.7.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.7.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.7.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.7.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.7.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.7.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.7.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.8.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.8.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.8.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.8.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.8.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.8.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.8.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.8.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.8.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", 
"encoder.layers.8.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.8.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.8.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.8.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.8.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.8.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.8.layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.9.attention.k_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.9.attention.k_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.9.attention.out_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.9.attention.out_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.9.attention.q_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.9.attention.q_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.9.attention.v_proj.bias": "model-00001-of-00001.safetensors", "encoder.layers.9.attention.v_proj.weight": "model-00001-of-00001.safetensors", "encoder.layers.9.feed_forward.intermediate_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.9.feed_forward.intermediate_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.9.feed_forward.output_dense.bias": "model-00001-of-00001.safetensors", "encoder.layers.9.feed_forward.output_dense.weight": "model-00001-of-00001.safetensors", "encoder.layers.9.final_layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.9.final_layer_norm.weight": "model-00001-of-00001.safetensors", "encoder.layers.9.layer_norm.bias": "model-00001-of-00001.safetensors", "encoder.layers.9.layer_norm.weight": "model-00001-of-00001.safetensors", "feature_projection.layer_norm.bias": "model-00001-of-00001.safetensors", "feature_projection.layer_norm.weight": 
"model-00001-of-00001.safetensors", "feature_projection.projection.bias": "model-00001-of-00001.safetensors", "feature_projection.projection.weight": "model-00001-of-00001.safetensors"}}