ceyda committed on
Commit
3d0bc0a
1 Parent(s): 27503d5

fairseq tuned

Browse files
config.json CHANGED
@@ -1,12 +1,11 @@
1
  {
2
- "_name_or_path": "./pretrained/checkpoint_0.60",
3
  "activation_dropout": 0.1,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
  "Wav2Vec2ForCTC"
7
  ],
8
  "attention_dropout": 0.1,
9
- "bos_token_id": 1,
10
  "conv_bias": false,
11
  "conv_dim": [
12
  512,
@@ -35,17 +34,17 @@
35
  2,
36
  2
37
  ],
38
- "ctc_loss_reduction": "mean",
39
- "ctc_zero_infinity": true,
40
  "do_stable_layer_norm": false,
41
  "eos_token_id": 2,
42
  "feat_extract_activation": "gelu",
43
  "feat_extract_norm": "group",
44
  "feat_proj_dropout": 0.1,
45
  "final_dropout": 0.1,
46
- "gradient_checkpointing": true,
47
  "hidden_act": "gelu",
48
- "hidden_dropout": 0.05,
49
  "hidden_size": 768,
50
  "initializer_range": 0.02,
51
  "intermediate_size": 3072,
@@ -54,14 +53,14 @@
54
  "mask_feature_length": 10,
55
  "mask_feature_prob": 0.0,
56
  "mask_time_length": 10,
57
- "mask_time_prob": 0.5,
58
  "model_type": "wav2vec2",
59
  "num_attention_heads": 12,
60
  "num_conv_pos_embedding_groups": 16,
61
  "num_conv_pos_embeddings": 128,
62
  "num_feat_extract_layers": 7,
63
  "num_hidden_layers": 12,
64
- "pad_token_id": 36,
65
- "transformers_version": "4.5.0.dev0",
66
- "vocab_size": 37
67
  }
 
1
  {
 
2
  "activation_dropout": 0.1,
3
  "apply_spec_augment": true,
4
  "architectures": [
5
  "Wav2Vec2ForCTC"
6
  ],
7
  "attention_dropout": 0.1,
8
+ "bos_token_id": 0,
9
  "conv_bias": false,
10
  "conv_dim": [
11
  512,
 
34
  2,
35
  2
36
  ],
37
+ "ctc_loss_reduction": "sum",
38
+ "ctc_zero_infinity": false,
39
  "do_stable_layer_norm": false,
40
  "eos_token_id": 2,
41
  "feat_extract_activation": "gelu",
42
  "feat_extract_norm": "group",
43
  "feat_proj_dropout": 0.1,
44
  "final_dropout": 0.1,
45
+ "gradient_checkpointing": false,
46
  "hidden_act": "gelu",
47
+ "hidden_dropout": 0.1,
48
  "hidden_size": 768,
49
  "initializer_range": 0.02,
50
  "intermediate_size": 3072,
 
53
  "mask_feature_length": 10,
54
  "mask_feature_prob": 0.0,
55
  "mask_time_length": 10,
56
+ "mask_time_prob": 0.05,
57
  "model_type": "wav2vec2",
58
  "num_attention_heads": 12,
59
  "num_conv_pos_embedding_groups": 16,
60
  "num_conv_pos_embeddings": 128,
61
  "num_feat_extract_layers": 7,
62
  "num_hidden_layers": 12,
63
+ "pad_token_id": 1,
64
+ "transformers_version": "4.4.2",
65
+ "vocab_size": 39
66
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecb5a521af5105ca67abd19a841c08e97b071a8b69e186b06b25633c6fc36804
3
- size 377690860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac7eaad990b15315d1772928ea15b9c77d2e259311b5189f9772b04da157294
3
+ size 377691502
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|"}
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|","special_tokens_map_file": "/home/ceyda/workspace/libs/fairseq/hf_finetuned_output/special_tokens_map.json", "tokenizer_file": null}
vocab.json CHANGED
@@ -1 +1,39 @@
1
- {"e": 0, "x": 1, "î": 2, "l": 3, "â": 4, "j": 5, "ç": 6, "ş": 7, "g": 8, "ı": 9, "v": 10, "d": 11, "t": 12, "n": 13, "a": 14, "c": 15, "h": 16, "p": 17, "r": 18, "w": 19, "z": 20, "k": 21, "u": 22, "b": 23, "ü": 24, "y": 26, "o": 27, "q": 28, "m": 29, "f": 30, "s": 31, "ö": 32, "ğ": 33, "i": 34, "|": 25, "[UNK]": 35, "[PAD]": 36}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"|": 4,
2
+ "p": 5,
3
+ "i": 6,
4
+ "r": 7,
5
+ "n": 8,
6
+ "s": 9,
7
+ "ö": 10,
8
+ "z": 11,
9
+ "l": 12,
10
+ "e": 13,
11
+ "h": 14,
12
+ "â": 15,
13
+ "y": 16,
14
+ "a": 17,
15
+ "k": 18,
16
+ "ı": 19,
17
+ "o": 20,
18
+ "m": 21,
19
+ "ü": 22,
20
+ "g": 23,
21
+ "c": 24,
22
+ "b": 25,
23
+ "ş": 26,
24
+ "d": 27,
25
+ "u": 28,
26
+ "t": 29,
27
+ "ç": 30,
28
+ "ğ": 31,
29
+ "v": 32,
30
+ "f": 33,
31
+ "j": 34,
32
+ "x": 35,
33
+ "w": 36,
34
+ "q": 37,
35
+ "î": 38,
36
+ "<s>": 0,
37
+ "<pad>": 1,
38
+ "</s>": 2,
39
+ "<unk>": 3}