g8a9 committed on
Commit
f4b9bb8
1 Parent(s): db0d830

Training in progress, step 400

Browse files
added_tokens.json CHANGED
@@ -1 +1 @@
1
- {"<s>": 140, "</s>": 141}
1
+ {"<s>": 177, "</s>": 178}
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
@@ -59,20 +59,12 @@
59
  "intermediate_size": 4096,
60
  "layer_norm_eps": 1e-05,
61
  "layerdrop": 0.0,
62
- "mask_channel_length": 10,
63
- "mask_channel_min_space": 1,
64
- "mask_channel_other": 0.0,
65
- "mask_channel_prob": 0.0,
66
- "mask_channel_selection": "static",
67
  "mask_feature_length": 10,
68
  "mask_feature_min_masks": 0,
69
  "mask_feature_prob": 0.0,
70
  "mask_time_length": 10,
71
  "mask_time_min_masks": 2,
72
- "mask_time_min_space": 1,
73
- "mask_time_other": 0.0,
74
  "mask_time_prob": 0.05,
75
- "mask_time_selection": "static",
76
  "model_type": "wav2vec2",
77
  "num_adapter_layers": 3,
78
  "num_attention_heads": 16,
@@ -84,7 +76,7 @@
84
  "num_hidden_layers": 24,
85
  "num_negatives": 100,
86
  "output_hidden_size": 1024,
87
- "pad_token_id": 139,
88
  "proj_codevector_dim": 768,
89
  "tdnn_dilation": [
90
  1,
@@ -110,6 +102,6 @@
110
  "torch_dtype": "float32",
111
  "transformers_version": "4.16.0.dev0",
112
  "use_weighted_layer_sum": false,
113
- "vocab_size": 142,
114
  "xvector_output_dim": 512
115
  }
1
  {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
  "activation_dropout": 0.0,
4
  "adapter_kernel_size": 3,
5
  "adapter_stride": 2,
59
  "intermediate_size": 4096,
60
  "layer_norm_eps": 1e-05,
61
  "layerdrop": 0.0,
 
 
 
 
 
62
  "mask_feature_length": 10,
63
  "mask_feature_min_masks": 0,
64
  "mask_feature_prob": 0.0,
65
  "mask_time_length": 10,
66
  "mask_time_min_masks": 2,
 
 
67
  "mask_time_prob": 0.05,
 
68
  "model_type": "wav2vec2",
69
  "num_adapter_layers": 3,
70
  "num_attention_heads": 16,
76
  "num_hidden_layers": 24,
77
  "num_negatives": 100,
78
  "output_hidden_size": 1024,
79
+ "pad_token_id": 176,
80
  "proj_codevector_dim": 768,
81
  "tdnn_dilation": [
82
  1,
102
  "torch_dtype": "float32",
103
  "transformers_version": "4.16.0.dev0",
104
  "use_weighted_layer_sum": false,
105
+ "vocab_size": 179,
106
  "xvector_output_dim": 512
107
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0ba4c4cf228305b2995884e4422a72b3beed7dbd09cc6f56feb1f1d4bdfe501
3
- size 1262505841
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88f20f6e89a675b120eba8601af65ec70b8a6b3f20a32bf672b18ee257570478
3
+ size 1262657585
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "../robust-asr-italian", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "../wav2vec2-xls-r-300m-italian", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b79d7de43fc2d318cf10cb0fd3f0a51d6135f75e4220295c3e09ecb71abfc13
3
- size 3055
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:417385b9ff1065433b0750b3d81a236a56bc44cd5710c59b255b972eb9b3a94e
3
+ size 3119
vocab.json CHANGED
@@ -1 +1 @@
1
- {"<": 1, "=": 2, ">": 3, "[": 4, "]": 5, "_": 6, "`": 7, "a": 8, "b": 9, "c": 10, "d": 11, "e": 12, "f": 13, "g": 14, "h": 15, "i": 16, "j": 17, "k": 18, "l": 19, "m": 20, "n": 21, "o": 22, "p": 23, "q": 24, "r": 25, "s": 26, "t": 27, "u": 28, "v": 29, "w": 30, "x": 31, "y": 32, "z": 33, "~": 34, "¡": 35, "«": 36, "°": 37, "´": 38, "µ": 39, "»": 40, "ß": 41, "à": 42, "á": 43, "ã": 44, "ä": 45, "å": 46, "æ": 47, "è": 48, "é": 49, "ê": 50, "ë": 51, "ì": 52, "í": 53, "î": 54, "ï": 55, "ð": 56, "ñ": 57, "ò": 58, "ó": 59, "ô": 60, "ö": 61, "ø": 62, "ù": 63, "ú": 64, "û": 65, "ü": 66, "þ": 67, "ÿ": 68, "ā": 69, "ć": 70, "č": 71, "đ": 72, "ė": 73, "ę": 74, "ě": 75, "ğ": 76, "ī": 77, "ı": 78, "ľ": 79, "ł": 80, "ń": 81, "ň": 82, "ō": 83, "ő": 84, "œ": 85, "ř": 86, "ś": 87, "ş": 88, "š": 89, "ū": 90, "ź": 91, "ž": 92, "ș": 93, "ț": 94, "ə": 95, "ʹ": 96, "ʻ": 97, "ʼ": 98, "ʾ": 99, "ʿ": 100, "̇": 101, "а": 102, "б": 103, "е": 104, "л": 105, "ц": 106, "ъ": 107, "ё": 108, "љ": 109, "ד": 110, "ה": 111, "ו": 112, "": 113, "": 114, "": 115, "": 116, "": 117, "": 118, "": 119, "": 120, "": 121, "": 122, "": 123, "": 124, "": 125, "": 126, "": 127, "": 128, "": 129, "": 130, "": 131, "": 132, "": 133, "": 134, "": 135, "": 136, "": 137, "|": 0, "[UNK]": 138, "[PAD]": 139}
1
+ {"<": 1, "=": 2, ">": 3, "[": 4, "]": 5, "_": 6, "`": 7, "a": 8, "b": 9, "c": 10, "d": 11, "e": 12, "f": 13, "g": 14, "h": 15, "i": 16, "j": 17, "k": 18, "l": 19, "m": 20, "n": 21, "o": 22, "p": 23, "q": 24, "r": 25, "s": 26, "t": 27, "u": 28, "v": 29, "w": 30, "x": 31, "y": 32, "z": 33, "{": 34, "}": 35, "~": 36, "¡": 37, "«": 38, "°": 39, "´": 40, "µ": 41, "º": 42, "»": 43, "ß": 44, "à": 45, "á": 46, "ã": 47, "ä": 48, "å": 49, "æ": 50, "è": 51, "é": 52, "ê": 53, "ë": 54, "ì": 55, "í": 56, "î": 57, "ï": 58, "ð": 59, "ñ": 60, "ò": 61, "ó": 62, "ô": 63, "ö": 64, "ø": 65, "ù": 66, "ú": 67, "û": 68, "ü": 69, "þ": 70, "ÿ": 71, "ā": 72, "ą": 73, "ć": 74, "č": 75, "đ": 76, "ė": 77, "ę": 78, "ě": 79, "ğ": 80, "ħ": 81, "ī": 82, "ı": 83, "ľ": 84, "ł": 85, "ń": 86, "ň": 87, "ō": 88, "ő": 89, "œ": 90, "ř": 91, "ś": 92, "ş": 93, "š": 94, "ū": 95, "ŭ": 96, "ź": 97, "ż": 98, "ž": 99, "ș": 100, "ț": 101, "ə": 102, "ʹ": 103, "ʻ": 104, "ʼ": 105, "ʾ": 106, "ʿ": 107, "ː": 108, "̇": 109, "̨": 110, "а": 111, "б": 112, "д": 113, "е": 114, "л": 115, "н": 116, "о": 117, "с": 118, "у": 119, "ц": 120, "ъ": 121, "ё": 122, "љ": 123, "ң": 124, "ד": 125, "ה": 126, "ו": 127, "ة": 128, "ر": 129, "س": 130, "ص": 131, "غ": 132, "ل": 133, "ي": 134, "": 135, "": 136, "": 137, "ṭ": 138, "ễ": 139, "‑": 140, "–": 141, "—": 142, "’": 143, "„": 144, "…": 145, "′": 146, "☆": 147, "あ": 148, "ア": 149, "カ": 150, "キ": 151, "サ": 152, "ザ": 153, "ノ": 154, "フ": 155, "リ": 156, "ン": 157, "・": 158, "ー": 159, "万": 160, "三": 161, "丰": 162, "古": 163, "多": 164, "家": 165, "峰": 166, "張": 167, "旅": 168, "禅": 169, "ꞌ": 170, "심": 171, "악": 172, "음": 173, "중": 174, "|": 0, "[UNK]": 175, "[PAD]": 176}