smutuvi committed
Commit
13c7bdf
1 Parent(s): 9751eb1

smutuvi/wav2vec2-large-xlsr-sw_ndizi_782_100_epoch

README.md ADDED
@@ -0,0 +1,83 @@
+ ---
+ license: apache-2.0
+ base_model: smutuvi/wav2vec2-large-xlsr-sw
+ tags:
+ - generated_from_trainer
+ metrics:
+ - wer
+ model-index:
+ - name: wav2vec2-large-xlsr-sw_ndizi_782_100_epochs
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # wav2vec2-large-xlsr-sw_ndizi_782_100_epochs
+
+ This model is a fine-tuned version of [smutuvi/wav2vec2-large-xlsr-sw](https://huggingface.co/smutuvi/wav2vec2-large-xlsr-sw) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 3.1009
+ - Wer: 0.4847
+
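+ A minimal inference sketch (assuming the checkpoint is published under the repo id matching the card name, and a local WAV file; the path is a placeholder):
+
+ ```python
+ import torch
+ import torchaudio
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+
+ repo = "smutuvi/wav2vec2-large-xlsr-sw_ndizi_782_100_epochs"  # assumed repo id
+ processor = Wav2Vec2Processor.from_pretrained(repo)
+ model = Wav2Vec2ForCTC.from_pretrained(repo).eval()
+
+ waveform, sr = torchaudio.load("sample.wav")  # placeholder path
+ if sr != 16_000:  # the feature extractor expects 16 kHz audio (preprocessor_config.json)
+     waveform = torchaudio.functional.resample(waveform, sr, 16_000)
+
+ inputs = processor(waveform.squeeze().numpy(), sampling_rate=16_000, return_tensors="pt")
+ with torch.no_grad():
+     logits = model(inputs.input_values).logits
+ pred_ids = torch.argmax(logits, dim=-1)
+ print(processor.batch_decode(pred_ids)[0])
+ ```
+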
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training; a sketch mapping them onto `TrainingArguments` follows the list:
+ - learning_rate: 0.0003
+ - train_batch_size: 4
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 8
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_steps: 500
+ - num_epochs: 100
+ - mixed_precision_training: Native AMP
+
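+ A sketch mapping these onto `transformers.TrainingArguments` (only `output_dir` is illustrative; all other values come from the list above):
+
+ ```python
+ from transformers import TrainingArguments
+
+ training_args = TrainingArguments(
+     output_dir="wav2vec2-large-xlsr-sw_ndizi_782_100_epochs",  # illustrative
+     learning_rate=3e-4,
+     per_device_train_batch_size=4,
+     per_device_eval_batch_size=8,
+     seed=42,
+     gradient_accumulation_steps=2,  # effective train batch size: 4 * 2 = 8
+     adam_beta1=0.9,
+     adam_beta2=0.999,
+     adam_epsilon=1e-8,
+     lr_scheduler_type="linear",
+     warmup_steps=500,
+     num_train_epochs=100,
+     fp16=True,  # "Native AMP" mixed precision
+ )
+ ```
+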
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
+ |:-------------:|:-----:|:----:|:---------------:|:------:|
+ | 1.4035 | 4.79 | 400 | 1.2492 | 0.5608 |
+ | 0.8489 | 9.58 | 800 | 1.0208 | 0.5114 |
+ | 0.632 | 14.37 | 1200 | 1.3292 | 0.5306 |
+ | 0.4653 | 19.16 | 1600 | 1.5159 | 0.5109 |
+ | 0.3598 | 23.95 | 2000 | 1.4650 | 0.5450 |
+ | 0.2776 | 28.74 | 2400 | 1.8568 | 0.5124 |
+ | 0.218 | 33.53 | 2800 | 2.0913 | 0.5188 |
+ | 0.1711 | 38.32 | 3200 | 2.2706 | 0.5035 |
+ | 0.141 | 43.11 | 3600 | 2.3050 | 0.5094 |
+ | 0.1162 | 47.9 | 4000 | 2.4539 | 0.5025 |
+ | 0.1007 | 52.69 | 4400 | 2.4754 | 0.5020 |
+ | 0.0881 | 57.49 | 4800 | 2.5512 | 0.5030 |
+ | 0.0816 | 62.28 | 5200 | 2.6458 | 0.5064 |
+ | 0.0792 | 67.07 | 5600 | 2.7869 | 0.5025 |
+ | 0.06 | 71.86 | 6000 | 2.9063 | 0.5040 |
+ | 0.0594 | 76.65 | 6400 | 2.8363 | 0.5049 |
+ | 0.0527 | 81.44 | 6800 | 3.0801 | 0.4921 |
+ | 0.0473 | 86.23 | 7200 | 3.0959 | 0.4867 |
+ | 0.0471 | 91.02 | 7600 | 3.0942 | 0.4852 |
+ | 0.0405 | 95.81 | 8000 | 3.1009 | 0.4847 |
+
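+ The Wer column is the word error rate on the held-out set; a sketch of how it is typically computed with the `evaluate` library (the strings are illustrative):
+
+ ```python
+ import evaluate
+
+ wer_metric = evaluate.load("wer")
+ # 1 substitution over 3 reference words -> WER = 1/3
+ wer = wer_metric.compute(
+     predictions=["habari ya asubuhi"],
+     references=["habari za asubuhi"],
+ )
+ print(wer)
+ ```
+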
+ ### Framework versions
+
+ - Transformers 4.37.1
+ - Pytorch 2.2.1+cu118
+ - Datasets 2.16.1
+ - Tokenizers 0.15.0
added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "</s>": 60,
+   "<s>": 59
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b176dab8648b9b163cbafe7f51d35dd6060de96f4c31f4f996ce8d3f6e7a89b1
+ oid sha256:5c061c03b90ba153cdd6765384ddb007ed10b0cdbcee804bc0a84865cc21ac33
  size 1262057580
preprocessor_config.json CHANGED
@@ -4,6 +4,7 @@
  "feature_size": 1,
  "padding_side": "right",
  "padding_value": 0.0,
+ "processor_class": "Wav2Vec2Processor",
  "return_attention_mask": true,
  "sampling_rate": 16000
  }
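The added "processor_class" key lets `AutoProcessor` resolve the correct processor class straight from the repo. A minimal sketch (repo id assumed from the card name):

```python
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("smutuvi/wav2vec2-large-xlsr-sw_ndizi_782_100_epochs")
print(type(processor).__name__)  # expected: Wav2Vec2Processor
```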
runs/Mar22_05-32-55_hades-prod01/events.out.tfevents.1711085640.hades-prod01.437015.0 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4d943f60bc64d15b6ea55295b55b696e7f83db31dfedb7ec8222444e5ac3df17
- size 15620
+ oid sha256:0dff4db984a7efbc25a5d02f7ffe2167e23b33b2b295c8cb932c7e6c26e63477
+ size 15974
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "[PAD]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": true,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": true,
+     "single_word": false
+   }
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
+ {
+   "added_tokens_decoder": {
+     "57": {
+       "content": "[UNK]",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": false
+     },
+     "58": {
+       "content": "[PAD]",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": false
+     },
+     "59": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "60": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": false,
+   "eos_token": "</s>",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "[PAD]",
+   "processor_class": "Wav2Vec2Processor",
+   "replace_word_delimiter_char": " ",
+   "target_lang": null,
+   "tokenizer_class": "Wav2Vec2CTCTokenizer",
+   "unk_token": "[UNK]",
+   "word_delimiter_token": "|"
+ }
vocab.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "'": 35,
+   "(": 33,
+   ")": 5,
+   "*": 52,
+   "/": 48,
+   "=": 44,
+   "[PAD]": 58,
+   "[UNK]": 57,
+   "_": 18,
+   "`": 6,
+   "a": 51,
+   "b": 16,
+   "c": 37,
+   "d": 28,
+   "e": 24,
+   "f": 54,
+   "g": 56,
+   "h": 30,
+   "i": 17,
+   "j": 3,
+   "k": 19,
+   "l": 47,
+   "m": 55,
+   "n": 23,
+   "o": 21,
+   "p": 1,
+   "q": 34,
+   "r": 50,
+   "s": 4,
+   "t": 0,
+   "u": 22,
+   "v": 26,
+   "w": 15,
+   "x": 2,
+   "y": 53,
+   "z": 11,
+   "|": 14,
+   "°": 43,
+   "µ": 46,
+   "á": 12,
+   "â": 41,
+   "ã": 45,
+   "å": 7,
+   "é": 36,
+   "ë": 49,
+   "í": 39,
+   "ï": 38,
+   "ñ": 13,
+   "ó": 9,
+   "ö": 29,
+   "ø": 27,
+   "ú": 40,
+   "š": 31,
+   "ū": 8,
+   "ː": 25,
+   "ụ": 10,
+   "’": 32,
+   "•": 42,
+   "…": 20
+ }
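A sketch of how `Wav2Vec2CTCTokenizer` uses this character-level vocabulary: each id maps to a single character, "|" (id 14) is decoded as a word boundary, and "[PAD]" (id 58) serves as the CTC blank in the usual wav2vec2 fine-tuning setup. Repo id assumed from the card name:

```python
from transformers import Wav2Vec2CTCTokenizer

tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(
    "smutuvi/wav2vec2-large-xlsr-sw_ndizi_782_100_epochs"  # assumed repo id
)
ids = tokenizer("habari yako").input_ids  # per-character ids from vocab.json
print(ids)                    # e.g. [30, 51, 16, 51, 50, 17, 14, 53, 51, 19, 21]
print(tokenizer.decode(ids))  # "habari yako"
```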