marinone94 committed
Commit bbba84f
1 Parent(s): fbe783d

Training in progress, step 100

checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1722a491ea58423692abe6cde8aac0703fa17d8346a9ae868114f2509e04c97
+oid sha256:6a37fc39f7fbbd9e4e27f32438be5a7defea135497c5dd3778613d2e5336ad19
 size 2490337361
checkpoint-100/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7998265fd0c18bea1f246ca75cca77edd2def33646432a9437b4d839e97eab3d
+oid sha256:b71ffcd1b91ea379737ddde0779f635b716bdd28dd2319a1848a317ef50fd710
 size 1262063089
checkpoint-100/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24a8cdd3a3488a3315f9617b64f16798a9e655a6d49b281bfbe024d5e3f2a880
-size 14503
+oid sha256:b0b19e6c7c1493b5479a5fdad58d60d11a146aedd47536529f399dc05f5b158e
+size 14567
checkpoint-100/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f28dfdeb34cb1b60f5d9c9b44dc5523bf2866ea557da6b58b27104a0030e0b3
+oid sha256:13a3423b2fe42f204bc8fe2c666ff379f9fd753a0f13613064a5e71e86b519e8
 size 559
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6af49a56cc0b3dfaf6c4d29d3b9ab21c908c0ad2d95e41f19e6b8101ed863a39
+oid sha256:48ba5c556f6ae479cce41c7f298c01a0d3452634d5c926dc7e48294e0c69c304
 size 623
checkpoint-100/trainer_state.json CHANGED
@@ -1,121 +1,64 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.3311258278145695,
+  "epoch": 1.281150159744409,
   "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
-    {
-      "epoch": 0.13,
-      "learning_rate": 4.9999999999999996e-05,
-      "loss": 13.2048,
-      "step": 10
-    },
-    {
-      "epoch": 0.26,
-      "learning_rate": 7.46938775510204e-05,
-      "loss": 26.5657,
-      "step": 20
-    },
     {
       "epoch": 0.26,
-      "eval_loss": 17.199691772460938,
-      "eval_runtime": 5.248,
-      "eval_samples_per_second": 18.674,
-      "eval_steps_per_second": 0.762,
-      "eval_wer": 1.0,
+      "learning_rate": 0.0007307692307692308,
+      "loss": 4.2559,
       "step": 20
     },
     {
-      "epoch": 0.4,
-      "learning_rate": 7.36734693877551e-05,
-      "loss": 8.0091,
-      "step": 30
-    },
-    {
-      "epoch": 0.53,
-      "learning_rate": 7.265306122448979e-05,
-      "loss": 7.6332,
+      "epoch": 0.51,
+      "learning_rate": 0.0007115384615384615,
+      "loss": 3.068,
       "step": 40
     },
     {
-      "epoch": 0.53,
-      "eval_loss": 4.783994197845459,
-      "eval_runtime": 4.9818,
-      "eval_samples_per_second": 19.672,
-      "eval_steps_per_second": 0.803,
+      "epoch": 0.64,
+      "eval_loss": 3.058549165725708,
+      "eval_runtime": 3.3353,
+      "eval_samples_per_second": 29.982,
+      "eval_steps_per_second": 1.199,
       "eval_wer": 1.0,
-      "step": 40
-    },
-    {
-      "epoch": 0.66,
-      "learning_rate": 7.163265306122449e-05,
-      "loss": 3.8261,
       "step": 50
     },
     {
-      "epoch": 0.79,
-      "learning_rate": 7.061224489795918e-05,
-      "loss": 3.8717,
+      "epoch": 0.77,
+      "learning_rate": 0.0006923076923076924,
+      "loss": 3.0637,
       "step": 60
     },
     {
-      "epoch": 0.79,
-      "eval_loss": 3.6783852577209473,
-      "eval_runtime": 4.9141,
-      "eval_samples_per_second": 19.943,
-      "eval_steps_per_second": 0.814,
-      "eval_wer": 1.0,
-      "step": 60
-    },
-    {
-      "epoch": 0.93,
-      "learning_rate": 6.959183673469387e-05,
-      "loss": 3.7607,
-      "step": 70
-    },
-    {
-      "epoch": 1.07,
-      "learning_rate": 6.857142857142857e-05,
-      "loss": 3.638,
+      "epoch": 1.03,
+      "learning_rate": 0.0006730769230769232,
+      "loss": 3.0752,
       "step": 80
     },
     {
-      "epoch": 1.07,
-      "eval_loss": 3.4979612827301025,
-      "eval_runtime": 5.0758,
-      "eval_samples_per_second": 19.307,
-      "eval_steps_per_second": 0.788,
-      "eval_wer": 1.0,
-      "step": 80
-    },
-    {
-      "epoch": 1.2,
-      "learning_rate": 6.755102040816325e-05,
-      "loss": 3.4601,
-      "step": 90
-    },
-    {
-      "epoch": 1.33,
-      "learning_rate": 6.653061224489796e-05,
-      "loss": 3.4138,
+      "epoch": 1.28,
+      "learning_rate": 0.0006538461538461538,
+      "loss": 3.0083,
       "step": 100
     },
     {
-      "epoch": 1.33,
-      "eval_loss": 3.489065170288086,
-      "eval_runtime": 4.9074,
-      "eval_samples_per_second": 19.97,
-      "eval_steps_per_second": 0.815,
+      "epoch": 1.28,
+      "eval_loss": 3.1246652603149414,
+      "eval_runtime": 3.2285,
+      "eval_samples_per_second": 30.974,
+      "eval_steps_per_second": 1.239,
       "eval_wer": 1.0,
       "step": 100
     }
   ],
-  "max_steps": 750,
+  "max_steps": 780,
   "num_train_epochs": 10,
-  "total_flos": 2.00880445675488e+18,
+  "total_flos": 1.5267594688539034e+18,
   "trial_name": null,
   "trial_params": null
 }
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:470abf25a211ef5ec3a9821c884f2749839bee5e95f633b168d26f90c1809409
+oid sha256:31becb5e425437ea470512000be6c31bb502b71fe15942ff8cf0cc01eb8267a0
 size 3055
checkpoint-80/config.json DELETED
@@ -1,115 +0,0 @@
-{
-  "_name_or_path": "KBLab/wav2vec2-large-voxrex",
-  "activation_dropout": 0.1,
-  "adapter_kernel_size": 3,
-  "adapter_stride": 2,
-  "add_adapter": false,
-  "apply_spec_augment": true,
-  "architectures": [
-    "Wav2Vec2ForCTC"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": 1,
-  "classifier_proj_size": 256,
-  "codevector_dim": 768,
-  "contrastive_logits_temperature": 0.1,
-  "conv_bias": true,
-  "conv_dim": [
-    512,
-    512,
-    512,
-    512,
-    512,
-    512,
-    512
-  ],
-  "conv_kernel": [
-    10,
-    3,
-    3,
-    3,
-    3,
-    2,
-    2
-  ],
-  "conv_stride": [
-    5,
-    2,
-    2,
-    2,
-    2,
-    2,
-    2
-  ],
-  "ctc_loss_reduction": "mean",
-  "ctc_zero_infinity": false,
-  "diversity_loss_weight": 0.1,
-  "do_stable_layer_norm": true,
-  "eos_token_id": 2,
-  "feat_extract_activation": "gelu",
-  "feat_extract_dropout": 0.0,
-  "feat_extract_norm": "layer",
-  "feat_proj_dropout": 0.0,
-  "feat_quantizer_dropout": 0.0,
-  "final_dropout": 0.0,
-  "hidden_act": "gelu",
-  "hidden_dropout": 0.0,
-  "hidden_size": 1024,
-  "initializer_range": 0.02,
-  "intermediate_size": 4096,
-  "layer_norm_eps": 1e-05,
-  "layerdrop": 0.0,
-  "mask_channel_length": 10,
-  "mask_channel_min_space": 1,
-  "mask_channel_other": 0.0,
-  "mask_channel_prob": 0.0,
-  "mask_channel_selection": "static",
-  "mask_feature_length": 64,
-  "mask_feature_min_masks": 0,
-  "mask_feature_prob": 0.25,
-  "mask_time_length": 10,
-  "mask_time_min_masks": 2,
-  "mask_time_min_space": 1,
-  "mask_time_other": 0.0,
-  "mask_time_prob": 0.75,
-  "mask_time_selection": "static",
-  "model_type": "wav2vec2",
-  "num_adapter_layers": 3,
-  "num_attention_heads": 16,
-  "num_codevector_groups": 2,
-  "num_codevectors_per_group": 320,
-  "num_conv_pos_embedding_groups": 16,
-  "num_conv_pos_embeddings": 128,
-  "num_feat_extract_layers": 7,
-  "num_hidden_layers": 24,
-  "num_negatives": 100,
-  "output_hidden_size": 1024,
-  "pad_token_id": 31,
-  "proj_codevector_dim": 768,
-  "tdnn_dilation": [
-    1,
-    2,
-    3,
-    1,
-    1
-  ],
-  "tdnn_dim": [
-    512,
-    512,
-    512,
-    512,
-    1500
-  ],
-  "tdnn_kernel": [
-    5,
-    3,
-    3,
-    1,
-    1
-  ],
-  "torch_dtype": "float32",
-  "transformers_version": "4.17.0.dev0",
-  "use_weighted_layer_sum": false,
-  "vocab_size": 34,
-  "xvector_output_dim": 512
-}
checkpoint-80/optimizer.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6b638480e632c1a7b61ada4cfb4e085fc6687b2cc7006a885f1727d7d5a41f17
-size 2490337361
checkpoint-80/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
-{
-  "do_normalize": true,
-  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
-  "feature_size": 1,
-  "padding_side": "right",
-  "padding_value": 0,
-  "return_attention_mask": true,
-  "sampling_rate": 16000
-}
checkpoint-80/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d243142fcbd07def1347c28af05f32d03aa37c7536de513cc8503d261cf6d773
-size 1262063089
checkpoint-80/rng_state.pth DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:d26c7900baa445f707767794b45729eeb213e1b2dc71ded73ca14c4c4a7d25a4
-size 14567
checkpoint-80/scaler.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2409ebedbf30ba97120ef308e1ac839369eea668dd8be70d109b5b98fe71857f
-size 559
checkpoint-80/scheduler.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:721652adf9b5a54ee9b9baf3dac2052ee6350d8d896360a6439d75c8bb179b00
-size 623
checkpoint-80/trainer_state.json DELETED
@@ -1,100 +0,0 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 1.0662251655629138,
-  "global_step": 80,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.13,
-      "learning_rate": 4.9999999999999996e-05,
-      "loss": 13.2048,
-      "step": 10
-    },
-    {
-      "epoch": 0.26,
-      "learning_rate": 7.46938775510204e-05,
-      "loss": 26.5657,
-      "step": 20
-    },
-    {
-      "epoch": 0.26,
-      "eval_loss": 17.199691772460938,
-      "eval_runtime": 5.248,
-      "eval_samples_per_second": 18.674,
-      "eval_steps_per_second": 0.762,
-      "eval_wer": 1.0,
-      "step": 20
-    },
-    {
-      "epoch": 0.4,
-      "learning_rate": 7.36734693877551e-05,
-      "loss": 8.0091,
-      "step": 30
-    },
-    {
-      "epoch": 0.53,
-      "learning_rate": 7.265306122448979e-05,
-      "loss": 7.6332,
-      "step": 40
-    },
-    {
-      "epoch": 0.53,
-      "eval_loss": 4.783994197845459,
-      "eval_runtime": 4.9818,
-      "eval_samples_per_second": 19.672,
-      "eval_steps_per_second": 0.803,
-      "eval_wer": 1.0,
-      "step": 40
-    },
-    {
-      "epoch": 0.66,
-      "learning_rate": 7.163265306122449e-05,
-      "loss": 3.8261,
-      "step": 50
-    },
-    {
-      "epoch": 0.79,
-      "learning_rate": 7.061224489795918e-05,
-      "loss": 3.8717,
-      "step": 60
-    },
-    {
-      "epoch": 0.79,
-      "eval_loss": 3.6783852577209473,
-      "eval_runtime": 4.9141,
-      "eval_samples_per_second": 19.943,
-      "eval_steps_per_second": 0.814,
-      "eval_wer": 1.0,
-      "step": 60
-    },
-    {
-      "epoch": 0.93,
-      "learning_rate": 6.959183673469387e-05,
-      "loss": 3.7607,
-      "step": 70
-    },
-    {
-      "epoch": 1.07,
-      "learning_rate": 6.857142857142857e-05,
-      "loss": 3.638,
-      "step": 80
-    },
-    {
-      "epoch": 1.07,
-      "eval_loss": 3.4979612827301025,
-      "eval_runtime": 5.0758,
-      "eval_samples_per_second": 19.307,
-      "eval_steps_per_second": 0.788,
-      "eval_wer": 1.0,
-      "step": 80
-    }
-  ],
-  "max_steps": 750,
-  "num_train_epochs": 10,
-  "total_flos": 1.61703171100896e+18,
-  "trial_name": null,
-  "trial_params": null
-}
checkpoint-80/training_args.bin DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:470abf25a211ef5ec3a9821c884f2749839bee5e95f633b168d26f90c1809409
-size 3055
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80ebb6ebc7a95b13fda5cdc22fab5fcfb9ffdb99ca0102065a7147116a7f9f3f
+oid sha256:b71ffcd1b91ea379737ddde0779f635b716bdd28dd2319a1848a317ef50fd710
 size 1262063089