mprzibilla committed on
Commit
bd3e6fa
1 Parent(s): b6572fa

Training in progress, step 6700

Browse files
config.json CHANGED
@@ -110,7 +110,7 @@
110
  1
111
  ],
112
  "torch_dtype": "float32",
113
- "transformers_version": "4.18.0",
114
  "use_weighted_layer_sum": false,
115
  "vocab_size": 28,
116
  "xvector_output_dim": 512
 
110
  1
111
  ],
112
  "torch_dtype": "float32",
113
+ "transformers_version": "4.23.1",
114
  "use_weighted_layer_sum": false,
115
  "vocab_size": 28,
116
  "xvector_output_dim": 512
last-checkpoint/config.json CHANGED
@@ -110,7 +110,7 @@
110
  1
111
  ],
112
  "torch_dtype": "float32",
113
- "transformers_version": "4.18.0",
114
  "use_weighted_layer_sum": false,
115
  "vocab_size": 28,
116
  "xvector_output_dim": 512
 
110
  1
111
  ],
112
  "torch_dtype": "float32",
113
+ "transformers_version": "4.23.1",
114
  "use_weighted_layer_sum": false,
115
  "vocab_size": 28,
116
  "xvector_output_dim": 512
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbc0864acfd13b716f75d307bf48aada3819f622ff0bd60c9128105ebfcff8e
3
- size 174303
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f482282dd625d07e4ef01233b70e2e8c1030c46fe2ebeb4b1901cd1670300157
3
+ size 721661957
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:887961831a0f30b4269cdd28f6acde7f7d51f9a06abaabe2de32501df0b1860f
3
- size 377644503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65da033b1e874758bedcfe07b3b6d54ea6005c90eead128adc27e7b5d7f21527
3
+ size 377646433
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93198e96e3eab53a5e609a281fb99228f51dc43ea5bfb0348b2d825a7a3ab647
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c714b776c16a9ba181a27bc6c2e7d70ca6d2267f5b7991f3fd1c9388171ab7
3
+ size 14639
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61823d7f75e62090d2b8f678678ba83a75ec4c530252db1f2a051fa6494b9b82
3
- size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1873a3085fec4186eecba9b874bba0fd9ad0105505617746123f3e2e0f8c3f
3
+ size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26fc2b87137deb440e0add509cb5c6eaf84f92619ce4a6bab36eea0195aa5885
3
- size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2310a544508b4524ba2632c6fbf45ea4b35663588ff13c743ebc9b477b456bba
3
+ size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,176 +1,32 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 250.0,
5
- "global_step": 134000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 25.0,
12
- "learning_rate": 9.474312647289867e-05,
13
- "loss": 20.2416,
14
- "step": 13400
15
  },
16
  {
17
- "epoch": 25.0,
18
- "eval_cer": 0.9767676767676767,
19
- "eval_loss": 3.248976230621338,
20
- "eval_runtime": 0.8563,
21
- "eval_samples_per_second": 210.199,
22
- "eval_steps_per_second": 26.859,
23
- "eval_wer": 1.0,
24
- "step": 13400
25
- },
26
- {
27
- "epoch": 50.0,
28
- "learning_rate": 8.422073841319718e-05,
29
- "loss": 2.7136,
30
- "step": 26800
31
- },
32
- {
33
- "epoch": 50.0,
34
- "eval_cer": 0.9222222222222223,
35
- "eval_loss": 3.162766218185425,
36
- "eval_runtime": 0.8572,
37
- "eval_samples_per_second": 209.98,
38
- "eval_steps_per_second": 26.831,
39
- "eval_wer": 1.0,
40
- "step": 26800
41
- },
42
- {
43
- "epoch": 75.0,
44
- "learning_rate": 7.369835035349568e-05,
45
- "loss": 2.574,
46
- "step": 40200
47
- },
48
- {
49
- "epoch": 75.0,
50
- "eval_cer": 0.9141414141414141,
51
- "eval_loss": 3.141190528869629,
52
- "eval_runtime": 0.8427,
53
- "eval_samples_per_second": 213.593,
54
- "eval_steps_per_second": 27.292,
55
- "eval_wer": 1.0,
56
- "step": 40200
57
- },
58
- {
59
- "epoch": 100.0,
60
- "learning_rate": 6.317674783974863e-05,
61
- "loss": 2.5132,
62
- "step": 53600
63
- },
64
- {
65
- "epoch": 100.0,
66
- "eval_cer": 0.9101010101010101,
67
- "eval_loss": 3.159379720687866,
68
- "eval_runtime": 0.8438,
69
- "eval_samples_per_second": 213.316,
70
- "eval_steps_per_second": 27.257,
71
- "eval_wer": 1.0,
72
- "step": 53600
73
- },
74
- {
75
- "epoch": 125.0,
76
- "learning_rate": 5.2655145326001575e-05,
77
- "loss": 2.4764,
78
- "step": 67000
79
- },
80
- {
81
- "epoch": 125.0,
82
- "eval_cer": 0.9050505050505051,
83
- "eval_loss": 3.132108449935913,
84
- "eval_runtime": 0.8431,
85
- "eval_samples_per_second": 213.503,
86
- "eval_steps_per_second": 27.281,
87
- "eval_wer": 1.0,
88
- "step": 67000
89
- },
90
- {
91
- "epoch": 150.0,
92
- "learning_rate": 4.2132757266300084e-05,
93
- "loss": 2.4522,
94
- "step": 80400
95
- },
96
- {
97
- "epoch": 150.0,
98
- "eval_cer": 0.9030303030303031,
99
- "eval_loss": 3.149836301803589,
100
- "eval_runtime": 0.842,
101
- "eval_samples_per_second": 213.776,
102
- "eval_steps_per_second": 27.316,
103
- "eval_wer": 1.0,
104
- "step": 80400
105
- },
106
- {
107
- "epoch": 175.0,
108
- "learning_rate": 3.1610369206598586e-05,
109
- "loss": 2.4366,
110
- "step": 93800
111
- },
112
- {
113
- "epoch": 175.0,
114
- "eval_cer": 0.901010101010101,
115
- "eval_loss": 3.154324769973755,
116
- "eval_runtime": 0.8423,
117
- "eval_samples_per_second": 213.693,
118
- "eval_steps_per_second": 27.305,
119
- "eval_wer": 1.0,
120
- "step": 93800
121
- },
122
- {
123
- "epoch": 200.0,
124
- "learning_rate": 2.1088766692851532e-05,
125
- "loss": 2.426,
126
- "step": 107200
127
- },
128
- {
129
- "epoch": 200.0,
130
- "eval_cer": 0.901010101010101,
131
- "eval_loss": 3.1591243743896484,
132
- "eval_runtime": 0.8434,
133
- "eval_samples_per_second": 213.431,
134
- "eval_steps_per_second": 27.272,
135
- "eval_wer": 1.0,
136
- "step": 107200
137
- },
138
- {
139
- "epoch": 225.0,
140
- "learning_rate": 1.056637863315004e-05,
141
- "loss": 2.4199,
142
- "step": 120600
143
- },
144
- {
145
- "epoch": 225.0,
146
- "eval_cer": 0.898989898989899,
147
- "eval_loss": 3.1549627780914307,
148
- "eval_runtime": 0.8433,
149
- "eval_samples_per_second": 213.458,
150
- "eval_steps_per_second": 27.275,
151
- "eval_wer": 1.0,
152
- "step": 120600
153
- },
154
- {
155
- "epoch": 250.0,
156
- "learning_rate": 4.4776119402985075e-08,
157
- "loss": 2.4161,
158
- "step": 134000
159
- },
160
- {
161
- "epoch": 250.0,
162
- "eval_cer": 0.898989898989899,
163
- "eval_loss": 3.1552622318267822,
164
- "eval_runtime": 0.843,
165
- "eval_samples_per_second": 213.521,
166
- "eval_steps_per_second": 27.283,
167
- "eval_wer": 1.0,
168
- "step": 134000
169
  }
170
  ],
171
- "max_steps": 134000,
172
- "num_train_epochs": 250,
173
- "total_flos": 1.2448746848771059e+20,
174
  "trial_name": null,
175
  "trial_params": null
176
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 6700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 10.0,
12
+ "learning_rate": 9.474941084053418e-05,
13
+ "loss": 4.2941,
14
+ "step": 6700
15
  },
16
  {
17
+ "epoch": 10.0,
18
+ "eval_cer": 0.4717171717171717,
19
+ "eval_loss": 3.055922508239746,
20
+ "eval_runtime": 6.8504,
21
+ "eval_samples_per_second": 26.276,
22
+ "eval_steps_per_second": 3.357,
23
+ "eval_wer": 0.8777777777777778,
24
+ "step": 6700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
+ "max_steps": 67000,
28
+ "num_train_epochs": 100,
29
+ "total_flos": 4.978806059229573e+18,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1381c52f948e9ae84ef99d734a89cfbb100461d77e8a2a97dc058cd76c194855
3
- size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b8e65eef88833700c90fcdc65889067bd9eefe856492cda02e2d22b5581adf
3
+ size 3451
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:887961831a0f30b4269cdd28f6acde7f7d51f9a06abaabe2de32501df0b1860f
3
- size 377644503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65da033b1e874758bedcfe07b3b6d54ea6005c90eead128adc27e7b5d7f21527
3
+ size 377646433
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1381c52f948e9ae84ef99d734a89cfbb100461d77e8a2a97dc058cd76c194855
3
- size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b8e65eef88833700c90fcdc65889067bd9eefe856492cda02e2d22b5581adf
3
+ size 3451