marinone94 committed on
Commit
71a393f
•
1 Parent(s): 6e3915f

Training in progress, step 500

Browse files
{checkpoint-200 → checkpoint-500}/config.json RENAMED
File without changes
{checkpoint-200 → checkpoint-500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a972e696459b016bbc3aa28c954288fe7d65723673c7320476f0145a391b0440
3
- size 2490337361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f451d7dca685dc733896a3b9dde8e1707a70872842fb140a7b1b2bc09dc86e
3
+ size 2490337809
{checkpoint-200 → checkpoint-500}/preprocessor_config.json RENAMED
File without changes
{checkpoint-200 → checkpoint-500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3abb902c24b9b514e6979e814143734f3ac477116b25bc616e937ac37aa386b6
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee400442c70fe28b4032830f08f4c31605f74d5b5778895b5f30b7cad432bdcd
3
  size 1262063089
{checkpoint-200 → checkpoint-500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85ca3e1865dd0f1bb02258869eab1dac68ed8a3dc9fd620878973f94cd7d1990
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60bd99fb8f131eb1ec63d9b9eee304bd3aa49c3b43a3a12613b8de4c1d42c4c6
3
  size 14567
{checkpoint-200 → checkpoint-500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb6982c29cd162f49aeb531674acf574eccd46a8f556bec596040d7c3b95200a
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fa4c7be44c959599b8b43bb9bc3371e9e4e5bbc5758b3ab5afcccfda3e72e67
3
  size 559
{checkpoint-200 → checkpoint-500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:812e7669c0071d762b7ab2054d1e4ba7a23692696045f03436f670d0f378252b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536ace7f76d669d6713c8de85eb8de0ed71bdc66a4ba89707e46295a79ac66a8
3
  size 623
{checkpoint-200 → checkpoint-500}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.19672131147541,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -84,11 +84,128 @@
84
  "eval_steps_per_second": 0.79,
85
  "eval_wer": 1.0,
86
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  }
88
  ],
89
  "max_steps": 4550,
90
  "num_train_epochs": 50,
91
- "total_flos": 3.115318423228506e+18,
92
  "trial_name": null,
93
  "trial_params": null
94
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.491803278688525,
5
+ "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
84
  "eval_steps_per_second": 0.79,
85
  "eval_wer": 1.0,
86
  "step": 200
87
+ },
88
+ {
89
+ "epoch": 2.42,
90
+ "learning_rate": 2.75e-05,
91
+ "loss": 3.079,
92
+ "step": 220
93
+ },
94
+ {
95
+ "epoch": 2.63,
96
+ "learning_rate": 3e-05,
97
+ "loss": 3.0677,
98
+ "step": 240
99
+ },
100
+ {
101
+ "epoch": 2.85,
102
+ "learning_rate": 3.2500000000000004e-05,
103
+ "loss": 3.0656,
104
+ "step": 260
105
+ },
106
+ {
107
+ "epoch": 3.08,
108
+ "learning_rate": 3.5000000000000004e-05,
109
+ "loss": 3.1463,
110
+ "step": 280
111
+ },
112
+ {
113
+ "epoch": 3.3,
114
+ "learning_rate": 3.75e-05,
115
+ "loss": 3.0573,
116
+ "step": 300
117
+ },
118
+ {
119
+ "epoch": 3.3,
120
+ "eval_loss": 3.0614514350891113,
121
+ "eval_runtime": 194.36,
122
+ "eval_samples_per_second": 24.918,
123
+ "eval_steps_per_second": 0.782,
124
+ "eval_wer": 1.0,
125
+ "step": 300
126
+ },
127
+ {
128
+ "epoch": 3.51,
129
+ "learning_rate": 4e-05,
130
+ "loss": 3.0511,
131
+ "step": 320
132
+ },
133
+ {
134
+ "epoch": 3.73,
135
+ "learning_rate": 4.25e-05,
136
+ "loss": 3.0358,
137
+ "step": 340
138
+ },
139
+ {
140
+ "epoch": 3.95,
141
+ "learning_rate": 4.4999999999999996e-05,
142
+ "loss": 3.0416,
143
+ "step": 360
144
+ },
145
+ {
146
+ "epoch": 4.17,
147
+ "learning_rate": 4.75e-05,
148
+ "loss": 3.1053,
149
+ "step": 380
150
+ },
151
+ {
152
+ "epoch": 4.39,
153
+ "learning_rate": 5e-05,
154
+ "loss": 3.0314,
155
+ "step": 400
156
+ },
157
+ {
158
+ "epoch": 4.39,
159
+ "eval_loss": 3.0990231037139893,
160
+ "eval_runtime": 198.3688,
161
+ "eval_samples_per_second": 24.414,
162
+ "eval_steps_per_second": 0.766,
163
+ "eval_wer": 1.0,
164
+ "step": 400
165
+ },
166
+ {
167
+ "epoch": 4.61,
168
+ "learning_rate": 5.25e-05,
169
+ "loss": 3.0309,
170
+ "step": 420
171
+ },
172
+ {
173
+ "epoch": 4.83,
174
+ "learning_rate": 5.5e-05,
175
+ "loss": 3.0259,
176
+ "step": 440
177
+ },
178
+ {
179
+ "epoch": 5.05,
180
+ "learning_rate": 5.75e-05,
181
+ "loss": 3.0998,
182
+ "step": 460
183
+ },
184
+ {
185
+ "epoch": 5.27,
186
+ "learning_rate": 6e-05,
187
+ "loss": 3.0152,
188
+ "step": 480
189
+ },
190
+ {
191
+ "epoch": 5.49,
192
+ "learning_rate": 6.25e-05,
193
+ "loss": 3.0129,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 5.49,
198
+ "eval_loss": 3.039973497390747,
199
+ "eval_runtime": 190.8567,
200
+ "eval_samples_per_second": 25.375,
201
+ "eval_steps_per_second": 0.796,
202
+ "eval_wer": 1.0,
203
+ "step": 500
204
  }
205
  ],
206
  "max_steps": 4550,
207
  "num_train_epochs": 50,
208
+ "total_flos": 7.741689365780442e+18,
209
  "trial_name": null,
210
  "trial_params": null
211
  }
{checkpoint-200 → checkpoint-500}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28c7e3ec36d37a20298023cfa6ac730ae02e82d170ef8859ec5740936e2bc809
3
  size 1262063089
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee400442c70fe28b4032830f08f4c31605f74d5b5778895b5f30b7cad432bdcd
3
  size 1262063089