mprzibilla committed
Commit d9189cb
1 Parent(s): a476699

Training in progress, epoch 1

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c02091ac6b92226695a59577ab7d4dcb9cd98977e07434d7fa01e3945bc7df3
+oid sha256:f725fd71ef00caf2260fb80555a8b5723469dec92b99bbc84e03d849d9b74bd4
 size 721661957
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09c61da1490c623ed4c01c66304040f3e326c2c1ff57ea40ea62eb04deab3d3a
+oid sha256:4d4cb3787d1fb0774c67e00d75488dc6e4149674b801dd96f43c0f9c673138dc
 size 377646433
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9984e3cbd8716939c7fdea32bd39972cea55b1ab6285ee007229d989a1230b1c
+oid sha256:c8afb52420cad66c8a51ec41653557522fbb3d25f06972216fc1b36496b8dedd
 size 14639
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e7730cfcb24b34ccf70f77b07c8d0d2b73e258077d413002a5407dd82e57b83
+oid sha256:52261608150abcba3c48f2dcd8ff39b806f986872115e36cf28df63486b380c1
 size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:130febd259da8024c5a724e8fe97e74d600f3893f62eaf38860a523a0936e692
+oid sha256:cee898ce0fddae89e51bcaaf05334599fc0739fe80927e9c1b5ad3d042f919c7
 size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,169 +1,33 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.0,
-  "global_step": 2844,
+  "epoch": 1.0,
+  "global_step": 316,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 1.0,
-      "learning_rate": 9.842327337330669e-05,
-      "loss": 24.5654,
+      "learning_rate": 9.840106595602932e-05,
+      "loss": 26.6275,
       "step": 316
     },
     {
       "epoch": 1.0,
       "eval_cer": 1.0,
-      "eval_loss": 3.3034040927886963,
+      "eval_loss": 3.2884294986724854,
       "eval_new_wer": 1.0,
       "eval_old_wer": 1.0,
-      "eval_runtime": 5.1223,
-      "eval_samples_per_second": 27.332,
-      "eval_steps_per_second": 3.514,
+      "eval_runtime": 5.0468,
+      "eval_samples_per_second": 27.741,
+      "eval_steps_per_second": 3.567,
       "step": 316
-    },
-    {
-      "epoch": 2.0,
-      "learning_rate": 9.140572951365757e-05,
-      "loss": 3.4232,
-      "step": 632
-    },
-    {
-      "epoch": 2.0,
-      "eval_cer": 1.0,
-      "eval_loss": 3.2738046646118164,
-      "eval_new_wer": 1.0,
-      "eval_old_wer": 1.0,
-      "eval_runtime": 5.4174,
-      "eval_samples_per_second": 25.843,
-      "eval_steps_per_second": 3.323,
-      "step": 632
-    },
-    {
-      "epoch": 3.0,
-      "learning_rate": 8.438818565400845e-05,
-      "loss": 3.3815,
-      "step": 948
-    },
-    {
-      "epoch": 3.0,
-      "eval_cer": 1.0,
-      "eval_loss": 3.2943756580352783,
-      "eval_new_wer": 1.0,
-      "eval_old_wer": 1.0,
-      "eval_runtime": 5.7568,
-      "eval_samples_per_second": 24.319,
-      "eval_steps_per_second": 3.127,
-      "step": 948
-    },
-    {
-      "epoch": 4.0,
-      "learning_rate": 7.737064179435932e-05,
-      "loss": 3.3771,
-      "step": 1264
-    },
-    {
-      "epoch": 4.0,
-      "eval_cer": 1.0,
-      "eval_loss": 3.3197929859161377,
-      "eval_new_wer": 1.0,
-      "eval_old_wer": 1.0,
-      "eval_runtime": 5.2197,
-      "eval_samples_per_second": 26.821,
-      "eval_steps_per_second": 3.448,
-      "step": 1264
-    },
-    {
-      "epoch": 5.0,
-      "learning_rate": 7.03530979347102e-05,
-      "loss": 3.3742,
-      "step": 1580
-    },
-    {
-      "epoch": 5.0,
-      "eval_cer": 1.0,
-      "eval_loss": 3.31402850151062,
-      "eval_new_wer": 1.0,
-      "eval_old_wer": 1.0,
-      "eval_runtime": 5.1197,
-      "eval_samples_per_second": 27.345,
-      "eval_steps_per_second": 3.516,
-      "step": 1580
-    },
-    {
-      "epoch": 6.0,
-      "learning_rate": 6.333555407506108e-05,
-      "loss": 3.3683,
-      "step": 1896
-    },
-    {
-      "epoch": 6.0,
-      "eval_cer": 1.0,
-      "eval_loss": 3.2977089881896973,
-      "eval_new_wer": 1.0,
-      "eval_old_wer": 1.0,
-      "eval_runtime": 5.4977,
-      "eval_samples_per_second": 25.465,
-      "eval_steps_per_second": 3.274,
-      "step": 1896
-    },
-    {
-      "epoch": 7.0,
-      "learning_rate": 5.631801021541196e-05,
-      "loss": 3.3472,
-      "step": 2212
-    },
-    {
-      "epoch": 7.0,
-      "eval_cer": 1.0,
-      "eval_loss": 3.3024919033050537,
-      "eval_new_wer": 1.0,
-      "eval_old_wer": 1.0,
-      "eval_runtime": 5.4129,
-      "eval_samples_per_second": 25.864,
-      "eval_steps_per_second": 3.325,
-      "step": 2212
-    },
-    {
-      "epoch": 8.0,
-      "learning_rate": 4.9300466355762824e-05,
-      "loss": 3.315,
-      "step": 2528
-    },
-    {
-      "epoch": 8.0,
-      "eval_cer": 1.0,
-      "eval_loss": 3.3284974098205566,
-      "eval_new_wer": 1.0,
-      "eval_old_wer": 1.0,
-      "eval_runtime": 5.2334,
-      "eval_samples_per_second": 26.751,
-      "eval_steps_per_second": 3.439,
-      "step": 2528
-    },
-    {
-      "epoch": 9.0,
-      "learning_rate": 4.2282922496113704e-05,
-      "loss": 3.2856,
-      "step": 2844
-    },
-    {
-      "epoch": 9.0,
-      "eval_cer": 1.0,
-      "eval_loss": 3.2858822345733643,
-      "eval_new_wer": 1.0,
-      "eval_old_wer": 1.0,
-      "eval_runtime": 5.3674,
-      "eval_samples_per_second": 26.083,
-      "eval_steps_per_second": 3.354,
-      "step": 2844
     }
   ],
   "max_steps": 4740,
   "num_train_epochs": 15,
-  "total_flos": 2.104156079215534e+18,
+  "total_flos": 2.336192011118592e+17,
   "trial_name": null,
   "trial_params": null
 }
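Note: trainer_state.json is the plain JSON progress log that the Hugging Face Trainer writes into each checkpoint; this commit resets it to the end of epoch 1 (global_step 316). A minimal sketch for inspecting it after downloading the checkpoint locally (the relative path below is an assumption about where the files were saved):

# Minimal sketch: load the checkpoint's trainer state and print the latest eval metrics.
import json

with open("last-checkpoint/trainer_state.json") as f:  # illustrative local path
    state = json.load(f)

print("epoch:", state["epoch"], "global_step:", state["global_step"])

# Evaluation records are the log_history entries that carry eval_* keys.
eval_logs = [entry for entry in state["log_history"] if "eval_loss" in entry]
if eval_logs:
    last = eval_logs[-1]
    print("eval_loss:", last["eval_loss"],
          "eval_cer:", last["eval_cer"],
          "eval_new_wer:", last["eval_new_wer"],
          "eval_old_wer:", last["eval_old_wer"])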
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c6b9420bb6a966eb97bcd5f710a858bd94b0bf4baee2011fe4f71ce2c6c55bd
+oid sha256:0a98ea4cc0a3e85c5ac6067cc39934b4325207661835f058515305611e4d4f88
 size 3387
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09c61da1490c623ed4c01c66304040f3e326c2c1ff57ea40ea62eb04deab3d3a
+oid sha256:4d4cb3787d1fb0774c67e00d75488dc6e4149674b801dd96f43c0f9c673138dc
 size 377646433
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c6b9420bb6a966eb97bcd5f710a858bd94b0bf4baee2011fe4f71ce2c6c55bd
+oid sha256:0a98ea4cc0a3e85c5ac6067cc39934b4325207661835f058515305611e4d4f88
 size 3387
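Note: the .pt/.bin entries in this commit are Git LFS pointer files, so the diffs only swap the sha256 oid while the actual tensors live in LFS storage. A minimal sketch, assuming the pointer file is still in pointer form (e.g. a clone made with GIT_LFS_SKIP_SMUDGE=1) and that the large blob was fetched separately to an illustrative path:

# Minimal sketch: check a downloaded blob against the oid recorded in its LFS pointer.
import hashlib

def parse_pointer(pointer_path):
    """Read a Git LFS pointer file (version / oid / size lines) into a dict."""
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields  # e.g. {"version": ..., "oid": "sha256:...", "size": "377646433"}

def sha256_of(blob_path, chunk_size=1 << 20):
    """Hash the file in chunks so large checkpoints don't need to fit in memory."""
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

pointer = parse_pointer("pytorch_model.bin")            # pointer as stored in git
expected = pointer["oid"].split(":", 1)[1]              # strip the "sha256:" prefix
actual = sha256_of("pytorch_model.bin.downloaded")      # hypothetical downloaded blob
print("match" if actual == expected else "mismatch")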