boumehdi committed on
Commit
f476640
1 Parent(s): 128a063

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +2 -2
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +16 -166
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fe13c82def9230cab968b5934aa6f0bcc8b78357f3b7621735f527fda41950f
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be64e5ceb29b7d7ff2c79a1fe0408c79c9bf6d37958658d90e1023613ba1ade9
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f621c5200d8a92eb99ef0a6871e32ebf48d4916ef80959751843d842c21ca87
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f5fcaff3ffe4fc4a7b4ef93ce03bbda4b080ca208b173b45c21454d91a4b60
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6ebaa40da56eb8238486a736945960ff693e5e0ef75539b04d1fe0ea16420d1
3
- size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d78fbb1a5438682b3e56cdcbcd670790d52471dfc41d3e1df54743de1874ab2
3
+ size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21459ca4f1dfeb8d3a2ce03a6e448827af584d3ce458be1ce9e9ef9fe422742c
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc40a4be6a52cee4d7658df4041e660ffa02b0d8b5bd143bb8bb397f7b71b1a5
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f9a03f06b9dc92ae9cd16600ed9dad87df8b513d9229f31412efddb4e064bf6
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd51f54c3c9ede3156fe9d077aa80e3c39fad9c7aefb825e83dd16225db78dd
3
  size 627
trainer_state.json CHANGED
@@ -1,187 +1,37 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.96969696969697,
5
- "global_step": 2800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.91,
12
- "learning_rate": 9.999696951330385e-06,
13
- "loss": 0.0297,
14
  "step": 150
15
  },
16
  {
17
- "epoch": 1.82,
18
- "learning_rate": 9.998787805321536e-06,
19
- "loss": 0.0288,
20
  "step": 300
21
  },
22
  {
23
- "epoch": 2.42,
24
- "eval_loss": 0.22611959278583527,
25
- "eval_runtime": 391.6246,
26
- "eval_samples_per_second": 12.034,
27
- "eval_steps_per_second": 1.507,
28
- "eval_wer": 0.1515768056968464,
29
  "step": 400
30
- },
31
- {
32
- "epoch": 2.73,
33
- "learning_rate": 9.997878659312687e-06,
34
- "loss": 0.03,
35
- "step": 450
36
- },
37
- {
38
- "epoch": 3.64,
39
- "learning_rate": 9.996969513303838e-06,
40
- "loss": 0.0268,
41
- "step": 600
42
- },
43
- {
44
- "epoch": 4.55,
45
- "learning_rate": 9.996060367294988e-06,
46
- "loss": 0.0288,
47
- "step": 750
48
- },
49
- {
50
- "epoch": 4.85,
51
- "eval_loss": 0.22758068144321442,
52
- "eval_runtime": 253.9815,
53
- "eval_samples_per_second": 18.556,
54
- "eval_steps_per_second": 2.323,
55
- "eval_wer": 0.1512800949474398,
56
- "step": 800
57
- },
58
- {
59
- "epoch": 5.45,
60
- "learning_rate": 9.99515122128614e-06,
61
- "loss": 0.0271,
62
- "step": 900
63
- },
64
- {
65
- "epoch": 6.36,
66
- "learning_rate": 9.99424207527729e-06,
67
- "loss": 0.0254,
68
- "step": 1050
69
- },
70
- {
71
- "epoch": 7.27,
72
- "learning_rate": 9.993332929268441e-06,
73
- "loss": 0.0279,
74
- "step": 1200
75
- },
76
- {
77
- "epoch": 7.27,
78
- "eval_loss": 0.22872433066368103,
79
- "eval_runtime": 254.213,
80
- "eval_samples_per_second": 18.54,
81
- "eval_steps_per_second": 2.321,
82
- "eval_wer": 0.15200067819599863,
83
- "step": 1200
84
- },
85
- {
86
- "epoch": 8.18,
87
- "learning_rate": 9.992423783259593e-06,
88
- "loss": 0.0275,
89
- "step": 1350
90
- },
91
- {
92
- "epoch": 9.09,
93
- "learning_rate": 9.991514637250743e-06,
94
- "loss": 0.0264,
95
- "step": 1500
96
- },
97
- {
98
- "epoch": 9.7,
99
- "eval_loss": 0.2296951413154602,
100
- "eval_runtime": 261.105,
101
- "eval_samples_per_second": 18.05,
102
- "eval_steps_per_second": 2.26,
103
- "eval_wer": 0.15102577144794846,
104
- "step": 1600
105
- },
106
- {
107
- "epoch": 10.0,
108
- "learning_rate": 9.990605491241893e-06,
109
- "loss": 0.0264,
110
- "step": 1650
111
- },
112
- {
113
- "epoch": 10.91,
114
- "learning_rate": 9.989696345233046e-06,
115
- "loss": 0.0266,
116
- "step": 1800
117
- },
118
- {
119
- "epoch": 11.82,
120
- "learning_rate": 9.988787199224195e-06,
121
- "loss": 0.0261,
122
- "step": 1950
123
- },
124
- {
125
- "epoch": 12.12,
126
- "eval_loss": 0.23657573759555817,
127
- "eval_runtime": 260.075,
128
- "eval_samples_per_second": 18.122,
129
- "eval_steps_per_second": 2.269,
130
- "eval_wer": 0.1510681586978637,
131
- "step": 2000
132
- },
133
- {
134
- "epoch": 12.73,
135
- "learning_rate": 9.987878053215348e-06,
136
- "loss": 0.0261,
137
- "step": 2100
138
- },
139
- {
140
- "epoch": 13.64,
141
- "learning_rate": 9.986968907206498e-06,
142
- "loss": 0.0255,
143
- "step": 2250
144
- },
145
- {
146
- "epoch": 14.55,
147
- "learning_rate": 9.98605976119765e-06,
148
- "loss": 0.0268,
149
- "step": 2400
150
- },
151
- {
152
- "epoch": 14.55,
153
- "eval_loss": 0.2373363971710205,
154
- "eval_runtime": 262.4635,
155
- "eval_samples_per_second": 17.957,
156
- "eval_steps_per_second": 2.248,
157
- "eval_wer": 0.15081383519837233,
158
- "step": 2400
159
- },
160
- {
161
- "epoch": 15.45,
162
- "learning_rate": 9.9851506151888e-06,
163
- "loss": 0.0259,
164
- "step": 2550
165
- },
166
- {
167
- "epoch": 16.36,
168
- "learning_rate": 9.984247530153343e-06,
169
- "loss": 0.0253,
170
- "step": 2700
171
- },
172
- {
173
- "epoch": 16.97,
174
- "eval_loss": 0.2379976361989975,
175
- "eval_runtime": 270.8378,
176
- "eval_samples_per_second": 17.402,
177
- "eval_steps_per_second": 2.178,
178
- "eval_wer": 0.15098338419803323,
179
- "step": 2800
180
  }
181
  ],
182
- "max_steps": 1650000,
183
  "num_train_epochs": 10000,
184
- "total_flos": 6.3104115166245315e+19,
185
  "trial_name": null,
186
  "trial_params": null
187
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.379540400296516,
5
+ "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.89,
12
+ "learning_rate": 9.999702363235908e-06,
13
+ "loss": 0.0354,
14
  "step": 150
15
  },
16
  {
17
+ "epoch": 1.78,
18
+ "learning_rate": 9.99880945294363e-06,
19
+ "loss": 0.0336,
20
  "step": 300
21
  },
22
  {
23
+ "epoch": 2.38,
24
+ "eval_loss": 0.19874997437000275,
25
+ "eval_runtime": 421.2238,
26
+ "eval_samples_per_second": 11.438,
27
+ "eval_steps_per_second": 1.432,
28
+ "eval_wer": 0.15169214199908868,
29
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  }
31
  ],
32
+ "max_steps": 1680000,
33
  "num_train_epochs": 10000,
34
+ "total_flos": 9.154271872958712e+18,
35
  "trial_name": null,
36
  "trial_params": null
37
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca2239b04ab2d8f9a4cd4cd382fd3ed21a0d89184c4dfe5d7b09576582207cb6
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baa3cfd4e4131e4651786c3cda673f577d5bd99579c07be55652efaf86dc1600
3
  size 3323