ihanif committed on
Commit
91c5694
1 Parent(s): 6df5c6e

Training in progress, step 200

Browse files
checkpoint-200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:138a6ddac36f3135f9ce799bc0aa0c6ee6e8510cecdd696448298fce2403ddb0
3
  size 1934160645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0f4f0b48abf12f4a6e8914530132e9861cac4dcdd8f87a67678ef78289aa1a3
3
  size 1934160645
checkpoint-200/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d091c8707cb0763632c8b311f2d581a51bd141ec488b3c948362989bfac0bdda
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:187c1d7674c7d7e27b2030f3c8b472ab80d8863925d52fe41abcfd44c6675a3a
3
  size 967102601
checkpoint-200/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b22328e86e2f01e3b161f44df6b51159ec6b4a940fe644d7490ef269503f585
3
- size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e013f097aa68e60ca0b4bcb926e5e1bd90fa0a3de8c1cd3ec746668dceec1e
3
+ size 14575
checkpoint-200/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02ee58f3dc2c47aec3ec5dbf8581a593f6a668e46904253580e157d97de3e149
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b027835501bda1b5e4d5c8ad8afa02b3a51be5e6b8838b0f237818a9f7de87e
3
  size 557
checkpoint-200/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e82e796ecfbb8fa98125f306d5ff56f81ebaf5a863f8fefc032c86533b2fdf8f
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3323acea994593447b66bf97429c79bdd645fca1d4f541f2568e5326da64ecc7
3
  size 627
checkpoint-200/trainer_state.json CHANGED
@@ -1,156 +1,156 @@
1
  {
2
- "best_metric": 139.37651331719127,
3
- "best_model_checkpoint": "./checkpoint-100",
4
- "epoch": 2.377581120943953,
5
  "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.12,
12
- "learning_rate": 3e-06,
13
- "loss": 4.1495,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.24,
18
- "learning_rate": 6.333333333333333e-06,
19
- "loss": 2.9287,
20
  "step": 20
21
  },
22
  {
23
- "epoch": 0.35,
24
- "learning_rate": 9.666666666666667e-06,
25
- "loss": 2.0462,
26
  "step": 30
27
  },
28
  {
29
- "epoch": 0.47,
30
- "learning_rate": 9.666666666666667e-06,
31
- "loss": 1.6138,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 0.59,
36
- "learning_rate": 9.296296296296296e-06,
37
- "loss": 1.3862,
38
  "step": 50
39
  },
40
  {
41
- "epoch": 0.71,
42
- "learning_rate": 8.925925925925927e-06,
43
- "loss": 1.2604,
44
  "step": 60
45
  },
46
  {
47
- "epoch": 0.83,
48
- "learning_rate": 8.555555555555556e-06,
49
- "loss": 1.1436,
50
  "step": 70
51
  },
52
  {
53
- "epoch": 0.94,
54
- "learning_rate": 8.185185185185187e-06,
55
- "loss": 1.168,
56
  "step": 80
57
  },
58
  {
59
- "epoch": 1.07,
60
- "learning_rate": 7.814814814814816e-06,
61
- "loss": 1.1041,
62
  "step": 90
63
  },
64
  {
65
- "epoch": 1.19,
66
- "learning_rate": 7.444444444444445e-06,
67
- "loss": 0.9683,
68
  "step": 100
69
  },
70
  {
71
- "epoch": 1.19,
72
- "eval_cer": 131.61659035460045,
73
- "eval_loss": 0.8811978697776794,
74
- "eval_runtime": 593.3196,
75
- "eval_samples_per_second": 0.863,
76
- "eval_steps_per_second": 0.431,
77
- "eval_wer": 139.37651331719127,
78
  "step": 100
79
  },
80
  {
81
- "epoch": 1.31,
82
- "learning_rate": 7.074074074074074e-06,
83
- "loss": 0.909,
84
  "step": 110
85
  },
86
  {
87
- "epoch": 1.42,
88
- "learning_rate": 6.703703703703704e-06,
89
- "loss": 0.9213,
90
  "step": 120
91
  },
92
  {
93
- "epoch": 1.54,
94
- "learning_rate": 6.333333333333333e-06,
95
- "loss": 0.9092,
96
  "step": 130
97
  },
98
  {
99
- "epoch": 1.66,
100
- "learning_rate": 5.962962962962963e-06,
101
- "loss": 0.8481,
102
  "step": 140
103
  },
104
  {
105
- "epoch": 1.78,
106
- "learning_rate": 5.5925925925925926e-06,
107
- "loss": 0.8471,
108
  "step": 150
109
  },
110
  {
111
- "epoch": 1.9,
112
- "learning_rate": 5.2222222222222226e-06,
113
- "loss": 0.8504,
114
  "step": 160
115
  },
116
  {
117
- "epoch": 2.02,
118
- "learning_rate": 4.851851851851852e-06,
119
- "loss": 0.8264,
120
  "step": 170
121
  },
122
  {
123
- "epoch": 2.14,
124
- "learning_rate": 4.481481481481482e-06,
125
- "loss": 0.7236,
126
  "step": 180
127
  },
128
  {
129
- "epoch": 2.26,
130
- "learning_rate": 4.111111111111111e-06,
131
- "loss": 0.6898,
132
  "step": 190
133
  },
134
  {
135
- "epoch": 2.38,
136
- "learning_rate": 3.740740740740741e-06,
137
- "loss": 0.6848,
138
  "step": 200
139
  },
140
  {
141
- "epoch": 2.38,
142
- "eval_cer": 151.33685371478225,
143
- "eval_loss": 0.7542899250984192,
144
- "eval_runtime": 551.6472,
145
- "eval_samples_per_second": 0.928,
146
- "eval_steps_per_second": 0.464,
147
- "eval_wer": 145.9972760290557,
148
  "step": 200
149
  }
150
  ],
151
- "max_steps": 300,
152
- "num_train_epochs": 4,
153
- "total_flos": 1.86022149046272e+18,
154
  "trial_name": null,
155
  "trial_params": null
156
  }
 
1
  {
2
+ "best_metric": 52.66343825665859,
3
+ "best_model_checkpoint": "./checkpoint-200",
4
+ "epoch": 4.752941176470588,
5
  "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.24,
12
+ "learning_rate": 2.666666666666667e-06,
13
+ "loss": 4.3134,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.47,
18
+ "learning_rate": 6e-06,
19
+ "loss": 2.9329,
20
  "step": 20
21
  },
22
  {
23
+ "epoch": 0.71,
24
+ "learning_rate": 9.333333333333334e-06,
25
+ "loss": 2.0584,
26
  "step": 30
27
  },
28
  {
29
+ "epoch": 0.94,
30
+ "learning_rate": 9.829787234042554e-06,
31
+ "loss": 1.566,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 1.19,
36
+ "learning_rate": 9.617021276595745e-06,
37
+ "loss": 1.3777,
38
  "step": 50
39
  },
40
  {
41
+ "epoch": 1.42,
42
+ "learning_rate": 9.404255319148937e-06,
43
+ "loss": 1.1469,
44
  "step": 60
45
  },
46
  {
47
+ "epoch": 1.66,
48
+ "learning_rate": 9.191489361702128e-06,
49
+ "loss": 1.0638,
50
  "step": 70
51
  },
52
  {
53
+ "epoch": 1.89,
54
+ "learning_rate": 8.97872340425532e-06,
55
+ "loss": 0.9974,
56
  "step": 80
57
  },
58
  {
59
+ "epoch": 2.14,
60
+ "learning_rate": 8.765957446808512e-06,
61
+ "loss": 0.9615,
62
  "step": 90
63
  },
64
  {
65
+ "epoch": 2.38,
66
+ "learning_rate": 8.553191489361703e-06,
67
+ "loss": 0.8262,
68
  "step": 100
69
  },
70
  {
71
+ "epoch": 2.38,
72
+ "eval_cer": 149.33157314260887,
73
+ "eval_loss": 0.8188337683677673,
74
+ "eval_runtime": 459.9395,
75
+ "eval_samples_per_second": 1.113,
76
+ "eval_steps_per_second": 0.139,
77
+ "eval_wer": 146.85230024213075,
78
  "step": 100
79
  },
80
  {
81
+ "epoch": 2.61,
82
+ "learning_rate": 8.340425531914894e-06,
83
+ "loss": 0.7986,
84
  "step": 110
85
  },
86
  {
87
+ "epoch": 2.85,
88
+ "learning_rate": 8.127659574468085e-06,
89
+ "loss": 0.7707,
90
  "step": 120
91
  },
92
  {
93
+ "epoch": 3.09,
94
+ "learning_rate": 7.914893617021278e-06,
95
+ "loss": 0.7623,
96
  "step": 130
97
  },
98
  {
99
+ "epoch": 3.33,
100
+ "learning_rate": 7.702127659574469e-06,
101
+ "loss": 0.6451,
102
  "step": 140
103
  },
104
  {
105
+ "epoch": 3.56,
106
+ "learning_rate": 7.48936170212766e-06,
107
+ "loss": 0.6205,
108
  "step": 150
109
  },
110
  {
111
+ "epoch": 3.8,
112
+ "learning_rate": 7.2765957446808524e-06,
113
+ "loss": 0.6097,
114
  "step": 160
115
  },
116
  {
117
+ "epoch": 4.05,
118
+ "learning_rate": 7.0638297872340434e-06,
119
+ "loss": 0.6217,
120
  "step": 170
121
  },
122
  {
123
+ "epoch": 4.28,
124
+ "learning_rate": 6.8510638297872344e-06,
125
+ "loss": 0.4584,
126
  "step": 180
127
  },
128
  {
129
+ "epoch": 4.52,
130
+ "learning_rate": 6.6382978723404254e-06,
131
+ "loss": 0.4929,
132
  "step": 190
133
  },
134
  {
135
+ "epoch": 4.75,
136
+ "learning_rate": 6.425531914893618e-06,
137
+ "loss": 0.4843,
138
  "step": 200
139
  },
140
  {
141
+ "epoch": 4.75,
142
+ "eval_cer": 22.099862972494236,
143
+ "eval_loss": 0.6699215173721313,
144
+ "eval_runtime": 1062.3991,
145
+ "eval_samples_per_second": 0.482,
146
+ "eval_steps_per_second": 0.06,
147
+ "eval_wer": 52.66343825665859,
148
  "step": 200
149
  }
150
  ],
151
+ "max_steps": 500,
152
+ "num_train_epochs": 12,
153
+ "total_flos": 3.72044298092544e+18,
154
  "trial_name": null,
155
  "trial_params": null
156
  }
checkpoint-200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcf3a24325bb49c25270193816ed6035a253ca3ae300c31ffcec0afeb4229266
3
- size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5518cd575512706c66c89476ae9c64328c1bcac98ec06151bdc9afebf6c503fb
3
+ size 3643
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:946ddc68bfd507e62f2331943cb15a787bbdda6a4389be1fcfd25cff7522f970
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:187c1d7674c7d7e27b2030f3c8b472ab80d8863925d52fe41abcfd44c6675a3a
3
  size 967102601
runs/Dec20_20-28-50_129-146-32-172/events.out.tfevents.1671568135.129-146-32-172.151517.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:782028836de28c83d061b5870d65ffd6d0cfb2cf7e8386ba8180ee926b2c5967
3
- size 6185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bb9fad7bb496cbe780fc5b737f82d64309e3290a5409c43485e045f47471fed
3
+ size 8114