joelniklaus commited on
Commit
d4a8b56
1 Parent(s): 2e3c552

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaab15388944543359e51e90cc1ade38ffb5c46258d39257061b5b24e463351a
3
  size 1668076741
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c86c6ef1e9ed073386178347021ac781b8c02e77572c298de3cabb7139ff7bb
3
  size 1668076741
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c010fae9983d5070f0c0388b3eb868d3bd6b5a83d1368b702a466aed8c80be9
3
  size 834053717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:567827f0a9fb4e7e213e112d1698cfdc1ed84a2aa6f8e8ac3a3d6499893d619a
3
  size 834053717
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:652b49b8620dfd624403a02210a0c5bf694a92f75e9a2ccc8d38f6a933694a41
3
  size 17641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d83595397313a34341269e19cddb27d1911528d5cb71c1c4021a0f77255c067d
3
  size 17641
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4eebdf9e1618d1076edbbc7c3b6520da70b1141cff74a85b3f87f1558b8930c0
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ec0071e4302006204e99cd369255e47065d131099e23d2fd8502b466b28f338
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fe1d6c51a97574d86dcafe0758db38a16709943cffb071f1dd8342878c794fe
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:843359b4ca576c0da051410b99ae90c5c724f142fad0be29b3fca9e912a14ce4
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -75,11 +75,80 @@
75
  "eval_samples_per_second": 23.567,
76
  "eval_steps_per_second": 0.589,
77
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  }
79
  ],
80
  "max_steps": 50000,
81
  "num_train_epochs": 9223372036854775807,
82
- "total_flos": 3.156662550528e+18,
83
  "trial_name": null,
84
  "trial_params": null
85
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
75
  "eval_samples_per_second": 23.567,
76
  "eval_steps_per_second": 0.589,
77
  "step": 10000
78
+ },
79
+ {
80
+ "epoch": 0.22,
81
+ "learning_rate": 2.7695129103900878e-05,
82
+ "loss": 0.8541,
83
+ "step": 11000
84
+ },
85
+ {
86
+ "epoch": 0.24,
87
+ "learning_rate": 2.7139335532657592e-05,
88
+ "loss": 0.8387,
89
+ "step": 12000
90
+ },
91
+ {
92
+ "epoch": 0.26,
93
+ "learning_rate": 2.6530459735832775e-05,
94
+ "loss": 0.8294,
95
+ "step": 13000
96
+ },
97
+ {
98
+ "epoch": 0.28,
99
+ "learning_rate": 2.5871164172365078e-05,
100
+ "loss": 0.8318,
101
+ "step": 14000
102
+ },
103
+ {
104
+ "epoch": 0.3,
105
+ "learning_rate": 2.5164331774003996e-05,
106
+ "loss": 0.8036,
107
+ "step": 15000
108
+ },
109
+ {
110
+ "epoch": 0.32,
111
+ "learning_rate": 2.441459810041615e-05,
112
+ "loss": 0.8184,
113
+ "step": 16000
114
+ },
115
+ {
116
+ "epoch": 0.34,
117
+ "learning_rate": 2.3622237699269646e-05,
118
+ "loss": 0.8164,
119
+ "step": 17000
120
+ },
121
+ {
122
+ "epoch": 0.36,
123
+ "learning_rate": 2.2792174446485267e-05,
124
+ "loss": 0.8178,
125
+ "step": 18000
126
+ },
127
+ {
128
+ "epoch": 0.38,
129
+ "learning_rate": 2.192803799746817e-05,
130
+ "loss": 0.8153,
131
+ "step": 19000
132
+ },
133
+ {
134
+ "epoch": 0.4,
135
+ "learning_rate": 2.1033607001041156e-05,
136
+ "loss": 0.8333,
137
+ "step": 20000
138
+ },
139
+ {
140
+ "epoch": 0.4,
141
+ "eval_accuracy": 0.8461335925242551,
142
+ "eval_loss": 0.6882692575454712,
143
+ "eval_runtime": 222.9746,
144
+ "eval_samples_per_second": 22.424,
145
+ "eval_steps_per_second": 0.561,
146
+ "step": 20000
147
  }
148
  ],
149
  "max_steps": 50000,
150
  "num_train_epochs": 9223372036854775807,
151
+ "total_flos": 6.313325101056e+18,
152
  "trial_name": null,
153
  "trial_params": null
154
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c010fae9983d5070f0c0388b3eb868d3bd6b5a83d1368b702a466aed8c80be9
3
  size 834053717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:567827f0a9fb4e7e213e112d1698cfdc1ed84a2aa6f8e8ac3a3d6499893d619a
3
  size 834053717