inflaton commited on
Commit
17c20c4
1 Parent(s): 5c70f1f

Training in progress, step 1500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16c6add97730ff24d5a3da84b4e4b1d4c048b393ced862b071e58f104bef795f
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb01bd303b793ee9b03ad47871bea5cce22f3ecc606ecad28e5a2b49ef71cbf2
3
  size 1340618660
run-4/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcb8607265c0a4ea8afae1705d5620fa2a9809f2bd490a1345d578ba329d0b52
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea953fecfc4fad3e35387ba469fa90ed9e322e9426d81de36786111e7bf48c07
3
  size 1340618660
run-4/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db09bcf133c4c734b91dee7a86864a8b21b7e4e52984275103321bc800c87b26
3
  size 2681472237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c445d66fc183b8fbf2ae2d3fbee89feb7971c8d76b0108cecc8612e46078d41
3
  size 2681472237
run-4/checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54bea121f6144566f091fc3db2c9498700b03cbc999f62b42941d6dfd7452436
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2cdfcea7c19f545a5c3968413dcd70b32c860d464dd9abffbe95cff448fb946
3
  size 14244
run-4/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b1af95b8ddef74323ac8e17640e09a8c39c3d51fee72cbedb72cb1b1c730802
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:909fd078c2a20042082d4e6fe343bda98dca49bf58030b79715ccefad3cb5ba3
3
  size 1064
run-4/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1debc897a09b5f564d38d5c14028f41358d4308b7489c11fe2a009112f166360
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c539e782ca0b96266cde875b3e061950504d8d6e280a5e81a555601aa0081af
3
  size 5048
run-4/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89ed448fc318f3ecdd36c5d66ee102fcce02e21de4569328455c4afb61e15aa6
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb01bd303b793ee9b03ad47871bea5cce22f3ecc606ecad28e5a2b49ef71cbf2
3
  size 1340618660
run-4/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c0e3b078c466c46fa3dff7e95706500603eb1b3c3bc888c677e949d61086e1a
3
  size 2681472237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9095cbe73139b23adab15574dc05e73b026fbef8c169e8f6feae6d0fcd55aa0
3
  size 2681472237
run-4/checkpoint-1500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d94e9959fe10619a73c116e156a1d9aa901f533fcb5d140a1ce44038a0d2c6fd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fff9554be5662c81dc5324c0ba82c1e4075d9a58be3ecf2692e8a0f7f508c509
3
  size 14244
run-4/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5a766639dd7e9d9b0ee2238ed8a2a2792b3c83372c8aebddd33d02bc6873839
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d47b1d965732dba3a35949a6b088e36e22183e5626c3903e52293c2605dc7f
3
  size 1064
run-4/checkpoint-1500/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0107238605898123,
5
  "eval_steps": 500,
6
  "global_step": 1500,
7
  "is_hyper_param_search": true,
@@ -9,55 +9,109 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.6702412868632708,
13
- "grad_norm": 20.911739349365234,
14
- "learning_rate": 1.9406388835210846e-05,
15
- "loss": 0.5519,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 500
17
  },
18
  {
19
- "epoch": 1.0,
20
- "eval_accuracy": 0.7670130729675293,
21
- "eval_loss": 0.4711463153362274,
22
- "eval_runtime": 8.8245,
23
- "eval_samples_per_second": 338.035,
24
- "eval_steps_per_second": 21.191,
25
- "step": 746
26
- },
27
- {
28
- "epoch": 1.3404825737265416,
29
- "grad_norm": 12.272890090942383,
30
- "learning_rate": 1.801225170624455e-05,
31
- "loss": 0.3843,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "step": 1000
33
  },
34
  {
35
- "epoch": 2.0,
36
- "eval_accuracy": 0.7767348289489746,
37
- "eval_loss": 0.5532987117767334,
38
- "eval_runtime": 8.9725,
39
- "eval_samples_per_second": 332.46,
40
- "eval_steps_per_second": 20.841,
41
- "step": 1492
42
- },
43
- {
44
- "epoch": 2.0107238605898123,
45
- "grad_norm": 15.580121994018555,
46
- "learning_rate": 1.6618114577278254e-05,
47
- "loss": 0.2751,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  "step": 1500
49
  }
50
  ],
51
  "logging_steps": 500,
52
- "max_steps": 7460,
53
  "num_input_tokens_seen": 0,
54
  "num_train_epochs": 10,
55
  "save_steps": 500,
56
- "total_flos": 3564872964001584.0,
57
- "train_batch_size": 16,
58
  "trial_name": null,
59
  "trial_params": {
60
- "learning_rate": 2.0800525964177143e-05,
61
- "per_device_train_batch_size": 16
62
  }
63
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.02139037433155,
5
  "eval_steps": 500,
6
  "global_step": 1500,
7
  "is_hyper_param_search": true,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.6708012223243713,
14
+ "eval_loss": 0.625991940498352,
15
+ "eval_runtime": 8.8472,
16
+ "eval_samples_per_second": 337.169,
17
+ "eval_steps_per_second": 21.137,
18
+ "step": 187
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.6781763434410095,
23
+ "eval_loss": 0.5827099680900574,
24
+ "eval_runtime": 9.0096,
25
+ "eval_samples_per_second": 331.091,
26
+ "eval_steps_per_second": 20.756,
27
+ "step": 374
28
+ },
29
+ {
30
+ "epoch": 2.6737967914438503,
31
+ "grad_norm": 3.558804988861084,
32
+ "learning_rate": 7.935610461217781e-07,
33
+ "loss": 0.6309,
34
  "step": 500
35
  },
36
  {
37
+ "epoch": 3.0,
38
+ "eval_accuracy": 0.6935970783233643,
39
+ "eval_loss": 0.5479013323783875,
40
+ "eval_runtime": 8.9238,
41
+ "eval_samples_per_second": 334.274,
42
+ "eval_steps_per_second": 20.955,
43
+ "step": 561
44
+ },
45
+ {
46
+ "epoch": 4.0,
47
+ "eval_accuracy": 0.7096882462501526,
48
+ "eval_loss": 0.5329120755195618,
49
+ "eval_runtime": 9.016,
50
+ "eval_samples_per_second": 330.856,
51
+ "eval_steps_per_second": 20.741,
52
+ "step": 748
53
+ },
54
+ {
55
+ "epoch": 5.0,
56
+ "eval_accuracy": 0.7153871655464172,
57
+ "eval_loss": 0.5252031087875366,
58
+ "eval_runtime": 8.9923,
59
+ "eval_samples_per_second": 331.727,
60
+ "eval_steps_per_second": 20.795,
61
+ "step": 935
62
+ },
63
+ {
64
+ "epoch": 5.347593582887701,
65
+ "grad_norm": 8.559992790222168,
66
+ "learning_rate": 5.039402263693043e-07,
67
+ "loss": 0.5277,
68
  "step": 1000
69
  },
70
  {
71
+ "epoch": 6.0,
72
+ "eval_accuracy": 0.7200804352760315,
73
+ "eval_loss": 0.5211048722267151,
74
+ "eval_runtime": 8.9939,
75
+ "eval_samples_per_second": 331.669,
76
+ "eval_steps_per_second": 20.792,
77
+ "step": 1122
78
+ },
79
+ {
80
+ "epoch": 7.0,
81
+ "eval_accuracy": 0.7207509279251099,
82
+ "eval_loss": 0.5175842046737671,
83
+ "eval_runtime": 8.9813,
84
+ "eval_samples_per_second": 332.134,
85
+ "eval_steps_per_second": 20.821,
86
+ "step": 1309
87
+ },
88
+ {
89
+ "epoch": 8.0,
90
+ "eval_accuracy": 0.721756637096405,
91
+ "eval_loss": 0.5173760652542114,
92
+ "eval_runtime": 9.0321,
93
+ "eval_samples_per_second": 330.265,
94
+ "eval_steps_per_second": 20.704,
95
+ "step": 1496
96
+ },
97
+ {
98
+ "epoch": 8.02139037433155,
99
+ "grad_norm": 6.5567402839660645,
100
+ "learning_rate": 2.1431940661683059e-07,
101
+ "loss": 0.5002,
102
  "step": 1500
103
  }
104
  ],
105
  "logging_steps": 500,
106
+ "max_steps": 1870,
107
  "num_input_tokens_seen": 0,
108
  "num_train_epochs": 10,
109
  "save_steps": 500,
110
+ "total_flos": 1.6341528697599528e+16,
111
+ "train_batch_size": 64,
112
  "trial_name": null,
113
  "trial_params": {
114
+ "learning_rate": 1.0831818658742517e-06,
115
+ "per_device_train_batch_size": 64
116
  }
117
  }
run-4/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1debc897a09b5f564d38d5c14028f41358d4308b7489c11fe2a009112f166360
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c539e782ca0b96266cde875b3e061950504d8d6e280a5e81a555601aa0081af
3
  size 5048