inflaton commited on
Commit
5c70f1f
1 Parent(s): c6eea0c

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:357873f897ac28dcafef3fa3fa53be07c2ae1ce0ee7c067b30c199d7205c8456
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16c6add97730ff24d5a3da84b4e4b1d4c048b393ced862b071e58f104bef795f
3
  size 1340618660
run-4/checkpoint-1000/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3404825737265416,
5
  "eval_steps": 500,
6
  "global_step": 1000,
7
  "is_hyper_param_search": true,
@@ -9,39 +9,75 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.6702412868632708,
13
- "grad_norm": 20.911739349365234,
14
- "learning_rate": 1.9406388835210846e-05,
15
- "loss": 0.5519,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 500
17
  },
18
  {
19
- "epoch": 1.0,
20
- "eval_accuracy": 0.7670130729675293,
21
- "eval_loss": 0.4711463153362274,
22
- "eval_runtime": 8.8245,
23
- "eval_samples_per_second": 338.035,
24
- "eval_steps_per_second": 21.191,
25
- "step": 746
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
  {
28
- "epoch": 1.3404825737265416,
29
- "grad_norm": 12.272890090942383,
30
- "learning_rate": 1.801225170624455e-05,
31
- "loss": 0.3843,
32
  "step": 1000
33
  }
34
  ],
35
  "logging_steps": 500,
36
- "max_steps": 7460,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 10,
39
  "save_steps": 500,
40
- "total_flos": 2375543264345280.0,
41
- "train_batch_size": 16,
42
  "trial_name": null,
43
  "trial_params": {
44
- "learning_rate": 2.0800525964177143e-05,
45
- "per_device_train_batch_size": 16
46
  }
47
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.347593582887701,
5
  "eval_steps": 500,
6
  "global_step": 1000,
7
  "is_hyper_param_search": true,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.6708012223243713,
14
+ "eval_loss": 0.625991940498352,
15
+ "eval_runtime": 8.8472,
16
+ "eval_samples_per_second": 337.169,
17
+ "eval_steps_per_second": 21.137,
18
+ "step": 187
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.6781763434410095,
23
+ "eval_loss": 0.5827099680900574,
24
+ "eval_runtime": 9.0096,
25
+ "eval_samples_per_second": 331.091,
26
+ "eval_steps_per_second": 20.756,
27
+ "step": 374
28
+ },
29
+ {
30
+ "epoch": 2.6737967914438503,
31
+ "grad_norm": 3.558804988861084,
32
+ "learning_rate": 7.935610461217781e-07,
33
+ "loss": 0.6309,
34
  "step": 500
35
  },
36
  {
37
+ "epoch": 3.0,
38
+ "eval_accuracy": 0.6935970783233643,
39
+ "eval_loss": 0.5479013323783875,
40
+ "eval_runtime": 8.9238,
41
+ "eval_samples_per_second": 334.274,
42
+ "eval_steps_per_second": 20.955,
43
+ "step": 561
44
+ },
45
+ {
46
+ "epoch": 4.0,
47
+ "eval_accuracy": 0.7096882462501526,
48
+ "eval_loss": 0.5329120755195618,
49
+ "eval_runtime": 9.016,
50
+ "eval_samples_per_second": 330.856,
51
+ "eval_steps_per_second": 20.741,
52
+ "step": 748
53
+ },
54
+ {
55
+ "epoch": 5.0,
56
+ "eval_accuracy": 0.7153871655464172,
57
+ "eval_loss": 0.5252031087875366,
58
+ "eval_runtime": 8.9923,
59
+ "eval_samples_per_second": 331.727,
60
+ "eval_steps_per_second": 20.795,
61
+ "step": 935
62
  },
63
  {
64
+ "epoch": 5.347593582887701,
65
+ "grad_norm": 8.559992790222168,
66
+ "learning_rate": 5.039402263693043e-07,
67
+ "loss": 0.5277,
68
  "step": 1000
69
  }
70
  ],
71
  "logging_steps": 500,
72
+ "max_steps": 1870,
73
  "num_input_tokens_seen": 0,
74
  "num_train_epochs": 10,
75
  "save_steps": 500,
76
+ "total_flos": 1.0895504027404356e+16,
77
+ "train_batch_size": 64,
78
  "trial_name": null,
79
  "trial_params": {
80
+ "learning_rate": 1.0831818658742517e-06,
81
+ "per_device_train_batch_size": 64
82
  }
83
  }
run-4/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89cc74c34edf1951d871132cf3a1da9f2b46dcfd82a2bc163f0230db648eb563
3
  size 1340618660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16c6add97730ff24d5a3da84b4e4b1d4c048b393ced862b071e58f104bef795f
3
  size 1340618660
run-4/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41b80c4786ba34af72675e39f6349c9cc2e69608b2d80a05e9d7b79fdfcc8288
3
  size 2681472237
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7621ef41e046c876527411669f95e570ebd99ea9faf74293b8c69537d7e8943
3
  size 2681472237
run-4/checkpoint-500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f544045d8bc120b0bef3c491fba9f1ed6efda96a8fe519bf19d9f17a0a9934ac
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cb98e7f396f8e059c8d47db8cc319556058ea6647e1637d6876176292860582
3
  size 14244
run-4/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51ca0a82c75bd9bdff0af759ace22c41be98f08f8640c3939ed524c15e9fe8b5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cae0c55c677142dd9383a12ba34c0056a14808066d20b1f418ad7e86eee44721
3
  size 1064
run-4/checkpoint-500/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6702412868632708,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": true,
@@ -9,23 +9,41 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.6702412868632708,
13
- "grad_norm": 20.911739349365234,
14
- "learning_rate": 1.9406388835210846e-05,
15
- "loss": 0.5519,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "step": 500
17
  }
18
  ],
19
  "logging_steps": 500,
20
- "max_steps": 7460,
21
  "num_input_tokens_seen": 0,
22
  "num_train_epochs": 10,
23
  "save_steps": 500,
24
- "total_flos": 1189489874818176.0,
25
- "train_batch_size": 16,
26
  "trial_name": null,
27
  "trial_params": {
28
- "learning_rate": 2.0800525964177143e-05,
29
- "per_device_train_batch_size": 16
30
  }
31
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.6737967914438503,
5
  "eval_steps": 500,
6
  "global_step": 500,
7
  "is_hyper_param_search": true,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.6708012223243713,
14
+ "eval_loss": 0.625991940498352,
15
+ "eval_runtime": 8.8472,
16
+ "eval_samples_per_second": 337.169,
17
+ "eval_steps_per_second": 21.137,
18
+ "step": 187
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.6781763434410095,
23
+ "eval_loss": 0.5827099680900574,
24
+ "eval_runtime": 9.0096,
25
+ "eval_samples_per_second": 331.091,
26
+ "eval_steps_per_second": 20.756,
27
+ "step": 374
28
+ },
29
+ {
30
+ "epoch": 2.6737967914438503,
31
+ "grad_norm": 3.558804988861084,
32
+ "learning_rate": 7.935610461217781e-07,
33
+ "loss": 0.6309,
34
  "step": 500
35
  }
36
  ],
37
  "logging_steps": 500,
38
+ "max_steps": 1870,
39
  "num_input_tokens_seen": 0,
40
  "num_train_epochs": 10,
41
  "save_steps": 500,
42
+ "total_flos": 5457641009775480.0,
43
+ "train_batch_size": 64,
44
  "trial_name": null,
45
  "trial_params": {
46
+ "learning_rate": 1.0831818658742517e-06,
47
+ "per_device_train_batch_size": 64
48
  }
49
  }
run-4/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1debc897a09b5f564d38d5c14028f41358d4308b7489c11fe2a009112f166360
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c539e782ca0b96266cde875b3e061950504d8d6e280a5e81a555601aa0081af
3
  size 5048
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1507950b2d37b737502f824dab70976a7fa7a07f6887612e84989d3ab0cc54db
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c539e782ca0b96266cde875b3e061950504d8d6e280a5e81a555601aa0081af
3
  size 5048