stuser2023 committed on
Commit
c3f3955
1 Parent(s): 20beda7

Training in progress, epoch 4

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c70c13e5d04c26d3f1731e0285668d3d574cdf941b52bb758883a9e4048d8a5b
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9992887f6907f8916987f3e3d5b5cd5f9a668b44cebeae398a79e142aced221
3
  size 267832560
run-1/checkpoint-1072/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
run-1/checkpoint-1072/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1402dcc1a9ea22b48cf3678b18d6951fa650c0a1abcbdb78851001f2e57e5684
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9992887f6907f8916987f3e3d5b5cd5f9a668b44cebeae398a79e142aced221
3
  size 267832560
run-1/checkpoint-1072/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70f59b3cca6548a79c47d82a0ccc726e3efc9117974e48cd30348e502599ae4f
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fc9517b39355720ca587337ba7563051682d885fb178f3d079846331368417c
3
  size 535727290
run-1/checkpoint-1072/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abb759deb6668166ad5e7f05d892e3ecfaf9feca83f38a2bee6fa5a21a732911
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32ddadbd4e9ce4478d8580a2b23d53d37db7c6d53eeed56612b957a8c00b76dc
3
+ size 14308
run-1/checkpoint-1072/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a12fcb7d20e914aced25dfedb2e8cc97640fb4986601ab9ad72a7518268a1cfe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3765e02ab4bcae5228f1b22b52d5b68ecef8a19a4fc159b8e8f029723c547b6
3
  size 1064
run-1/checkpoint-1072/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.5290831606897504,
3
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-1072",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 1072,
@@ -10,63 +10,67 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.4801884889602661,
14
- "eval_matthews_correlation": 0.4522326299878879,
15
- "eval_runtime": 0.6893,
16
- "eval_samples_per_second": 1513.095,
17
- "eval_steps_per_second": 95.747,
18
  "step": 268
19
  },
20
  {
21
  "epoch": 1.87,
22
- "learning_rate": 1.0329146614031314e-05,
23
- "loss": 0.4583,
 
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_loss": 0.5012368559837341,
29
- "eval_matthews_correlation": 0.4810794261012942,
30
- "eval_runtime": 1.1504,
31
- "eval_samples_per_second": 906.647,
32
- "eval_steps_per_second": 57.372,
33
  "step": 536
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_loss": 0.5001268982887268,
38
- "eval_matthews_correlation": 0.5202123212283704,
39
- "eval_runtime": 0.6949,
40
- "eval_samples_per_second": 1500.847,
41
- "eval_steps_per_second": 94.972,
42
  "step": 804
43
  },
44
  {
45
  "epoch": 3.73,
46
- "learning_rate": 4.180845058060293e-06,
47
- "loss": 0.2539,
 
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_loss": 0.561569333076477,
53
- "eval_matthews_correlation": 0.5290831606897504,
54
- "eval_runtime": 1.2387,
55
- "eval_samples_per_second": 842.006,
56
- "eval_steps_per_second": 53.281,
57
  "step": 1072
58
  }
59
  ],
60
  "logging_steps": 500,
61
- "max_steps": 1340,
62
- "num_train_epochs": 5,
 
63
  "save_steps": 500,
64
- "total_flos": 191113458530124.0,
 
65
  "trial_name": null,
66
  "trial_params": {
67
- "learning_rate": 1.6477448170002332e-05,
68
- "num_train_epochs": 5,
69
  "per_device_train_batch_size": 32,
70
- "seed": 34
71
  }
72
  }
 
1
  {
2
+ "best_metric": 0.46849580082104064,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-804",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 1072,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5035493969917297,
14
+ "eval_matthews_correlation": 0.4141806938515634,
15
+ "eval_runtime": 0.7419,
16
+ "eval_samples_per_second": 1405.832,
17
+ "eval_steps_per_second": 88.96,
18
  "step": 268
19
  },
20
  {
21
  "epoch": 1.87,
22
+ "grad_norm": 4.896795749664307,
23
+ "learning_rate": 5.453884401804003e-06,
24
+ "loss": 0.4997,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_loss": 0.4967799484729767,
30
+ "eval_matthews_correlation": 0.44251427534495513,
31
+ "eval_runtime": 0.8433,
32
+ "eval_samples_per_second": 1236.743,
33
+ "eval_steps_per_second": 78.26,
34
  "step": 536
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_loss": 0.49384433031082153,
39
+ "eval_matthews_correlation": 0.46849580082104064,
40
+ "eval_runtime": 1.1521,
41
+ "eval_samples_per_second": 905.341,
42
+ "eval_steps_per_second": 57.289,
43
  "step": 804
44
  },
45
  {
46
  "epoch": 3.73,
47
+ "grad_norm": 8.912487030029297,
48
+ "learning_rate": 6.865029316956087e-07,
49
+ "loss": 0.3522,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_loss": 0.5385918021202087,
55
+ "eval_matthews_correlation": 0.430812413056651,
56
+ "eval_runtime": 0.8323,
57
+ "eval_samples_per_second": 1253.098,
58
+ "eval_steps_per_second": 79.295,
59
  "step": 1072
60
  }
61
  ],
62
  "logging_steps": 500,
63
+ "max_steps": 1072,
64
+ "num_input_tokens_seen": 0,
65
+ "num_train_epochs": 4,
66
  "save_steps": 500,
67
+ "total_flos": 192081608932020.0,
68
+ "train_batch_size": 32,
69
  "trial_name": null,
70
  "trial_params": {
71
+ "learning_rate": 1.0221265871912396e-05,
72
+ "num_train_epochs": 4,
73
  "per_device_train_batch_size": 32,
74
+ "seed": 8
75
  }
76
  }
run-1/checkpoint-1072/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1aa22f290642ee01c58508f6341455213d65d2b747968e31a42fb22e7b7ee22
3
- size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d6b485a188c4e5c57b6f3bb948e3315674066d3a6ec8f44d6d328263cd659b
3
+ size 4984
run-1/checkpoint-268/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
run-1/checkpoint-268/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ccdf837c8ebda4caa44681d02bb2d6ae6b721c17415e8e1a9bdb75243aad05a
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc9878783b0818c688c28abea8a5d61ace7dcab7d3f290e5b80a5cde21639f4f
3
  size 267832560
run-1/checkpoint-268/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2819fceb92563ce2132bfcd7742f3a0af37662847dd7e7950649954455007212
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e636934686b41a0f79de6208f706315ddc2a3cfa0585cee3ba2d34a439b209db
3
  size 535727290
run-1/checkpoint-268/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33a9c26fe1adbe5f1000a1f3d74c1bc587be4f7b6690bc5588a81f70304c9d14
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a7df60d1f16fe9e85b40c9a61615d24bef5f0ce05b0f715ad8da2204ad48db8
3
+ size 14308
run-1/checkpoint-268/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f9dd8f51fd82f223470aaeacd6e7b0481b1ae891ad5c9a9bed8c63aa42e6b3a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:464e697567a35a41b5d63da1ecc000461d81bd321d5860f5da26db618d04341e
3
  size 1064
run-1/checkpoint-268/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.4522326299878879,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-268",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,24 +10,26 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.4801884889602661,
14
- "eval_matthews_correlation": 0.4522326299878879,
15
- "eval_runtime": 0.6893,
16
- "eval_samples_per_second": 1513.095,
17
- "eval_steps_per_second": 95.747,
18
  "step": 268
19
  }
20
  ],
21
  "logging_steps": 500,
22
- "max_steps": 1340,
23
- "num_train_epochs": 5,
 
24
  "save_steps": 500,
25
  "total_flos": 0,
 
26
  "trial_name": null,
27
  "trial_params": {
28
- "learning_rate": 1.6477448170002332e-05,
29
- "num_train_epochs": 5,
30
  "per_device_train_batch_size": 32,
31
- "seed": 34
32
  }
33
  }
 
1
  {
2
+ "best_metric": 0.4141806938515634,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-268",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5035493969917297,
14
+ "eval_matthews_correlation": 0.4141806938515634,
15
+ "eval_runtime": 0.7419,
16
+ "eval_samples_per_second": 1405.832,
17
+ "eval_steps_per_second": 88.96,
18
  "step": 268
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 1072,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 4,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
+ "train_batch_size": 32,
28
  "trial_name": null,
29
  "trial_params": {
30
+ "learning_rate": 1.0221265871912396e-05,
31
+ "num_train_epochs": 4,
32
  "per_device_train_batch_size": 32,
33
+ "seed": 8
34
  }
35
  }
run-1/checkpoint-268/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1aa22f290642ee01c58508f6341455213d65d2b747968e31a42fb22e7b7ee22
3
- size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d6b485a188c4e5c57b6f3bb948e3315674066d3a6ec8f44d6d328263cd659b
3
+ size 4984
run-1/checkpoint-536/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
run-1/checkpoint-536/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d98edead287397dbc85e9965e55e0e9a63f182e2ec349fac928973b841db896
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:362d9c0cd2587fbb0107cad0126e6f5d8cff717dff52b10facf7769e2fb5712d
3
  size 267832560
run-1/checkpoint-536/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af1eb72f05bdc789e62e12a02f45666b68ffc9a9e3b265cf587c59eb4f859615
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5212b8745eb1c4c6ef881e3d21b4023283b2bc6765d8cb3aab22e3fe10e74e9
3
  size 535727290
run-1/checkpoint-536/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6ebe287cfea3dfa72a8b1b6f7c5dca99ab99027833636d010a42c39c8d98519
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3c3a6d458d1ea31703d0759423a03a8df70fee2479aeb6598078341111b8adb
3
+ size 14308
run-1/checkpoint-536/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:131ccd977f9f8b655127b0bfc9d5858f739a5117dd5605449b2124dee8f1f1b2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b05a47d04eae8f5a42931783c79ad8a5775116597c4d7a406555dd541bfaf966
3
  size 1064
run-1/checkpoint-536/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.4810794261012942,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-536",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
@@ -10,39 +10,42 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.4801884889602661,
14
- "eval_matthews_correlation": 0.4522326299878879,
15
- "eval_runtime": 0.6893,
16
- "eval_samples_per_second": 1513.095,
17
- "eval_steps_per_second": 95.747,
18
  "step": 268
19
  },
20
  {
21
  "epoch": 1.87,
22
- "learning_rate": 1.0329146614031314e-05,
23
- "loss": 0.4583,
 
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_loss": 0.5012368559837341,
29
- "eval_matthews_correlation": 0.4810794261012942,
30
- "eval_runtime": 1.1504,
31
- "eval_samples_per_second": 906.647,
32
- "eval_steps_per_second": 57.372,
33
  "step": 536
34
  }
35
  ],
36
  "logging_steps": 500,
37
- "max_steps": 1340,
38
- "num_train_epochs": 5,
 
39
  "save_steps": 500,
40
- "total_flos": 95441467104708.0,
 
41
  "trial_name": null,
42
  "trial_params": {
43
- "learning_rate": 1.6477448170002332e-05,
44
- "num_train_epochs": 5,
45
  "per_device_train_batch_size": 32,
46
- "seed": 34
47
  }
48
  }
 
1
  {
2
+ "best_metric": 0.44251427534495513,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-536",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5035493969917297,
14
+ "eval_matthews_correlation": 0.4141806938515634,
15
+ "eval_runtime": 0.7419,
16
+ "eval_samples_per_second": 1405.832,
17
+ "eval_steps_per_second": 88.96,
18
  "step": 268
19
  },
20
  {
21
  "epoch": 1.87,
22
+ "grad_norm": 4.896795749664307,
23
+ "learning_rate": 5.453884401804003e-06,
24
+ "loss": 0.4997,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_loss": 0.4967799484729767,
30
+ "eval_matthews_correlation": 0.44251427534495513,
31
+ "eval_runtime": 0.8433,
32
+ "eval_samples_per_second": 1236.743,
33
+ "eval_steps_per_second": 78.26,
34
  "step": 536
35
  }
36
  ],
37
  "logging_steps": 500,
38
+ "max_steps": 1072,
39
+ "num_input_tokens_seen": 0,
40
+ "num_train_epochs": 4,
41
  "save_steps": 500,
42
+ "total_flos": 96116481642000.0,
43
+ "train_batch_size": 32,
44
  "trial_name": null,
45
  "trial_params": {
46
+ "learning_rate": 1.0221265871912396e-05,
47
+ "num_train_epochs": 4,
48
  "per_device_train_batch_size": 32,
49
+ "seed": 8
50
  }
51
  }
run-1/checkpoint-536/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1aa22f290642ee01c58508f6341455213d65d2b747968e31a42fb22e7b7ee22
3
- size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d6b485a188c4e5c57b6f3bb948e3315674066d3a6ec8f44d6d328263cd659b
3
+ size 4984
run-1/checkpoint-804/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
run-1/checkpoint-804/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:945826e2756aee187ff6d9bdd54154b39351f77603e0170a083bc3c49be4ee68
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4784b2cdaf1f4f2a65b67673cf9a474f50aacc6b6b52a55770494552957df28b
3
  size 267832560
run-1/checkpoint-804/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ea911767fd5bd81c9ae263da864a0ce433894f85b467a23058011a3b4b7e2cb
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:494174d39b2739ea5767bec872526ea91a8f51fb559ed3ecd20e2d63381bd46c
3
  size 535727290
run-1/checkpoint-804/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0420cf3993252b233c288946920f1af18011a1b61e0f617168a7cf0d3fe2092b
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65c4502cc2c96e2c95cfbd885846a4486d9bbe6f82fd2b8441148c27de597eb2
3
+ size 14308
run-1/checkpoint-804/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:475f434ce1db47dc716bfd6101d5f9cecc5edefc32602d23d923f0f0f46628c3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76cbf4faf8458eeab8085fe97b4e0f9442c5aa8273abf11a8f8bb22a481d6a86
3
  size 1064
run-1/checkpoint-804/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.5202123212283704,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-804",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,48 +10,51 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.4801884889602661,
14
- "eval_matthews_correlation": 0.4522326299878879,
15
- "eval_runtime": 0.6893,
16
- "eval_samples_per_second": 1513.095,
17
- "eval_steps_per_second": 95.747,
18
  "step": 268
19
  },
20
  {
21
  "epoch": 1.87,
22
- "learning_rate": 1.0329146614031314e-05,
23
- "loss": 0.4583,
 
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_loss": 0.5012368559837341,
29
- "eval_matthews_correlation": 0.4810794261012942,
30
- "eval_runtime": 1.1504,
31
- "eval_samples_per_second": 906.647,
32
- "eval_steps_per_second": 57.372,
33
  "step": 536
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_loss": 0.5001268982887268,
38
- "eval_matthews_correlation": 0.5202123212283704,
39
- "eval_runtime": 0.6949,
40
- "eval_samples_per_second": 1500.847,
41
- "eval_steps_per_second": 94.972,
42
  "step": 804
43
  }
44
  ],
45
  "logging_steps": 500,
46
- "max_steps": 1340,
47
- "num_train_epochs": 5,
 
48
  "save_steps": 500,
49
- "total_flos": 95441467104708.0,
 
50
  "trial_name": null,
51
  "trial_params": {
52
- "learning_rate": 1.6477448170002332e-05,
53
- "num_train_epochs": 5,
54
  "per_device_train_batch_size": 32,
55
- "seed": 34
56
  }
57
  }
 
1
  {
2
+ "best_metric": 0.46849580082104064,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-804",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5035493969917297,
14
+ "eval_matthews_correlation": 0.4141806938515634,
15
+ "eval_runtime": 0.7419,
16
+ "eval_samples_per_second": 1405.832,
17
+ "eval_steps_per_second": 88.96,
18
  "step": 268
19
  },
20
  {
21
  "epoch": 1.87,
22
+ "grad_norm": 4.896795749664307,
23
+ "learning_rate": 5.453884401804003e-06,
24
+ "loss": 0.4997,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_loss": 0.4967799484729767,
30
+ "eval_matthews_correlation": 0.44251427534495513,
31
+ "eval_runtime": 0.8433,
32
+ "eval_samples_per_second": 1236.743,
33
+ "eval_steps_per_second": 78.26,
34
  "step": 536
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_loss": 0.49384433031082153,
39
+ "eval_matthews_correlation": 0.46849580082104064,
40
+ "eval_runtime": 1.1521,
41
+ "eval_samples_per_second": 905.341,
42
+ "eval_steps_per_second": 57.289,
43
  "step": 804
44
  }
45
  ],
46
  "logging_steps": 500,
47
+ "max_steps": 1072,
48
+ "num_input_tokens_seen": 0,
49
+ "num_train_epochs": 4,
50
  "save_steps": 500,
51
+ "total_flos": 96116481642000.0,
52
+ "train_batch_size": 32,
53
  "trial_name": null,
54
  "trial_params": {
55
+ "learning_rate": 1.0221265871912396e-05,
56
+ "num_train_epochs": 4,
57
  "per_device_train_batch_size": 32,
58
+ "seed": 8
59
  }
60
  }
run-1/checkpoint-804/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1aa22f290642ee01c58508f6341455213d65d2b747968e31a42fb22e7b7ee22
3
- size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d6b485a188c4e5c57b6f3bb948e3315674066d3a6ec8f44d6d328263cd659b
3
+ size 4984
runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710554081.6e5f088ca464.226.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e4eb0db1e584b1ca711fac69e781c89919befb1d668dd94d9c7557dc3b971aa
3
+ size 6705
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d71decb8fd1c3b29f479f69dd55dfb41734b34761f0097d998f78bf9dd25bdb
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d6b485a188c4e5c57b6f3bb948e3315674066d3a6ec8f44d6d328263cd659b
3
  size 4984