stuser2023 committed on
Commit
20beda7
1 Parent(s): c85eed8

Training in progress, epoch 3

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c77554997fc9372d2f41138d5724d1fb0cf6940cbf2e4a35b5ff66d9db597aaf
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c70c13e5d04c26d3f1731e0285668d3d574cdf941b52bb758883a9e4048d8a5b
3
  size 267832560
run-0/checkpoint-3207/config.json CHANGED
@@ -20,6 +20,6 @@
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
- "transformers_version": "4.35.2",
24
  "vocab_size": 30522
25
  }
 
20
  "sinusoidal_pos_embds": false,
21
  "tie_weights_": true,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
  "vocab_size": 30522
25
  }
run-0/checkpoint-3207/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd8b5a5907d61f119e37cb48d5085a194789fe19c47f09c0950ee7254b10c53b
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c70c13e5d04c26d3f1731e0285668d3d574cdf941b52bb758883a9e4048d8a5b
3
  size 267832560
run-0/checkpoint-3207/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:517170c57b9be3696da3dd842a49bd5336ec92bc76d685ea7ccaefdb308aa19b
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0762713d693092e8633e570b159f155f90374816f34b422ea7e34aa6b57ae9e8
3
  size 535727290
run-0/checkpoint-3207/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b86d21d26c9cb146bee42e59ef53f8aa2fcf92189b222d0f5ca339b6dbf1f9b6
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:445ae490ad11c5908dee003a7cb2af17d54315221652bcb189faa8802c82ae3c
3
+ size 14308
run-0/checkpoint-3207/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c6ff343cdc8c697c60c82591ed7de93723747ee6e3240097235400049bd0d1f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7669f715960383560f0bdbf6de5a8609a9498796c5d5deb7e7783436b5764a75
3
  size 1064
run-0/checkpoint-3207/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.166651669293941,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-3207",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,78 +10,86 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.47,
13
- "learning_rate": 9.447053301406701e-07,
14
- "loss": 0.6327,
 
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.94,
19
- "learning_rate": 7.702122880016472e-07,
20
- "loss": 0.5907,
 
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 1.0,
25
- "eval_loss": 0.5891121625900269,
26
- "eval_matthews_correlation": 0.0,
27
- "eval_runtime": 0.6963,
28
- "eval_samples_per_second": 1497.889,
29
- "eval_steps_per_second": 94.785,
30
  "step": 1069
31
  },
32
  {
33
  "epoch": 1.4,
34
- "learning_rate": 5.957192458626242e-07,
35
- "loss": 0.5573,
 
36
  "step": 1500
37
  },
38
  {
39
  "epoch": 1.87,
40
- "learning_rate": 4.2122620372360136e-07,
41
- "loss": 0.5487,
 
42
  "step": 2000
43
  },
44
  {
45
  "epoch": 2.0,
46
- "eval_loss": 0.55595862865448,
47
- "eval_matthews_correlation": 0.12300105566407585,
48
- "eval_runtime": 0.7049,
49
- "eval_samples_per_second": 1479.688,
50
- "eval_steps_per_second": 93.633,
51
  "step": 2138
52
  },
53
  {
54
  "epoch": 2.34,
55
- "learning_rate": 2.4673316158457843e-07,
56
- "loss": 0.5211,
 
57
  "step": 2500
58
  },
59
  {
60
  "epoch": 2.81,
61
- "learning_rate": 7.22401194455555e-08,
62
- "loss": 0.5205,
 
63
  "step": 3000
64
  },
65
  {
66
  "epoch": 3.0,
67
- "eval_loss": 0.55258709192276,
68
- "eval_matthews_correlation": 0.166651669293941,
69
- "eval_runtime": 1.1749,
70
- "eval_samples_per_second": 887.771,
71
- "eval_steps_per_second": 56.177,
72
  "step": 3207
73
  }
74
  ],
75
  "logging_steps": 500,
76
  "max_steps": 3207,
 
77
  "num_train_epochs": 3,
78
  "save_steps": 500,
79
- "total_flos": 113236081840572.0,
 
80
  "trial_name": null,
81
  "trial_params": {
82
- "learning_rate": 1.119198372279693e-06,
83
  "num_train_epochs": 3,
84
  "per_device_train_batch_size": 8,
85
- "seed": 19
86
  }
87
  }
 
1
  {
2
+ "best_metric": 0.4386209168112411,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-3207",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.47,
13
+ "grad_norm": 5.35684061050415,
14
+ "learning_rate": 2.775809181053764e-06,
15
+ "loss": 0.5956,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.94,
20
+ "grad_norm": 11.188376426696777,
21
+ "learning_rate": 2.263099690648562e-06,
22
+ "loss": 0.5375,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.0,
27
+ "eval_loss": 0.5379385948181152,
28
+ "eval_matthews_correlation": 0.29871168199754417,
29
+ "eval_runtime": 0.7649,
30
+ "eval_samples_per_second": 1363.614,
31
+ "eval_steps_per_second": 86.288,
32
  "step": 1069
33
  },
34
  {
35
  "epoch": 1.4,
36
+ "grad_norm": 10.391807556152344,
37
+ "learning_rate": 1.7503902002433598e-06,
38
+ "loss": 0.4937,
39
  "step": 1500
40
  },
41
  {
42
  "epoch": 1.87,
43
+ "grad_norm": 19.357559204101562,
44
+ "learning_rate": 1.2376807098381578e-06,
45
+ "loss": 0.4738,
46
  "step": 2000
47
  },
48
  {
49
  "epoch": 2.0,
50
+ "eval_loss": 0.526983916759491,
51
+ "eval_matthews_correlation": 0.41870797137315424,
52
+ "eval_runtime": 0.7468,
53
+ "eval_samples_per_second": 1396.551,
54
+ "eval_steps_per_second": 88.372,
55
  "step": 2138
56
  },
57
  {
58
  "epoch": 2.34,
59
+ "grad_norm": 11.473833084106445,
60
+ "learning_rate": 7.249712194329557e-07,
61
+ "loss": 0.4364,
62
  "step": 2500
63
  },
64
  {
65
  "epoch": 2.81,
66
+ "grad_norm": 12.574313163757324,
67
+ "learning_rate": 2.1226172902775366e-07,
68
+ "loss": 0.4349,
69
  "step": 3000
70
  },
71
  {
72
  "epoch": 3.0,
73
+ "eval_loss": 0.5435938239097595,
74
+ "eval_matthews_correlation": 0.4386209168112411,
75
+ "eval_runtime": 0.7548,
76
+ "eval_samples_per_second": 1381.78,
77
+ "eval_steps_per_second": 87.438,
78
  "step": 3207
79
  }
80
  ],
81
  "logging_steps": 500,
82
  "max_steps": 3207,
83
+ "num_input_tokens_seen": 0,
84
  "num_train_epochs": 3,
85
  "save_steps": 500,
86
+ "total_flos": 113055491519748.0,
87
+ "train_batch_size": 8,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "learning_rate": 3.288518671458966e-06,
91
  "num_train_epochs": 3,
92
  "per_device_train_batch_size": 8,
93
+ "seed": 24
94
  }
95
  }
run-0/checkpoint-3207/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2697eed6b7ac5cb3358c6f6a74efea40e321a5f5e6bfffafbe5f4a6b261625ae
3
- size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d71decb8fd1c3b29f479f69dd55dfb41734b34761f0097d998f78bf9dd25bdb
3
+ size 4984
runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710553848.6e5f088ca464.226.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac7778f58cb3039c820ca741293fa3121735d684832990ccb4eded482d24e50a
3
- size 6102
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:566b6d43e3a3d98e058f41bbd4d4be4f9fd3352b7212b447b67a36376d5b16df
3
+ size 7213