harrisr-smu committed on
Commit
5770267
1 Parent(s): 2a34cca

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef7ea03cbb2ef8acdcd0daa0fde218e4cd440636b7165d47e5f66b8276d76d8b
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3d39af8eaf5db8c9342afd21b6daca84a6b7eb421a18e1ded6fe0fb9e1769a
3
  size 267832560
run-0/checkpoint-2138/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d65ea46da3ca47cefe8a2cfbf43b46848a5b1fb2abf441bba7be1bd6ffc4aaaf
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f3d39af8eaf5db8c9342afd21b6daca84a6b7eb421a18e1ded6fe0fb9e1769a
3
  size 267832560
run-0/checkpoint-2138/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:022a11ae30aea8fac11fb2a1311ba454344532f15d273f4b9b0a2d97594c9d97
3
  size 535724410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1364b6c19bf7a3565df25e7200296546a23a2653c62bc826d5c8febd365b4023
3
  size 535724410
run-0/checkpoint-2138/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc984c538d815b2dcbcc83dd5d4f9fdb9b42a881f3c4b008b398f97027ea172a
3
- size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad16f0c4d7a66e49599653968d72067cedd0854ee14b22cc581f6d8e4564719
3
+ size 14054
run-0/checkpoint-2138/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:965a51e45bda376bbffcc05aff621e2e469f0b3daa7d9fb7a9e594a9125b97ef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da210971ebe84d4f26ae3da094d81caba11e5482960ce50f043ff98cf1c8d6a4
3
  size 1064
run-0/checkpoint-2138/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.3803319076352094,
3
- "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-2138",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 2138,
@@ -10,48 +10,48 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.4677268475210477,
13
- "grad_norm": 7.03068733215332,
14
- "learning_rate": 1.9917409151067755e-06,
15
- "loss": 0.5954,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.9354536950420954,
20
- "grad_norm": 7.087599754333496,
21
- "learning_rate": 1.3837613927909097e-06,
22
- "loss": 0.5462,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.0,
27
- "eval_loss": 0.5302711129188538,
28
- "eval_matthews_correlation": 0.3184146642206088,
29
- "eval_runtime": 33.164,
30
- "eval_samples_per_second": 31.45,
31
- "eval_steps_per_second": 1.99,
32
  "step": 1069
33
  },
34
  {
35
  "epoch": 1.4031805425631432,
36
- "grad_norm": 10.998974800109863,
37
- "learning_rate": 7.757818704750443e-07,
38
- "loss": 0.5095,
39
  "step": 1500
40
  },
41
  {
42
  "epoch": 1.8709073900841908,
43
- "grad_norm": 8.676448822021484,
44
- "learning_rate": 1.6780234815917888e-07,
45
- "loss": 0.4851,
46
  "step": 2000
47
  },
48
  {
49
  "epoch": 2.0,
50
- "eval_loss": 0.5344046354293823,
51
- "eval_matthews_correlation": 0.3803319076352094,
52
- "eval_runtime": 34.2159,
53
- "eval_samples_per_second": 30.483,
54
- "eval_steps_per_second": 1.929,
55
  "step": 2138
56
  }
57
  ],
@@ -60,13 +60,13 @@
60
  "num_input_tokens_seen": 0,
61
  "num_train_epochs": 2,
62
  "save_steps": 500,
63
- "total_flos": 75421814032044.0,
64
  "train_batch_size": 8,
65
  "trial_name": null,
66
  "trial_params": {
67
- "learning_rate": 2.599720437422641e-06,
68
  "num_train_epochs": 2,
69
  "per_device_train_batch_size": 8,
70
- "seed": 23
71
  }
72
  }
 
1
  {
2
+ "best_metric": 0.0,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-1069",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 2138,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.4677268475210477,
13
+ "grad_norm": 1.7254643440246582,
14
+ "learning_rate": 1.1968622152407689e-06,
15
+ "loss": 0.6226,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.9354536950420954,
20
+ "grad_norm": 7.422174453735352,
21
+ "learning_rate": 8.315196586959675e-07,
22
+ "loss": 0.5927,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 1.0,
27
+ "eval_loss": 0.5829020738601685,
28
+ "eval_matthews_correlation": 0.0,
29
+ "eval_runtime": 32.3413,
30
+ "eval_samples_per_second": 32.25,
31
+ "eval_steps_per_second": 2.041,
32
  "step": 1069
33
  },
34
  {
35
  "epoch": 1.4031805425631432,
36
+ "grad_norm": 3.6254794597625732,
37
+ "learning_rate": 4.661771021511663e-07,
38
+ "loss": 0.5612,
39
  "step": 1500
40
  },
41
  {
42
  "epoch": 1.8709073900841908,
43
+ "grad_norm": 4.46693229675293,
44
+ "learning_rate": 1.0083454560636514e-07,
45
+ "loss": 0.5373,
46
  "step": 2000
47
  },
48
  {
49
  "epoch": 2.0,
50
+ "eval_loss": 0.5621180534362793,
51
+ "eval_matthews_correlation": 0.0,
52
+ "eval_runtime": 28.7656,
53
+ "eval_samples_per_second": 36.259,
54
+ "eval_steps_per_second": 2.294,
55
  "step": 2138
56
  }
57
  ],
 
60
  "num_input_tokens_seen": 0,
61
  "num_train_epochs": 2,
62
  "save_steps": 500,
63
+ "total_flos": 75003196354260.0,
64
  "train_batch_size": 8,
65
  "trial_name": null,
66
  "trial_params": {
67
+ "learning_rate": 1.56220477178557e-06,
68
  "num_train_epochs": 2,
69
  "per_device_train_batch_size": 8,
70
+ "seed": 21
71
  }
72
  }
run-0/checkpoint-2138/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef3851731c9251ecf7b27e7c164fec575b150a35fa273ebb279f85322b83ff1b
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dacdab469fdcefaf9db3c5881569ed8f6809388c6a56ea9627c14d9ffaa0fc76
3
  size 5048
runs/Apr23_21-08-52_949a71f95aca/events.out.tfevents.1713912557.949a71f95aca.1272.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a585ed1019c7b682f66058432da448eba291446b6bbfd2a807db3c1eccab2ad7
3
- size 5405
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c51bac0046fa9bc7d7afbd72de75bae006babf73388d45e3140b6b1bdbce386f
3
+ size 6516