Commit 79965b6 by Ethan615
Parent: e46601f

Training in progress, epoch 1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3856f5d52b139f68324c7fde3f7b30f487ee6e7fee6adeb65d95f8f88928dc48
+oid sha256:adf4873f03711bcd1e5aac7f2cccc8c1e3507a2ce13e9346f72877865b9aa266
 size 267832560
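The large binaries in this commit are tracked with Git LFS, so each weight/optimizer/state diff here shows only the three-line pointer file: the LFS spec version, the SHA-256 object id of the blob, and its size in bytes. A changed oid with an unchanged size means the weights were overwritten in place. As a minimal sketch (assuming a local clone of this repository with the LFS objects pulled; the path is illustrative), the tensors behind the pointer can be inspected like this:

import os
from safetensors.torch import load_file  # pip install safetensors

# Illustrative path to a local clone of this model repository (assumption).
repo_dir = "./distilbert-base-uncased-finetuned-cola"

# Loads the actual tensors that the LFS pointer above refers to.
state_dict = load_file(os.path.join(repo_dir, "model.safetensors"))
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape), str(tensor.dtype))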
run-3/checkpoint-4276/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d07018f8d8711ff007d435c37d6370c998099d5da175275fb842585fb0318b5
+oid sha256:9b79ac969e55af980c6ecffec584fca548259b204d7dd280677b4565c71b275f
 size 267832560
run-3/checkpoint-4276/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfa4633d55816cfe6eca7524f8815fa881fc69129cd7c1f7abc6d16346f4722b
+oid sha256:df469924aa7f005d2bd0d92593b898e648585bd1333307e56c541f1bf7e85b37
 size 535727290
run-3/checkpoint-4276/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86e67acc8c695dd03f71ec0d0b05d2661de64c22350ea274426e0f5c6f15576e
+oid sha256:2f50295e8a0c4f0f933e09f97afc25144ae22817ebc0c489063f461ce78b8860
 size 14244
run-3/checkpoint-4276/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3788bef18ab31e49053128984c1d7f892c3b1ad9a0367cbb365fddddc8b3336b
+oid sha256:73c5117f48e54404543986abbdd101f260b16cc1231e96c07ee8bf4f093b84d7
 size 1064
run-3/checkpoint-4276/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.4174403779614697,
+  "best_metric": 0.27657164760495423,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-4276",
   "epoch": 2.0,
   "eval_steps": 500,
@@ -10,68 +10,68 @@
   "log_history": [
     {
       "epoch": 0.23,
-      "learning_rate": 2.109376138070761e-06,
-      "loss": 0.5971,
+      "learning_rate": 1.3504098486610872e-06,
+      "loss": 0.6235,
       "step": 500
     },
     {
       "epoch": 0.47,
-      "learning_rate": 1.8300625604660523e-06,
-      "loss": 0.575,
+      "learning_rate": 1.1715949852260916e-06,
+      "loss": 0.6084,
       "step": 1000
     },
     {
       "epoch": 0.7,
-      "learning_rate": 1.5507489828613434e-06,
-      "loss": 0.5345,
+      "learning_rate": 9.92780121791096e-07,
+      "loss": 0.5687,
       "step": 1500
     },
     {
       "epoch": 0.94,
-      "learning_rate": 1.2714354052566345e-06,
-      "loss": 0.5352,
+      "learning_rate": 8.139652583561002e-07,
+      "loss": 0.5599,
       "step": 2000
     },
     {
       "epoch": 1.0,
-      "eval_loss": 0.5899004936218262,
-      "eval_matthews_correlation": 0.3194134442722084,
-      "eval_runtime": 0.7491,
-      "eval_samples_per_second": 1392.35,
-      "eval_steps_per_second": 88.107,
+      "eval_loss": 0.5971149802207947,
+      "eval_matthews_correlation": 0.0463559874942472,
+      "eval_runtime": 0.7351,
+      "eval_samples_per_second": 1418.884,
+      "eval_steps_per_second": 89.786,
       "step": 2138
     },
     {
       "epoch": 1.17,
-      "learning_rate": 9.921218276519258e-07,
-      "loss": 0.4939,
+      "learning_rate": 6.351503949211046e-07,
+      "loss": 0.5449,
       "step": 2500
     },
     {
       "epoch": 1.4,
-      "learning_rate": 7.128082500472169e-07,
-      "loss": 0.4899,
+      "learning_rate": 4.563355314861089e-07,
+      "loss": 0.5175,
       "step": 3000
     },
     {
       "epoch": 1.64,
-      "learning_rate": 4.3349467244250807e-07,
-      "loss": 0.4685,
+      "learning_rate": 2.7752066805111325e-07,
+      "loss": 0.535,
       "step": 3500
     },
     {
       "epoch": 1.87,
-      "learning_rate": 1.5418109483779928e-07,
-      "loss": 0.4906,
+      "learning_rate": 9.870580461611762e-08,
+      "loss": 0.5162,
       "step": 4000
     },
     {
       "epoch": 2.0,
-      "eval_loss": 0.5730822682380676,
-      "eval_matthews_correlation": 0.4174403779614697,
-      "eval_runtime": 0.7482,
-      "eval_samples_per_second": 1394.024,
-      "eval_steps_per_second": 88.212,
+      "eval_loss": 0.571711540222168,
+      "eval_matthews_correlation": 0.27657164760495423,
+      "eval_runtime": 0.7318,
+      "eval_samples_per_second": 1425.327,
+      "eval_steps_per_second": 90.193,
       "step": 4276
     }
   ],
@@ -80,13 +80,13 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 500,
-  "total_flos": 65347823599488.0,
+  "total_flos": 65111866045632.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 2.38868971567547e-06,
+    "learning_rate": 1.529224712096083e-06,
     "num_train_epochs": 2,
     "per_device_train_batch_size": 4,
-    "seed": 28
+    "seed": 5
   }
 }
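trainer_state.json records each trial's full training log: learning rate and loss every 500 steps, one evaluation entry per epoch (Matthews correlation and loss on the validation set), and the trial_params drawn for the hyperparameter search. The diff above replaces run-3's state with a new trial using a lower learning rate and a different seed. As a small sketch (the path is illustrative, assuming a local clone), the evaluation curve can be pulled straight out of the JSON:

import json

# Illustrative path inside a local clone of the repository (assumption).
with open("run-3/checkpoint-4276/trainer_state.json") as f:
    state = json.load(f)

print("trial params:", state["trial_params"])
print("best metric (Matthews correlation):", state["best_metric"])

# Keep only the per-epoch evaluation entries from log_history.
evals = [e for e in state["log_history"] if "eval_matthews_correlation" in e]
for e in evals:
    print(f'epoch {e["epoch"]}: '
          f'MCC={e["eval_matthews_correlation"]:.4f}, '
          f'eval_loss={e["eval_loss"]:.4f}')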
run-3/checkpoint-4276/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58fae703abd28cb0ab5528ac939f06e8a9578110f6c6558da0b2c2f9c2704502
+oid sha256:ff0b8762417b71e573ee14cf5d6c55e801af11f299848bcf4bde51f1e4499d53
 size 4792
run-4/checkpoint-268/config.json ADDED
@@ -0,0 +1,25 @@
+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}
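This config describes the checkpoint's architecture: a 6-layer, 12-head DistilBERT with hidden size 768 and a single-label sequence-classification head. A minimal loading sketch for this checkpoint directory (assuming a local clone with LFS objects pulled; the path is illustrative):

from transformers import AutoConfig, AutoModelForSequenceClassification

# Illustrative local path to the checkpoint added in this commit (assumption).
ckpt = "run-4/checkpoint-268"

config = AutoConfig.from_pretrained(ckpt)
print(config.model_type, config.n_layers, config.n_heads, config.dim)

# Instantiates DistilBertForSequenceClassification with the weights
# stored in model.safetensors next to this config.
model = AutoModelForSequenceClassification.from_pretrained(ckpt)
print(f"{model.num_parameters():,} parameters")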
run-4/checkpoint-268/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adf4873f03711bcd1e5aac7f2cccc8c1e3507a2ce13e9346f72877865b9aa266
+size 267832560
run-4/checkpoint-268/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8017e85329f480f7f722447b565c27ed2c5a8ca84e9d341886d1808a9efaafec
+size 535727290
run-4/checkpoint-268/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f3c6f33738840ae66d602ec44a31805c3c1244f220af1ccc8bbf30e6d301f82
+size 14244
run-4/checkpoint-268/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48a779c7928a522e4fd10586286eacb8a2942eb602be0c498de64a3454085c13
+size 1064
run-4/checkpoint-268/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
run-4/checkpoint-268/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-4/checkpoint-268/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}
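Together, special_tokens_map.json, tokenizer.json, vocab.txt, and this tokenizer_config.json pin the lowercased WordPiece tokenizer ([PAD]=0, [UNK]=100, [CLS]=101, [SEP]=102, [MASK]=103, model_max_length 512), so the checkpoint can be loaded standalone. A minimal loading sketch (local path is illustrative):

from transformers import AutoTokenizer

# Illustrative local path to the checkpoint added in this commit (assumption).
tokenizer = AutoTokenizer.from_pretrained("run-4/checkpoint-268")

enc = tokenizer("the cat sat on the mat.", truncation=True, max_length=512)
print(tokenizer.convert_ids_to_tokens(enc["input_ids"]))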
run-4/checkpoint-268/trainer_state.json ADDED
@@ -0,0 +1,35 @@
+{
+  "best_metric": 0.4981947529906373,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-4/checkpoint-268",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 268,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.478412002325058,
+      "eval_matthews_correlation": 0.4981947529906373,
+      "eval_runtime": 0.7412,
+      "eval_samples_per_second": 1407.09,
+      "eval_steps_per_second": 89.039,
+      "step": 268
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1072,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.546889870762945e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 32,
+    "seed": 6
+  }
+}
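"is_hyper_param_search": true and the trial_params block show that these run-*/checkpoint-* directories come from per-trial training during a hyperparameter search over learning rate, number of epochs, batch size, and seed, with eval_matthews_correlation as the objective. The author's actual search script is not part of this commit; the following is only a sketch of how Trainer.hyperparameter_search with an Optuna backend produces trials like these (the dataset wiring, search ranges, and trial count are assumptions):

import numpy as np
import evaluate
from datasets import load_dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
raw = load_dataset("glue", "cola")
encoded = raw.map(lambda b: tokenizer(b["sentence"], truncation=True), batched=True)

matthews = evaluate.load("matthews_correlation")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return matthews.compute(predictions=preds, references=labels)

def model_init():
    # A fresh model per trial, as required by hyperparameter_search.
    return AutoModelForSequenceClassification.from_pretrained(
        "distilbert-base-uncased", num_labels=2)

def hp_space(trial):
    # Search over the same fields that appear in "trial_params" above
    # (ranges are illustrative assumptions).
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 1, 5),
        "per_device_train_batch_size": trial.suggest_categorical(
            "per_device_train_batch_size", [4, 8, 16, 32, 64]),
        "seed": trial.suggest_int("seed", 1, 40),
    }

args = TrainingArguments(
    output_dir="distilbert-base-uncased-finetuned-cola",
    evaluation_strategy="epoch",
    save_strategy="epoch",
)

trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=encoded["train"],
    eval_dataset=encoded["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

best_trial = trainer.hyperparameter_search(
    direction="maximize", backend="optuna", hp_space=hp_space, n_trials=10)
print(best_trial)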
run-4/checkpoint-268/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e659d03ffe6756d57c4e0e598cf990e7f41d01544dda0051abdb26300ef89e7f
+size 4792
run-4/checkpoint-268/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
runs/Dec23_01-09-39_cab176ec49ea/events.out.tfevents.1703294598.cab176ec49ea.681.5 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0aec5cbcca727aa7079f2bff88b7723e3256d02957fd46f8f12881568ec6a043
-size 5459
+oid sha256:c3a7c1c4dcb46e84582be78fff6345d3c543e7652a5375702e10317a67d72d51
+size 6619
runs/Dec23_01-09-39_cab176ec49ea/events.out.tfevents.1703294819.cab176ec49ea.681.6 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7916c397e5c4e447fd066debeae674f6659881eeec61b70d611840c9dd919c88
+size 5167
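The events.out.tfevents.* files under runs/ are TensorBoard event logs written during training; the existing one grew from 5459 to 6619 bytes as new scalars were appended, and a second log file was added alongside it. A small sketch of reading them back (the directory path comes from this diff; the scalar tag name is an assumption about the Trainer's default eval/ prefix):

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Directory containing the event files shown in this diff (local clone assumed).
acc = EventAccumulator("runs/Dec23_01-09-39_cab176ec49ea")
acc.Reload()

print(acc.Tags()["scalars"])  # list the scalar tags actually present
for event in acc.Scalars("eval/matthews_correlation"):  # assumed tag name
    print(event.step, event.value)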
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff0b8762417b71e573ee14cf5d6c55e801af11f299848bcf4bde51f1e4499d53
+oid sha256:e659d03ffe6756d57c4e0e598cf990e7f41d01544dda0051abdb26300ef89e7f
 size 4792