Ethan615 committed on Dec 23, 2023

Commit

79965b6

•

1 Parent(s): e46601f

Training in progress, epoch 1

Browse files

Files changed (21) hide show

model.safetensors +1 -1
run-3/checkpoint-4276/model.safetensors +1 -1
run-3/checkpoint-4276/optimizer.pt +1 -1
run-3/checkpoint-4276/rng_state.pth +1 -1
run-3/checkpoint-4276/scheduler.pt +1 -1
run-3/checkpoint-4276/trainer_state.json +30 -30
run-3/checkpoint-4276/training_args.bin +1 -1
run-4/checkpoint-268/config.json +25 -0
run-4/checkpoint-268/model.safetensors +3 -0
run-4/checkpoint-268/optimizer.pt +3 -0
run-4/checkpoint-268/rng_state.pth +3 -0
run-4/checkpoint-268/scheduler.pt +3 -0
run-4/checkpoint-268/special_tokens_map.json +7 -0
run-4/checkpoint-268/tokenizer.json +0 -0
run-4/checkpoint-268/tokenizer_config.json +55 -0
run-4/checkpoint-268/trainer_state.json +35 -0
run-4/checkpoint-268/training_args.bin +3 -0
run-4/checkpoint-268/vocab.txt +0 -0
runs/Dec23_01-09-39_cab176ec49ea/events.out.tfevents.1703294598.cab176ec49ea.681.5 +2 -2
runs/Dec23_01-09-39_cab176ec49ea/events.out.tfevents.1703294819.cab176ec49ea.681.6 +3 -0
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3856f5d52b139f68324c7fde3f7b30f487ee6e7fee6adeb65d95f8f88928dc48
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:adf4873f03711bcd1e5aac7f2cccc8c1e3507a2ce13e9346f72877865b9aa266
 size 267832560

run-3/checkpoint-4276/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d07018f8d8711ff007d435c37d6370c998099d5da175275fb842585fb0318b5
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b79ac969e55af980c6ecffec584fca548259b204d7dd280677b4565c71b275f
 size 267832560

run-3/checkpoint-4276/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfa4633d55816cfe6eca7524f8815fa881fc69129cd7c1f7abc6d16346f4722b
 size 535727290

 version https://git-lfs.github.com/spec/v1
+oid sha256:df469924aa7f005d2bd0d92593b898e648585bd1333307e56c541f1bf7e85b37
 size 535727290

run-3/checkpoint-4276/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86e67acc8c695dd03f71ec0d0b05d2661de64c22350ea274426e0f5c6f15576e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f50295e8a0c4f0f933e09f97afc25144ae22817ebc0c489063f461ce78b8860
 size 14244

run-3/checkpoint-4276/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3788bef18ab31e49053128984c1d7f892c3b1ad9a0367cbb365fddddc8b3336b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:73c5117f48e54404543986abbdd101f260b16cc1231e96c07ee8bf4f093b84d7
 size 1064

run-3/checkpoint-4276/trainer_state.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.4174403779614697,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-4276",
   "epoch": 2.0,
   "eval_steps": 500,
@@ -10,68 +10,68 @@
   "log_history": [
     {
       "epoch": 0.23,
-      "learning_rate": 2.109376138070761e-06,
-      "loss": 0.5971,
       "step": 500
     },
     {
       "epoch": 0.47,
-      "learning_rate": 1.8300625604660523e-06,
-      "loss": 0.575,
       "step": 1000
     },
     {
       "epoch": 0.7,
-      "learning_rate": 1.5507489828613434e-06,
-      "loss": 0.5345,
       "step": 1500
     },
     {
       "epoch": 0.94,
-      "learning_rate": 1.2714354052566345e-06,
-      "loss": 0.5352,
       "step": 2000
     },
     {
       "epoch": 1.0,
-      "eval_loss": 0.5899004936218262,
-      "eval_matthews_correlation": 0.3194134442722084,
-      "eval_runtime": 0.7491,
-      "eval_samples_per_second": 1392.35,
-      "eval_steps_per_second": 88.107,
       "step": 2138
     },
     {
       "epoch": 1.17,
-      "learning_rate": 9.921218276519258e-07,
-      "loss": 0.4939,
       "step": 2500
     },
     {
       "epoch": 1.4,
-      "learning_rate": 7.128082500472169e-07,
-      "loss": 0.4899,
       "step": 3000
     },
     {
       "epoch": 1.64,
-      "learning_rate": 4.3349467244250807e-07,
-      "loss": 0.4685,
       "step": 3500
     },
     {
       "epoch": 1.87,
-      "learning_rate": 1.5418109483779928e-07,
-      "loss": 0.4906,
       "step": 4000
     },
     {
       "epoch": 2.0,
-      "eval_loss": 0.5730822682380676,
-      "eval_matthews_correlation": 0.4174403779614697,
-      "eval_runtime": 0.7482,
-      "eval_samples_per_second": 1394.024,
-      "eval_steps_per_second": 88.212,
       "step": 4276
     }
   ],
@@ -80,13 +80,13 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 500,
-  "total_flos": 65347823599488.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": {
-    "learning_rate": 2.38868971567547e-06,
     "num_train_epochs": 2,
     "per_device_train_batch_size": 4,
-    "seed": 28
   }
 }

 {
+  "best_metric": 0.27657164760495423,
   "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-4276",
   "epoch": 2.0,
   "eval_steps": 500,
   "log_history": [
     {
       "epoch": 0.23,
+      "learning_rate": 1.3504098486610872e-06,
+      "loss": 0.6235,
       "step": 500
     },
     {
       "epoch": 0.47,
+      "learning_rate": 1.1715949852260916e-06,
+      "loss": 0.6084,
       "step": 1000
     },
     {
       "epoch": 0.7,
+      "learning_rate": 9.92780121791096e-07,
+      "loss": 0.5687,
       "step": 1500
     },
     {
       "epoch": 0.94,
+      "learning_rate": 8.139652583561002e-07,
+      "loss": 0.5599,
       "step": 2000
     },
     {
       "epoch": 1.0,
+      "eval_loss": 0.5971149802207947,
+      "eval_matthews_correlation": 0.0463559874942472,
+      "eval_runtime": 0.7351,
+      "eval_samples_per_second": 1418.884,
+      "eval_steps_per_second": 89.786,
       "step": 2138
     },
     {
       "epoch": 1.17,
+      "learning_rate": 6.351503949211046e-07,
+      "loss": 0.5449,
       "step": 2500
     },
     {
       "epoch": 1.4,
+      "learning_rate": 4.563355314861089e-07,
+      "loss": 0.5175,
       "step": 3000
     },
     {
       "epoch": 1.64,
+      "learning_rate": 2.7752066805111325e-07,
+      "loss": 0.535,
       "step": 3500
     },
     {
       "epoch": 1.87,
+      "learning_rate": 9.870580461611762e-08,
+      "loss": 0.5162,
       "step": 4000
     },
     {
       "epoch": 2.0,
+      "eval_loss": 0.571711540222168,
+      "eval_matthews_correlation": 0.27657164760495423,
+      "eval_runtime": 0.7318,
+      "eval_samples_per_second": 1425.327,
+      "eval_steps_per_second": 90.193,
       "step": 4276
     }
   ],
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 500,
+  "total_flos": 65111866045632.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": {
+    "learning_rate": 1.529224712096083e-06,
     "num_train_epochs": 2,
     "per_device_train_batch_size": 4,
+    "seed": 5
   }
 }

run-3/checkpoint-4276/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58fae703abd28cb0ab5528ac939f06e8a9578110f6c6558da0b2c2f9c2704502
 size 4792

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff0b8762417b71e573ee14cf5d6c55e801af11f299848bcf4bde51f1e4499d53
 size 4792

run-4/checkpoint-268/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-4/checkpoint-268/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:adf4873f03711bcd1e5aac7f2cccc8c1e3507a2ce13e9346f72877865b9aa266
+size 267832560

run-4/checkpoint-268/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8017e85329f480f7f722447b565c27ed2c5a8ca84e9d341886d1808a9efaafec
+size 535727290

run-4/checkpoint-268/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3f3c6f33738840ae66d602ec44a31805c3c1244f220af1ccc8bbf30e6d301f82
+size 14244

run-4/checkpoint-268/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48a779c7928a522e4fd10586286eacb8a2942eb602be0c498de64a3454085c13
+size 1064

run-4/checkpoint-268/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-268/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-268/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-268/trainer_state.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "best_metric": 0.4981947529906373,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-4/checkpoint-268",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 268,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.478412002325058,
+      "eval_matthews_correlation": 0.4981947529906373,
+      "eval_runtime": 0.7412,
+      "eval_samples_per_second": 1407.09,
+      "eval_steps_per_second": 89.039,
+      "step": 268
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1072,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.546889870762945e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 32,
+    "seed": 6
+  }
+}

run-4/checkpoint-268/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e659d03ffe6756d57c4e0e598cf990e7f41d01544dda0051abdb26300ef89e7f
+size 4792

run-4/checkpoint-268/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Dec23_01-09-39_cab176ec49ea/events.out.tfevents.1703294598.cab176ec49ea.681.5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0aec5cbcca727aa7079f2bff88b7723e3256d02957fd46f8f12881568ec6a043
-size 5459

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3a7c1c4dcb46e84582be78fff6345d3c543e7652a5375702e10317a67d72d51
+size 6619

runs/Dec23_01-09-39_cab176ec49ea/events.out.tfevents.1703294819.cab176ec49ea.681.6 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7916c397e5c4e447fd066debeae674f6659881eeec61b70d611840c9dd919c88
+size 5167

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff0b8762417b71e573ee14cf5d6c55e801af11f299848bcf4bde51f1e4499d53
 size 4792

 version https://git-lfs.github.com/spec/v1
+oid sha256:e659d03ffe6756d57c4e0e598cf990e7f41d01544dda0051abdb26300ef89e7f
 size 4792