LarryTW committed on Dec 23, 2023

Commit

1b92d74

1 Parent(s): e495311

Training in progress, epoch 4

Browse files

Files changed (35) hide show

model.safetensors +1 -1
run-7/checkpoint-1072/config.json +25 -0
run-7/checkpoint-1072/model.safetensors +3 -0
run-7/checkpoint-1072/optimizer.pt +3 -0
run-7/checkpoint-1072/rng_state.pth +3 -0
run-7/checkpoint-1072/scheduler.pt +3 -0
run-7/checkpoint-1072/special_tokens_map.json +7 -0
run-7/checkpoint-1072/tokenizer.json +0 -0
run-7/checkpoint-1072/tokenizer_config.json +55 -0
run-7/checkpoint-1072/trainer_state.json +74 -0
run-7/checkpoint-1072/training_args.bin +3 -0
run-7/checkpoint-1072/vocab.txt +0 -0
run-7/checkpoint-536/config.json +25 -0
run-7/checkpoint-536/model.safetensors +3 -0
run-7/checkpoint-536/optimizer.pt +3 -0
run-7/checkpoint-536/rng_state.pth +3 -0
run-7/checkpoint-536/scheduler.pt +3 -0
run-7/checkpoint-536/special_tokens_map.json +7 -0
run-7/checkpoint-536/tokenizer.json +0 -0
run-7/checkpoint-536/tokenizer_config.json +55 -0
run-7/checkpoint-536/trainer_state.json +50 -0
run-7/checkpoint-536/training_args.bin +3 -0
run-7/checkpoint-536/vocab.txt +0 -0
run-7/checkpoint-804/config.json +25 -0
run-7/checkpoint-804/model.safetensors +3 -0
run-7/checkpoint-804/optimizer.pt +3 -0
run-7/checkpoint-804/rng_state.pth +3 -0
run-7/checkpoint-804/scheduler.pt +3 -0
run-7/checkpoint-804/special_tokens_map.json +7 -0
run-7/checkpoint-804/tokenizer.json +0 -0
run-7/checkpoint-804/tokenizer_config.json +55 -0
run-7/checkpoint-804/trainer_state.json +59 -0
run-7/checkpoint-804/training_args.bin +3 -0
run-7/checkpoint-804/vocab.txt +0 -0
runs/Dec23_01-44-29_cab255cafe95/events.out.tfevents.1703296853.cab255cafe95.13310.9 +2 -2

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bed2ce5875b094ffcccc83f310fc06d112efe2caf2c94b67b258169d58e2948
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f7667f7c1ad16cdef3264c88fb5f1338b4584047676a1c15ebe4619ade199c2
 size 267832560

run-7/checkpoint-1072/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-7/checkpoint-1072/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f7667f7c1ad16cdef3264c88fb5f1338b4584047676a1c15ebe4619ade199c2
+size 267832560

run-7/checkpoint-1072/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6f14cca08e3903b5aeeb49e30a1719a92cba1b420149a100ab7ad2aab24b71d
+size 535727290

run-7/checkpoint-1072/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a91eb1cb7fd3f940048b8a777e4dc135bfa6833f07cd4ab52581b15d643063e
+size 14244

run-7/checkpoint-1072/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3bb93df7c9f67b048c9e1711a60f34d805f83df177a5037106aad0f3626f1ac9
+size 1064

run-7/checkpoint-1072/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-1072/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-1072/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-1072/trainer_state.json ADDED Viewed

	@@ -0,0 +1,74 @@

+{
+  "best_metric": 0.4356146646972585,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-7/checkpoint-1072",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1072,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5343004465103149,
+      "eval_matthews_correlation": 0.29918510040507346,
+      "eval_runtime": 0.7621,
+      "eval_samples_per_second": 1368.513,
+      "eval_steps_per_second": 86.598,
+      "step": 268
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 2.9668501983081725e-06,
+      "loss": 0.5193,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5236551761627197,
+      "eval_matthews_correlation": 0.4241202758215723,
+      "eval_runtime": 0.8223,
+      "eval_samples_per_second": 1268.439,
+      "eval_steps_per_second": 80.266,
+      "step": 536
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.5109657645225525,
+      "eval_matthews_correlation": 0.4345489406668151,
+      "eval_runtime": 0.8208,
+      "eval_samples_per_second": 1270.721,
+      "eval_steps_per_second": 80.41,
+      "step": 804
+    },
+    {
+      "epoch": 3.73,
+      "learning_rate": 3.7344967531151816e-07,
+      "loss": 0.412,
+      "step": 1000
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.5060515999794006,
+      "eval_matthews_correlation": 0.4356146646972585,
+      "eval_runtime": 0.8474,
+      "eval_samples_per_second": 1230.852,
+      "eval_steps_per_second": 77.887,
+      "step": 1072
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1072,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 190987976716944.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.560250721304826e-06,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 32,
+    "seed": 28
+  }
+}

run-7/checkpoint-1072/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d64e83635a4da4783e4c55867a30677ea946d7ffee6806d5df9c9caef4e2a1
+size 4792

run-7/checkpoint-1072/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-536/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-7/checkpoint-536/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2cf022ed9db6c4c4d238e486968562c5889ed201007c6e9f4f8228b9dc18ac18
+size 267832560

run-7/checkpoint-536/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9061c34a7fb1bfa6ed8bca9b9b65fbf554748a1b7cc9088de0977425ff0a38a
+size 535727290

run-7/checkpoint-536/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f9e3aaf6b37b75264fbb2c562b3c32515689962db3ac97d7b0606726a5f7b4f
+size 14244

run-7/checkpoint-536/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4dd9fdda0e9c57308f1129be1b70c14bbabc1911fc7f8a62802309576f149e62
+size 1064

run-7/checkpoint-536/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-536/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-536/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-536/trainer_state.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "best_metric": 0.4241202758215723,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-7/checkpoint-536",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 536,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5343004465103149,
+      "eval_matthews_correlation": 0.29918510040507346,
+      "eval_runtime": 0.7621,
+      "eval_samples_per_second": 1368.513,
+      "eval_steps_per_second": 86.598,
+      "step": 268
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 2.9668501983081725e-06,
+      "loss": 0.5193,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5236551761627197,
+      "eval_matthews_correlation": 0.4241202758215723,
+      "eval_runtime": 0.8223,
+      "eval_samples_per_second": 1268.439,
+      "eval_steps_per_second": 80.266,
+      "step": 536
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1072,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 95730204637716.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.560250721304826e-06,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 32,
+    "seed": 28
+  }
+}

run-7/checkpoint-536/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d64e83635a4da4783e4c55867a30677ea946d7ffee6806d5df9c9caef4e2a1
+size 4792

run-7/checkpoint-536/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-804/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-7/checkpoint-804/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a023b11d7a3437b638097cc31dddb7fb5b9e80855940e541a379ae1cd1815fe
+size 267832560

run-7/checkpoint-804/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:392d8b9199dc503ebc85de6f2af12f8c6c473035f1bb998c8935684d57543881
+size 535727290

run-7/checkpoint-804/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:236bc1824b27168750bee59fb6bb860b8024092ba38ee88104c8be0e3585cc1e
+size 14244

run-7/checkpoint-804/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71e9779de90b00476efc04c12e2578382f9597f691f774e7e4984c56fb7456b8
+size 1064

run-7/checkpoint-804/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-804/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-804/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-804/trainer_state.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "best_metric": 0.4345489406668151,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-7/checkpoint-804",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 804,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5343004465103149,
+      "eval_matthews_correlation": 0.29918510040507346,
+      "eval_runtime": 0.7621,
+      "eval_samples_per_second": 1368.513,
+      "eval_steps_per_second": 86.598,
+      "step": 268
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 2.9668501983081725e-06,
+      "loss": 0.5193,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5236551761627197,
+      "eval_matthews_correlation": 0.4241202758215723,
+      "eval_runtime": 0.8223,
+      "eval_samples_per_second": 1268.439,
+      "eval_steps_per_second": 80.266,
+      "step": 536
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.5109657645225525,
+      "eval_matthews_correlation": 0.4345489406668151,
+      "eval_runtime": 0.8208,
+      "eval_samples_per_second": 1270.721,
+      "eval_steps_per_second": 80.41,
+      "step": 804
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1072,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 95730204637716.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.560250721304826e-06,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 32,
+    "seed": 28
+  }
+}

run-7/checkpoint-804/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d64e83635a4da4783e4c55867a30677ea946d7ffee6806d5df9c9caef4e2a1
+size 4792

run-7/checkpoint-804/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Dec23_01-44-29_cab255cafe95/events.out.tfevents.1703296853.cab255cafe95.13310.9 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d217e298a38ce958199c8de16eff7a7cc35c6e85135f3b11e17dce6cd36ac8c
-size 5506

 version https://git-lfs.github.com/spec/v1
+oid sha256:b83690d895289980404626422b2c357ce800f985b4cd76b944aa60b152ef8978
+size 6352