foolstar41 commited on Dec 23, 2023

Commit

e89b10c

1 Parent(s): 4d5e0a7

Training in progress, epoch 1

Browse files

Files changed (48) hide show

model.safetensors +1 -1
run-2/checkpoint-1070/config.json +25 -0
run-2/checkpoint-1070/model.safetensors +3 -0
run-2/checkpoint-1070/optimizer.pt +3 -0
run-2/checkpoint-1070/rng_state.pth +3 -0
run-2/checkpoint-1070/scheduler.pt +3 -0
run-2/checkpoint-1070/special_tokens_map.json +7 -0
run-2/checkpoint-1070/tokenizer.json +0 -0
run-2/checkpoint-1070/tokenizer_config.json +55 -0
run-2/checkpoint-1070/trainer_state.json +56 -0
run-2/checkpoint-1070/training_args.bin +3 -0
run-2/checkpoint-1070/vocab.txt +0 -0
run-2/checkpoint-1605/config.json +25 -0
run-2/checkpoint-1605/model.safetensors +3 -0
run-2/checkpoint-1605/optimizer.pt +3 -0
run-2/checkpoint-1605/rng_state.pth +3 -0
run-2/checkpoint-1605/scheduler.pt +3 -0
run-2/checkpoint-1605/special_tokens_map.json +7 -0
run-2/checkpoint-1605/tokenizer.json +0 -0
run-2/checkpoint-1605/tokenizer_config.json +55 -0
run-2/checkpoint-1605/trainer_state.json +71 -0
run-2/checkpoint-1605/training_args.bin +3 -0
run-2/checkpoint-1605/vocab.txt +0 -0
run-2/checkpoint-2140/config.json +25 -0
run-2/checkpoint-2140/model.safetensors +3 -0
run-2/checkpoint-2140/optimizer.pt +3 -0
run-2/checkpoint-2140/rng_state.pth +3 -0
run-2/checkpoint-2140/scheduler.pt +3 -0
run-2/checkpoint-2140/special_tokens_map.json +7 -0
run-2/checkpoint-2140/tokenizer.json +0 -0
run-2/checkpoint-2140/tokenizer_config.json +55 -0
run-2/checkpoint-2140/trainer_state.json +86 -0
run-2/checkpoint-2140/training_args.bin +3 -0
run-2/checkpoint-2140/vocab.txt +0 -0
run-3/checkpoint-1069/config.json +25 -0
run-3/checkpoint-1069/model.safetensors +3 -0
run-3/checkpoint-1069/optimizer.pt +3 -0
run-3/checkpoint-1069/rng_state.pth +3 -0
run-3/checkpoint-1069/scheduler.pt +3 -0
run-3/checkpoint-1069/special_tokens_map.json +7 -0
run-3/checkpoint-1069/tokenizer.json +0 -0
run-3/checkpoint-1069/tokenizer_config.json +55 -0
run-3/checkpoint-1069/trainer_state.json +47 -0
run-3/checkpoint-1069/training_args.bin +3 -0
run-3/checkpoint-1069/vocab.txt +0 -0
runs/Dec23_02-07-52_773b0b4b06bc/events.out.tfevents.1703298258.773b0b4b06bc.493.4 +2 -2
runs/Dec23_02-07-52_773b0b4b06bc/events.out.tfevents.1703298415.773b0b4b06bc.493.5 +3 -0
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c605558fb4679495f2033d1404356d51f142cd8af744ebcf9c7977a6ae09bc8
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:604dafc6720048b897c01028f12fdf776cd8c0621457c3ffc5d896a215031869
 size 267832560

run-2/checkpoint-1070/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-2/checkpoint-1070/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55b4bb7446a810b7da0ff0e709a8f32e09b29bfa9d3dc94c88cfac5e5f8b89d8
+size 267832560

run-2/checkpoint-1070/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a573ac367686ea275c17b957e2f3095f8171656ec99f35dbd54940c4d69e50a
+size 535727290

run-2/checkpoint-1070/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1cdefaa1e2bd723286fc009dcf16a060253d8179c850b70782edba370ef01e2
+size 14244

run-2/checkpoint-1070/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ad391175ed1411901ccde74f636d5258df810e6dd7a3ec05e38fc2d08b2addb
+size 1064

run-2/checkpoint-1070/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-1070/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-1070/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-1070/trainer_state.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "best_metric": 0.4584020596329,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-1070",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1070,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.93,
+      "learning_rate": 3.854935106207992e-05,
+      "loss": 0.529,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.4637433886528015,
+      "eval_matthews_correlation": 0.42825981555844306,
+      "eval_runtime": 0.7351,
+      "eval_samples_per_second": 1418.842,
+      "eval_steps_per_second": 89.783,
+      "step": 535
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 2.6796500128518964e-05,
+      "loss": 0.327,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5142660737037659,
+      "eval_matthews_correlation": 0.4584020596329,
+      "eval_runtime": 0.7637,
+      "eval_samples_per_second": 1365.788,
+      "eval_steps_per_second": 86.426,
+      "step": 1070
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2140,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 85088053253112.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.030220199564087e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 16,
+    "seed": 27
+  }
+}

run-2/checkpoint-1070/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eaf5433238535e3a9051fa91efd995dc6d4ed3adae963af5d669ca9f9d094baa
+size 4792

run-2/checkpoint-1070/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-1605/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-2/checkpoint-1605/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:476195028d3b2da2e4d6d15c798bde9964aa9b1b5bcfff8fec3b1c1f86ef2607
+size 267832560

run-2/checkpoint-1605/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:609030efa853633e0b3885e896c8c818e11ce204ae26bdc762f0d97e40a5d6f4
+size 535727290

run-2/checkpoint-1605/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcab5315ffdddce50b8e2944aa184bbf66f00c8352ed1fbbc651362b9832d3fc
+size 14244

run-2/checkpoint-1605/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef473577c87aedfdadfe1cef43f21a6d0b05ab0f4272b2d550ca3cccfbb522f6
+size 1064

run-2/checkpoint-1605/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-1605/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-1605/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-1605/trainer_state.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "best_metric": 0.507910414966408,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-1605",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1605,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.93,
+      "learning_rate": 3.854935106207992e-05,
+      "loss": 0.529,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.4637433886528015,
+      "eval_matthews_correlation": 0.42825981555844306,
+      "eval_runtime": 0.7351,
+      "eval_samples_per_second": 1418.842,
+      "eval_steps_per_second": 89.783,
+      "step": 535
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 2.6796500128518964e-05,
+      "loss": 0.327,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5142660737037659,
+      "eval_matthews_correlation": 0.4584020596329,
+      "eval_runtime": 0.7637,
+      "eval_samples_per_second": 1365.788,
+      "eval_steps_per_second": 86.426,
+      "step": 1070
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 1.5043649194958016e-05,
+      "loss": 0.1912,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7642378807067871,
+      "eval_matthews_correlation": 0.507910414966408,
+      "eval_runtime": 0.7199,
+      "eval_samples_per_second": 1448.91,
+      "eval_steps_per_second": 91.686,
+      "step": 1605
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2140,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 127289010015180.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.030220199564087e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 16,
+    "seed": 27
+  }
+}

run-2/checkpoint-1605/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eaf5433238535e3a9051fa91efd995dc6d4ed3adae963af5d669ca9f9d094baa
+size 4792

run-2/checkpoint-1605/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-2140/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-2/checkpoint-2140/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a391606c00f60d6af6f30363a22dc346fa6c18c1ebc7a5f34d12945ecdd319b5
+size 267832560

run-2/checkpoint-2140/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd63fd3a7c9fb05e0d09ad7c496fd41be1e4624156b7e5ca906d5e89ef020021
+size 535727290

run-2/checkpoint-2140/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c814120057184dafddb86724d94ca5be1cf82057428330436240a4693f258c80
+size 14244

run-2/checkpoint-2140/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:399e917a21f1a0301575a3298d7be21cd8d3f82babc36553ebae421fd3db8e47
+size 1064

run-2/checkpoint-2140/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-2140/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-2140/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-2140/trainer_state.json ADDED Viewed

	@@ -0,0 +1,86 @@

+{
+  "best_metric": 0.507910414966408,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-1605",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 2140,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.93,
+      "learning_rate": 3.854935106207992e-05,
+      "loss": 0.529,
+      "step": 500
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.4637433886528015,
+      "eval_matthews_correlation": 0.42825981555844306,
+      "eval_runtime": 0.7351,
+      "eval_samples_per_second": 1418.842,
+      "eval_steps_per_second": 89.783,
+      "step": 535
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 2.6796500128518964e-05,
+      "loss": 0.327,
+      "step": 1000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5142660737037659,
+      "eval_matthews_correlation": 0.4584020596329,
+      "eval_runtime": 0.7637,
+      "eval_samples_per_second": 1365.788,
+      "eval_steps_per_second": 86.426,
+      "step": 1070
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 1.5043649194958016e-05,
+      "loss": 0.1912,
+      "step": 1500
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7642378807067871,
+      "eval_matthews_correlation": 0.507910414966408,
+      "eval_runtime": 0.7199,
+      "eval_samples_per_second": 1448.91,
+      "eval_steps_per_second": 91.686,
+      "step": 1605
+    },
+    {
+      "epoch": 3.74,
+      "learning_rate": 3.290798261397066e-06,
+      "loss": 0.1201,
+      "step": 2000
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.9557287096977234,
+      "eval_matthews_correlation": 0.49787102363508634,
+      "eval_runtime": 1.0346,
+      "eval_samples_per_second": 1008.128,
+      "eval_steps_per_second": 63.793,
+      "step": 2140
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2140,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 170405595925380.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 5.030220199564087e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 16,
+    "seed": 27
+  }
+}

run-2/checkpoint-2140/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eaf5433238535e3a9051fa91efd995dc6d4ed3adae963af5d669ca9f9d094baa
+size 4792

run-2/checkpoint-2140/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-1069/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-1069/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:604dafc6720048b897c01028f12fdf776cd8c0621457c3ffc5d896a215031869
+size 267832560

run-3/checkpoint-1069/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01a6468bc8ebd0404709628469436e2049e184b6f3586ee3289e0387a6a73496
+size 535727290

run-3/checkpoint-1069/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bfdfabb6c5fe1c92db76035f9980a0c2272c0b5ef1bdd093af8ac52086434f02
+size 14244

run-3/checkpoint-1069/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:83b2afb6c14a6a18e0702b97d37b2f587736fddb8f22e80e86a43bbb5b160102
+size 1064

run-3/checkpoint-1069/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1069/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-1069/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-1069/trainer_state.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "best_metric": 0.4153896652547264,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-1069",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 1069,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.47,
+      "learning_rate": 2.0073962409744884e-05,
+      "loss": 0.5491,
+      "step": 500
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 1.6366174746326915e-05,
+      "loss": 0.4981,
+      "step": 1000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.4900423288345337,
+      "eval_matthews_correlation": 0.4153896652547264,
+      "eval_runtime": 0.7314,
+      "eval_samples_per_second": 1425.957,
+      "eval_steps_per_second": 90.233,
+      "step": 1069
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 3207,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 37577275353120.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.3781750073162853e-05,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 8,
+    "seed": 4
+  }
+}

run-3/checkpoint-1069/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afcad0aa7e2ccb8e2de574c0db08353470aae4815118a27ef0f48d3f85331771
+size 4792

run-3/checkpoint-1069/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Dec23_02-07-52_773b0b4b06bc/events.out.tfevents.1703298258.773b0b4b06bc.493.4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09236e96810c0573bd6d0fe415d67c8907d9d216953753c2942873c6ab58edc5
-size 4836

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d3314bccee4f682523ad0e625ee6d8e1822b8b95343c90b991806a7d985d07d
+size 6666

runs/Dec23_02-07-52_773b0b4b06bc/events.out.tfevents.1703298415.773b0b4b06bc.493.5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c77a52861a2a68f58ad1f712805a70de62eac48aeab1039495ca86d145bf7f38
+size 5149

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eaf5433238535e3a9051fa91efd995dc6d4ed3adae963af5d669ca9f9d094baa
 size 4792

 version https://git-lfs.github.com/spec/v1
+oid sha256:afcad0aa7e2ccb8e2de574c0db08353470aae4815118a27ef0f48d3f85331771
 size 4792