SjardiWillems committed on Mar 5, 2024

Commit

d53c5c8

verified ·

1 Parent(s): e32a80f

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-3/checkpoint-108/config.json +25 -0
run-3/checkpoint-108/model.safetensors +3 -0
run-3/checkpoint-108/optimizer.pt +3 -0
run-3/checkpoint-108/rng_state.pth +3 -0
run-3/checkpoint-108/scheduler.pt +3 -0
run-3/checkpoint-108/special_tokens_map.json +7 -0
run-3/checkpoint-108/tokenizer.json +0 -0
run-3/checkpoint-108/tokenizer_config.json +55 -0
run-3/checkpoint-108/trainer_state.json +44 -0
run-3/checkpoint-108/training_args.bin +3 -0
run-3/checkpoint-108/vocab.txt +0 -0
run-3/checkpoint-162/config.json +25 -0
run-3/checkpoint-162/model.safetensors +3 -0
run-3/checkpoint-162/optimizer.pt +3 -0
run-3/checkpoint-162/rng_state.pth +3 -0
run-3/checkpoint-162/scheduler.pt +3 -0
run-3/checkpoint-162/special_tokens_map.json +7 -0
run-3/checkpoint-162/tokenizer.json +0 -0
run-3/checkpoint-162/tokenizer_config.json +55 -0
run-3/checkpoint-162/trainer_state.json +53 -0
run-3/checkpoint-162/training_args.bin +3 -0
run-3/checkpoint-162/vocab.txt +0 -0
run-3/checkpoint-216/config.json +25 -0
run-3/checkpoint-216/model.safetensors +3 -0
run-3/checkpoint-216/optimizer.pt +3 -0
run-3/checkpoint-216/rng_state.pth +3 -0
run-3/checkpoint-216/scheduler.pt +3 -0
run-3/checkpoint-216/special_tokens_map.json +7 -0
run-3/checkpoint-216/tokenizer.json +0 -0
run-3/checkpoint-216/tokenizer_config.json +55 -0
run-3/checkpoint-216/trainer_state.json +62 -0
run-3/checkpoint-216/training_args.bin +3 -0
run-3/checkpoint-216/vocab.txt +0 -0
run-3/checkpoint-270/config.json +25 -0
run-3/checkpoint-270/model.safetensors +3 -0
run-3/checkpoint-270/optimizer.pt +3 -0
run-3/checkpoint-270/rng_state.pth +3 -0
run-3/checkpoint-270/scheduler.pt +3 -0
run-3/checkpoint-270/special_tokens_map.json +7 -0
run-3/checkpoint-270/tokenizer.json +0 -0
run-3/checkpoint-270/tokenizer_config.json +55 -0
run-3/checkpoint-270/trainer_state.json +71 -0
run-3/checkpoint-270/training_args.bin +3 -0
run-3/checkpoint-270/vocab.txt +0 -0
run-4/checkpoint-54/config.json +25 -0
run-4/checkpoint-54/model.safetensors +3 -0
run-4/checkpoint-54/optimizer.pt +3 -0
run-4/checkpoint-54/rng_state.pth +3 -0
run-4/checkpoint-54/scheduler.pt +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24476bddf0cd9468d4ab74ae6bbe580c634cd59d759787b10ec33290ff875f39
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:02862714de993ef37f27e3368f59c2a27353cbade9b451ae7a7827c2a8b05f06
 size 267832560

run-3/checkpoint-108/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-108/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb37c0a0e0ca531963a4e7e0f692d8b7e91d5962cf94ce6df4b896e2b6f859b0
+size 267832560

run-3/checkpoint-108/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53530a8d0bcf32d08f68be361fde14b0b9065efa33a5b666cb78c553b04e92e3
+size 535727290

run-3/checkpoint-108/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6248ee263aa8124cd7bbd3e19df3a7d7ccfdcd13889d62964bda4e725adec1d0
+size 14180

run-3/checkpoint-108/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4552d0fa488a4a6d4a8151b0d4cb2a32d86e6512fcf6660d877f7e497c57610e
+size 1064

run-3/checkpoint-108/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-108/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-108/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-108/trainer_state.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-54",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 108,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6160927414894104,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.4953,
+      "eval_samples_per_second": 2105.607,
+      "eval_steps_per_second": 133.241,
+      "step": 54
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6087684035301208,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.5033,
+      "eval_samples_per_second": 2072.39,
+      "eval_steps_per_second": 131.139,
+      "step": 108
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.118328225546516e-06,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 10
+  }
+}

run-3/checkpoint-108/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98eb71cc2a425fe99533b8db21e393132228db26f27ca6e23b50c5fe39be3492
+size 4984

run-3/checkpoint-108/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-162/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-162/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3be9b1103128393fe83f08a5b3e43fb1100909a19ecedf3e013bdbc01289d99d
+size 267832560

run-3/checkpoint-162/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6c868dd7ec3bc6e9a3e2fec13c6d6bf3af2aa6ed54788d032090a91ee946ac85
+size 535727290

run-3/checkpoint-162/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68a9ce68cb050eacbdf583e0c527cd361b694283d9eb8c34aba0177144208765
+size 14180

run-3/checkpoint-162/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce613ff5efb031c46a008a6e9112ed30ade6223a12677bcd67e819e55cf5e609
+size 1064

run-3/checkpoint-162/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-162/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-162/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-162/trainer_state.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-54",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 162,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6160927414894104,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.4953,
+      "eval_samples_per_second": 2105.607,
+      "eval_steps_per_second": 133.241,
+      "step": 54
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6087684035301208,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.5033,
+      "eval_samples_per_second": 2072.39,
+      "eval_steps_per_second": 131.139,
+      "step": 108
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.6040787100791931,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.4985,
+      "eval_samples_per_second": 2092.254,
+      "eval_steps_per_second": 132.396,
+      "step": 162
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.118328225546516e-06,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 10
+  }
+}

run-3/checkpoint-162/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98eb71cc2a425fe99533b8db21e393132228db26f27ca6e23b50c5fe39be3492
+size 4984

run-3/checkpoint-162/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-216/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-216/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:800de9d202edd1830402add7f125e8d6f5db08f2dd1a62107f4927bfee50e6bc
+size 267832560

run-3/checkpoint-216/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:821f26d098cf75233065f1800ed011140c405b4e32c393c75de9ab699ca78639
+size 535727290

run-3/checkpoint-216/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffdddfb82c3b4963aa78aa8d28957a2bbb3e40a855a2b9ac45bec4d31ce00afc
+size 14180

run-3/checkpoint-216/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9602eccc673b4ba7a13f63d9a551e782d489b37be0b11ac56839b6ade9c337af
+size 1064

run-3/checkpoint-216/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-216/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-216/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-216/trainer_state.json ADDED Viewed

	@@ -0,0 +1,62 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-54",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 216,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6160927414894104,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.4953,
+      "eval_samples_per_second": 2105.607,
+      "eval_steps_per_second": 133.241,
+      "step": 54
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6087684035301208,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.5033,
+      "eval_samples_per_second": 2072.39,
+      "eval_steps_per_second": 131.139,
+      "step": 108
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.6040787100791931,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.4985,
+      "eval_samples_per_second": 2092.254,
+      "eval_steps_per_second": 132.396,
+      "step": 162
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.5997274518013,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.5179,
+      "eval_samples_per_second": 2013.994,
+      "eval_steps_per_second": 127.444,
+      "step": 216
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.118328225546516e-06,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 10
+  }
+}

run-3/checkpoint-216/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98eb71cc2a425fe99533b8db21e393132228db26f27ca6e23b50c5fe39be3492
+size 4984

run-3/checkpoint-216/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-270/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-270/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:891c9400c800e7a4e0cf4a66b75234a81ec48e269afdee30008b594281577267
+size 267832560

run-3/checkpoint-270/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2848ef7caf597c1c8e7fd46397ea348ff4387b8f6e01aad0f77969aa0245ce6c
+size 535727290

run-3/checkpoint-270/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0c091714099710d340535bbcb3659c356e65310b79179b187ea7dae4af20a34
+size 14180

run-3/checkpoint-270/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a41caa36f61731e1a1c86dd5347a8b5c0eb6db1b347a7fa2845f42e83c0c646
+size 1064

run-3/checkpoint-270/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-270/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-270/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-270/trainer_state.json ADDED Viewed

	@@ -0,0 +1,71 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-54",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 270,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6160927414894104,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.4953,
+      "eval_samples_per_second": 2105.607,
+      "eval_steps_per_second": 133.241,
+      "step": 54
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6087684035301208,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.5033,
+      "eval_samples_per_second": 2072.39,
+      "eval_steps_per_second": 131.139,
+      "step": 108
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.6040787100791931,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.4985,
+      "eval_samples_per_second": 2092.254,
+      "eval_steps_per_second": 132.396,
+      "step": 162
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.5997274518013,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.5179,
+      "eval_samples_per_second": 2013.994,
+      "eval_steps_per_second": 127.444,
+      "step": 216
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.5987327694892883,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.4912,
+      "eval_samples_per_second": 2123.423,
+      "eval_steps_per_second": 134.368,
+      "step": 270
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 270,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.118328225546516e-06,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 16,
+    "seed": 10
+  }
+}

run-3/checkpoint-270/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98eb71cc2a425fe99533b8db21e393132228db26f27ca6e23b50c5fe39be3492
+size 4984

run-3/checkpoint-270/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-54/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-4/checkpoint-54/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02862714de993ef37f27e3368f59c2a27353cbade9b451ae7a7827c2a8b05f06
+size 267832560

run-4/checkpoint-54/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dea78c6e466a6e28a060acb25b05ae9c3daf1a6c808ce0f60f0fa88bcad3acce
+size 535727290

run-4/checkpoint-54/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db6c018004c8341f62d292cd0e83aea5f442dbd6d99c159bb74281a986c0410b
+size 14244

run-4/checkpoint-54/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1ce217590d9de8b73c6b7064631cd0f4f72df156ea469ee6367953b0fd95b15
+size 1064