RavingRabbit committed on Dec 23, 2023

Commit

db5bd2b

1 Parent(s): 5ac0c01

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-2/checkpoint-6414/config.json +25 -0
run-2/checkpoint-6414/model.safetensors +3 -0
run-2/checkpoint-6414/optimizer.pt +3 -0
run-2/checkpoint-6414/rng_state.pth +3 -0
run-2/checkpoint-6414/scheduler.pt +3 -0
run-2/checkpoint-6414/special_tokens_map.json +7 -0
run-2/checkpoint-6414/tokenizer.json +0 -0
run-2/checkpoint-6414/tokenizer_config.json +55 -0
run-2/checkpoint-6414/trainer_state.json +125 -0
run-2/checkpoint-6414/training_args.bin +3 -0
run-2/checkpoint-6414/vocab.txt +0 -0
run-2/checkpoint-8552/config.json +25 -0
run-2/checkpoint-8552/model.safetensors +3 -0
run-2/checkpoint-8552/optimizer.pt +3 -0
run-2/checkpoint-8552/rng_state.pth +3 -0
run-2/checkpoint-8552/scheduler.pt +3 -0
run-2/checkpoint-8552/special_tokens_map.json +7 -0
run-2/checkpoint-8552/tokenizer.json +0 -0
run-2/checkpoint-8552/tokenizer_config.json +55 -0
run-2/checkpoint-8552/trainer_state.json +164 -0
run-2/checkpoint-8552/training_args.bin +3 -0
run-2/checkpoint-8552/vocab.txt +0 -0
run-3/checkpoint-268/config.json +25 -0
run-3/checkpoint-268/model.safetensors +3 -0
run-3/checkpoint-268/optimizer.pt +3 -0
run-3/checkpoint-268/rng_state.pth +3 -0
run-3/checkpoint-268/scheduler.pt +3 -0
run-3/checkpoint-268/special_tokens_map.json +7 -0
run-3/checkpoint-268/tokenizer.json +0 -0
run-3/checkpoint-268/tokenizer_config.json +55 -0
run-3/checkpoint-268/trainer_state.json +35 -0
run-3/checkpoint-268/training_args.bin +3 -0
run-3/checkpoint-268/vocab.txt +0 -0
run-4/checkpoint-1069/config.json +25 -0
run-4/checkpoint-1069/model.safetensors +3 -0
run-4/checkpoint-1069/optimizer.pt +3 -0
run-4/checkpoint-1069/rng_state.pth +3 -0
run-4/checkpoint-1069/scheduler.pt +3 -0
run-4/checkpoint-1069/special_tokens_map.json +7 -0
run-4/checkpoint-1069/tokenizer.json +0 -0
run-4/checkpoint-1069/tokenizer_config.json +55 -0
run-4/checkpoint-1069/trainer_state.json +47 -0
run-4/checkpoint-1069/training_args.bin +3 -0
run-4/checkpoint-1069/vocab.txt +0 -0
run-4/checkpoint-2138/config.json +25 -0
run-4/checkpoint-2138/model.safetensors +3 -0
run-4/checkpoint-2138/optimizer.pt +3 -0
run-4/checkpoint-2138/rng_state.pth +3 -0
run-4/checkpoint-2138/scheduler.pt +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:808e0a246341a493e48c2982ca451e81f4531e05f3428ac49e8a2f00794fbdf9
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:895b4cfe56f9bbe61f6d2012ab8ec05f32493cb2973ad993275335f1e64bed55
 size 267832560

run-2/checkpoint-6414/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-2/checkpoint-6414/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:808e0a246341a493e48c2982ca451e81f4531e05f3428ac49e8a2f00794fbdf9
+size 267832560

run-2/checkpoint-6414/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a0ba3198d71f4fd472e1871c205e3548d227853588654bc7512f8b0d9dfdaf1
+size 535727290

run-2/checkpoint-6414/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b2276213d45264db040275028de0a8d1f0c5368380334f20db9fcc540f0a46e
+size 14308

run-2/checkpoint-6414/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3139ec30d25d77e4bbd7a6330b1fd1343040178781225ba9b000a5c7097e154b
+size 1064

run-2/checkpoint-6414/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-6414/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-6414/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-6414/trainer_state.json ADDED Viewed

	@@ -0,0 +1,125 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-2138",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 6414,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.23,
+      "learning_rate": 6.673421496233018e-05,
+      "loss": 0.6049,
+      "step": 500
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 6.259026222000962e-05,
+      "loss": 0.6341,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 5.844630947768907e-05,
+      "loss": 0.6276,
+      "step": 1500
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 5.4302356735368516e-05,
+      "loss": 0.614,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.619145393371582,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.7293,
+      "eval_samples_per_second": 1430.082,
+      "eval_steps_per_second": 90.494,
+      "step": 2138
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 5.015840399304796e-05,
+      "loss": 0.6223,
+      "step": 2500
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 4.601445125072741e-05,
+      "loss": 0.6101,
+      "step": 3000
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 4.187049850840686e-05,
+      "loss": 0.6246,
+      "step": 3500
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 3.7726545766086306e-05,
+      "loss": 0.603,
+      "step": 4000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6236200332641602,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.7355,
+      "eval_samples_per_second": 1418.007,
+      "eval_steps_per_second": 89.73,
+      "step": 4276
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 3.358259302376575e-05,
+      "loss": 0.6119,
+      "step": 4500
+    },
+    {
+      "epoch": 2.34,
+      "learning_rate": 2.9438640281445205e-05,
+      "loss": 0.6139,
+      "step": 5000
+    },
+    {
+      "epoch": 2.57,
+      "learning_rate": 2.529468753912465e-05,
+      "loss": 0.6024,
+      "step": 5500
+    },
+    {
+      "epoch": 2.81,
+      "learning_rate": 2.1150734796804097e-05,
+      "loss": 0.6212,
+      "step": 6000
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.6302978992462158,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.7272,
+      "eval_samples_per_second": 1434.322,
+      "eval_steps_per_second": 90.762,
+      "step": 6414
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 8552,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 97903756622304.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 7.087816770465073e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 4,
+    "seed": 26
+  }
+}

run-2/checkpoint-6414/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:229096f3c02d1f39e318a3c8267e4a335369e6543c5689f448d17d69600fc193
+size 4792

run-2/checkpoint-6414/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-8552/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-2/checkpoint-8552/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:340c15980550c5b995206b65498be761927b3a862a2ab58aaa141c628b53a492
+size 267832560

run-2/checkpoint-8552/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e586f61c41bc2a820fd3966babc6e1162abad0e2dbcb35c94a75b10f4176ca7
+size 535727290

run-2/checkpoint-8552/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c56ef61e46b089dc2394b3c908da63c3e8fcf6121197a10a5759ab29c02e9a67
+size 14308

run-2/checkpoint-8552/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3202026348d47fc1f55c9a79ddfb9a3bee9e24617c904788562546c66512bbab
+size 1064

run-2/checkpoint-8552/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-8552/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-8552/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-8552/trainer_state.json ADDED Viewed

	@@ -0,0 +1,164 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-2138",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 8552,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.23,
+      "learning_rate": 6.673421496233018e-05,
+      "loss": 0.6049,
+      "step": 500
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 6.259026222000962e-05,
+      "loss": 0.6341,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 5.844630947768907e-05,
+      "loss": 0.6276,
+      "step": 1500
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 5.4302356735368516e-05,
+      "loss": 0.614,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.619145393371582,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.7293,
+      "eval_samples_per_second": 1430.082,
+      "eval_steps_per_second": 90.494,
+      "step": 2138
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 5.015840399304796e-05,
+      "loss": 0.6223,
+      "step": 2500
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 4.601445125072741e-05,
+      "loss": 0.6101,
+      "step": 3000
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 4.187049850840686e-05,
+      "loss": 0.6246,
+      "step": 3500
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 3.7726545766086306e-05,
+      "loss": 0.603,
+      "step": 4000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6236200332641602,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.7355,
+      "eval_samples_per_second": 1418.007,
+      "eval_steps_per_second": 89.73,
+      "step": 4276
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 3.358259302376575e-05,
+      "loss": 0.6119,
+      "step": 4500
+    },
+    {
+      "epoch": 2.34,
+      "learning_rate": 2.9438640281445205e-05,
+      "loss": 0.6139,
+      "step": 5000
+    },
+    {
+      "epoch": 2.57,
+      "learning_rate": 2.529468753912465e-05,
+      "loss": 0.6024,
+      "step": 5500
+    },
+    {
+      "epoch": 2.81,
+      "learning_rate": 2.1150734796804097e-05,
+      "loss": 0.6212,
+      "step": 6000
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.6302978992462158,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.7272,
+      "eval_samples_per_second": 1434.322,
+      "eval_steps_per_second": 90.762,
+      "step": 6414
+    },
+    {
+      "epoch": 3.04,
+      "learning_rate": 1.7006782054483546e-05,
+      "loss": 0.6063,
+      "step": 6500
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 1.2862829312162994e-05,
+      "loss": 0.6178,
+      "step": 7000
+    },
+    {
+      "epoch": 3.51,
+      "learning_rate": 8.718876569842442e-06,
+      "loss": 0.6036,
+      "step": 7500
+    },
+    {
+      "epoch": 3.74,
+      "learning_rate": 4.57492382752189e-06,
+      "loss": 0.6152,
+      "step": 8000
+    },
+    {
+      "epoch": 3.98,
+      "learning_rate": 4.309710852013374e-07,
+      "loss": 0.6204,
+      "step": 8500
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.622022807598114,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.734,
+      "eval_samples_per_second": 1420.905,
+      "eval_steps_per_second": 89.913,
+      "step": 8552
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 8552,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "total_flos": 138656627387736.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 7.087816770465073e-05,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 4,
+    "seed": 26
+  }
+}

run-2/checkpoint-8552/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:229096f3c02d1f39e318a3c8267e4a335369e6543c5689f448d17d69600fc193
+size 4792

run-2/checkpoint-8552/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-268/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-3/checkpoint-268/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:150ba353979b873044b17ceef2fffff6b2a3ccb670e9ef0199cb02e9c66e4215
+size 267832560

run-3/checkpoint-268/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e0e954cb6468f51528c261281bda0da0e886099eba86a9b9f4b783cceb570b28
+size 535727290

run-3/checkpoint-268/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b01dc255a18436c8560496aea22260af41c12a0f68311fdf1b6cd1946be72458
+size 14244

run-3/checkpoint-268/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8dc0981832d41b0fe5f46e7c129125db763f9bc64a8a4119bb96e0bce286c2a
+size 1064

run-3/checkpoint-268/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-268/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-268/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-268/trainer_state.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-3/checkpoint-268",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 268,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6172963976860046,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 0.7451,
+      "eval_samples_per_second": 1399.774,
+      "eval_steps_per_second": 88.576,
+      "step": 268
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 268,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 1.2470743887867703e-06,
+    "num_train_epochs": 1,
+    "per_device_train_batch_size": 32,
+    "seed": 30
+  }
+}

run-3/checkpoint-268/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9770c37754e611c4da0116a491a990826b1fa32e6cc67c1a4453d8081c5ed81
+size 4792

run-3/checkpoint-268/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-1069/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-4/checkpoint-1069/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bda67fd208486255904ee208ecc44ab85ef0893f75636e18f3f40e942f21bb2
+size 267832560

run-4/checkpoint-1069/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a9f3f9a683762457dcdfda821b14d090bd680c960327043779831378ff5cbca
+size 535727290

run-4/checkpoint-1069/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e4cad68f538e322f5ad28e53ea5dc75b8b122dffa1e894f8e631e297c02125d
+size 14244

run-4/checkpoint-1069/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:924c2fd17d36d37418c9bc7275af6fd2181a11fd0e15ae4806bfe012db841523
+size 1064

run-4/checkpoint-1069/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-1069/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-1069/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-4/checkpoint-1069/trainer_state.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "best_metric": 0.3636302567913777,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-4/checkpoint-1069",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 1069,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.47,
+      "learning_rate": 2.7705893400665e-05,
+      "loss": 0.5565,
+      "step": 500
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 2.258843987265152e-05,
+      "loss": 0.506,
+      "step": 1000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.560174822807312,
+      "eval_matthews_correlation": 0.3636302567913777,
+      "eval_runtime": 0.7427,
+      "eval_samples_per_second": 1404.306,
+      "eval_steps_per_second": 88.863,
+      "step": 1069
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 3207,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 37738719995232.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 3.282334692867849e-05,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 8,
+    "seed": 19
+  }
+}

run-4/checkpoint-1069/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b0036dfb72bd09fdd49fd778933781c9b7dc1c449bbd910174da63c43446a5a
+size 4792

run-4/checkpoint-1069/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-2138/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "vocab_size": 30522
+}

run-4/checkpoint-2138/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c488c42dc2c418a8f02d0f828d173aec54f3465841e9120855e2070114d7078
+size 267832560

run-4/checkpoint-2138/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:94abd5bfd0970fa86ad3b0b15dcb4a953c0e9b28f7caed22ef5b98b93e19f84a
+size 535727290

run-4/checkpoint-2138/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e482e252db8f2a452e6b9d9668677605f33af9fa4d7f2eefd41a816584ace9fc
+size 14244

run-4/checkpoint-2138/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f6339d4a1bf884319f582128c855791b1af6bee514a30c8d2f479d75a27ee816
+size 1064