anamelchor committed on Mar 3

Commit

886534d

•

1 Parent(s): 4f8b238

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-7/checkpoint-1072/config.json +25 -0
run-7/checkpoint-1072/model.safetensors +3 -0
run-7/checkpoint-1072/optimizer.pt +3 -0
run-7/checkpoint-1072/rng_state.pth +3 -0
run-7/checkpoint-1072/scheduler.pt +3 -0
run-7/checkpoint-1072/special_tokens_map.json +7 -0
run-7/checkpoint-1072/tokenizer.json +0 -0
run-7/checkpoint-1072/tokenizer_config.json +55 -0
run-7/checkpoint-1072/trainer_state.json +76 -0
run-7/checkpoint-1072/training_args.bin +3 -0
run-7/checkpoint-1072/vocab.txt +0 -0
run-7/checkpoint-1340/config.json +25 -0
run-7/checkpoint-1340/model.safetensors +3 -0
run-7/checkpoint-1340/optimizer.pt +3 -0
run-7/checkpoint-1340/rng_state.pth +3 -0
run-7/checkpoint-1340/scheduler.pt +3 -0
run-7/checkpoint-1340/special_tokens_map.json +7 -0
run-7/checkpoint-1340/tokenizer.json +0 -0
run-7/checkpoint-1340/tokenizer_config.json +55 -0
run-7/checkpoint-1340/trainer_state.json +85 -0
run-7/checkpoint-1340/training_args.bin +3 -0
run-7/checkpoint-1340/vocab.txt +0 -0
run-7/checkpoint-268/config.json +25 -0
run-7/checkpoint-268/model.safetensors +3 -0
run-7/checkpoint-268/optimizer.pt +3 -0
run-7/checkpoint-268/rng_state.pth +3 -0
run-7/checkpoint-268/scheduler.pt +3 -0
run-7/checkpoint-268/special_tokens_map.json +7 -0
run-7/checkpoint-268/tokenizer.json +0 -0
run-7/checkpoint-268/tokenizer_config.json +55 -0
run-7/checkpoint-268/trainer_state.json +35 -0
run-7/checkpoint-268/training_args.bin +3 -0
run-7/checkpoint-268/vocab.txt +0 -0
run-7/checkpoint-536/config.json +25 -0
run-7/checkpoint-536/model.safetensors +3 -0
run-7/checkpoint-536/optimizer.pt +3 -0
run-7/checkpoint-536/rng_state.pth +3 -0
run-7/checkpoint-536/scheduler.pt +3 -0
run-7/checkpoint-536/special_tokens_map.json +7 -0
run-7/checkpoint-536/tokenizer.json +0 -0
run-7/checkpoint-536/tokenizer_config.json +55 -0
run-7/checkpoint-536/trainer_state.json +51 -0
run-7/checkpoint-536/training_args.bin +3 -0
run-7/checkpoint-536/vocab.txt +0 -0
run-7/checkpoint-804/config.json +25 -0
run-7/checkpoint-804/model.safetensors +3 -0
run-7/checkpoint-804/optimizer.pt +3 -0
run-7/checkpoint-804/rng_state.pth +3 -0
run-7/checkpoint-804/scheduler.pt +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08a866a5fb3d0faf53480d212a8ae192d353fe85f16b1a73d0a7f8ad1a00868c
 size 267832560

 version https://git-lfs.github.com/spec/v1
+oid sha256:c183ff2fd02f9a631531f466f5f0645fee3e51d7ced576aa425e91a5aa6055cc
 size 267832560

run-7/checkpoint-1072/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-7/checkpoint-1072/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eea95e057bec692f826e98c2f746e613afacc745dcee4c5f0a38854ba81ec142
+size 267832560

run-7/checkpoint-1072/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5762bc02b63bd17f2415b51d39c56de88ce177ddfb14298e15b5b3bae9a104e
+size 535727290

run-7/checkpoint-1072/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6df72fd84dfd0e5269221a289ddb79e7c7b27843ec222ab15714ae7f5163bf4e
+size 14308

run-7/checkpoint-1072/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77aebab30a5aee5c39d7dd5d92e6e0f08f879a0df96ace9a252e90e8c2ece8b5
+size 1064

run-7/checkpoint-1072/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-1072/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-1072/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-1072/trainer_state.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "best_metric": 0.5276877632205236,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-7/checkpoint-804",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 1072,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5216242074966431,
+      "eval_matthews_correlation": 0.38963004002937796,
+      "eval_runtime": 0.7221,
+      "eval_samples_per_second": 1444.321,
+      "eval_steps_per_second": 91.395,
+      "step": 268
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 8.266988754272461,
+      "learning_rate": 1.5079418524244762e-05,
+      "loss": 0.4418,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5111855268478394,
+      "eval_matthews_correlation": 0.4979366411311316,
+      "eval_runtime": 0.8836,
+      "eval_samples_per_second": 1180.395,
+      "eval_steps_per_second": 74.694,
+      "step": 536
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.5452266335487366,
+      "eval_matthews_correlation": 0.5276877632205236,
+      "eval_runtime": 0.7934,
+      "eval_samples_per_second": 1314.544,
+      "eval_steps_per_second": 83.183,
+      "step": 804
+    },
+    {
+      "epoch": 3.73,
+      "grad_norm": 7.780147075653076,
+      "learning_rate": 6.103574164575261e-06,
+      "loss": 0.211,
+      "step": 1000
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.6031964421272278,
+      "eval_matthews_correlation": 0.5197571248000596,
+      "eval_runtime": 0.7831,
+      "eval_samples_per_second": 1331.863,
+      "eval_steps_per_second": 84.279,
+      "step": 1072
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1340,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 192139563418932.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.4055262883914262e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 29
+  }
+}

run-7/checkpoint-1072/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34ca87411a20fbc048b2ded579321861753bc8a30c47fb67c9183b88b98abac5
+size 4984

run-7/checkpoint-1072/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-1340/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-7/checkpoint-1340/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3687055fa18b6acd59a8da4fd51e2616d33429e47fd1cd6dd0c69d49b4b8897
+size 267832560

run-7/checkpoint-1340/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:826268078209772b66010f42e3b3876566d4eb8bc1261e60763bf8aa83b066cc
+size 535727290

run-7/checkpoint-1340/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bb1c4d382a763478143976cf214c95fee682e8f67b7e101085a313c525196cd
+size 14308

run-7/checkpoint-1340/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:19bb3bf66c193fe69e683b92dd30afbac9bfd7334aeeb6703e7f2c18cd83e509
+size 1064

run-7/checkpoint-1340/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-1340/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-1340/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-1340/trainer_state.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+  "best_metric": 0.5276877632205236,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-7/checkpoint-804",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1340,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5216242074966431,
+      "eval_matthews_correlation": 0.38963004002937796,
+      "eval_runtime": 0.7221,
+      "eval_samples_per_second": 1444.321,
+      "eval_steps_per_second": 91.395,
+      "step": 268
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 8.266988754272461,
+      "learning_rate": 1.5079418524244762e-05,
+      "loss": 0.4418,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5111855268478394,
+      "eval_matthews_correlation": 0.4979366411311316,
+      "eval_runtime": 0.8836,
+      "eval_samples_per_second": 1180.395,
+      "eval_steps_per_second": 74.694,
+      "step": 536
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.5452266335487366,
+      "eval_matthews_correlation": 0.5276877632205236,
+      "eval_runtime": 0.7934,
+      "eval_samples_per_second": 1314.544,
+      "eval_steps_per_second": 83.183,
+      "step": 804
+    },
+    {
+      "epoch": 3.73,
+      "grad_norm": 7.780147075653076,
+      "learning_rate": 6.103574164575261e-06,
+      "loss": 0.211,
+      "step": 1000
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.6031964421272278,
+      "eval_matthews_correlation": 0.5197571248000596,
+      "eval_runtime": 0.7831,
+      "eval_samples_per_second": 1331.863,
+      "eval_steps_per_second": 84.279,
+      "step": 1072
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.6737221479415894,
+      "eval_matthews_correlation": 0.5241753618868132,
+      "eval_runtime": 0.8005,
+      "eval_samples_per_second": 1302.958,
+      "eval_steps_per_second": 82.45,
+      "step": 1340
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1340,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 192139563418932.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.4055262883914262e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 29
+  }
+}

run-7/checkpoint-1340/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34ca87411a20fbc048b2ded579321861753bc8a30c47fb67c9183b88b98abac5
+size 4984

run-7/checkpoint-1340/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-268/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-7/checkpoint-268/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08a866a5fb3d0faf53480d212a8ae192d353fe85f16b1a73d0a7f8ad1a00868c
+size 267832560

run-7/checkpoint-268/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7fadad1aeb8c5ba0a76c9f9c1720e6378a8459447bd871507e281e780643193
+size 535727290

run-7/checkpoint-268/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd000c938e3d30c577de1f43a374adcee943887d3eb1ff70096228bc3c559a0b
+size 14308

run-7/checkpoint-268/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3284a4190f7984a66705854e71b04587ac1290449ffd26d97153118a45813c29
+size 1064

run-7/checkpoint-268/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-268/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-268/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-268/trainer_state.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "best_metric": 0.38963004002937796,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-7/checkpoint-268",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 268,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5216242074966431,
+      "eval_matthews_correlation": 0.38963004002937796,
+      "eval_runtime": 0.7221,
+      "eval_samples_per_second": 1444.321,
+      "eval_steps_per_second": 91.395,
+      "step": 268
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1340,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.4055262883914262e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 29
+  }
+}

run-7/checkpoint-268/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34ca87411a20fbc048b2ded579321861753bc8a30c47fb67c9183b88b98abac5
+size 4984

run-7/checkpoint-268/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-536/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-7/checkpoint-536/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a43401d391b55db1bd62a58e993ad75579905597ff219f85ea57d718d1bc6f0b
+size 267832560

run-7/checkpoint-536/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35a92277930eba821c689877afadb1bfccce40e7b529610922741c681ab7bbfe
+size 535727290

run-7/checkpoint-536/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e55dca5f6e18cb2e2edf00546b9892403181a8e61574324cf699dc072017f3f4
+size 14308

run-7/checkpoint-536/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09b015e71c6132fbf4a878642fb873800156f5a44c889138f1aaf90072850ffe
+size 1064

run-7/checkpoint-536/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-536/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-536/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-7/checkpoint-536/trainer_state.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "best_metric": 0.4979366411311316,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-7/checkpoint-536",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 536,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5216242074966431,
+      "eval_matthews_correlation": 0.38963004002937796,
+      "eval_runtime": 0.7221,
+      "eval_samples_per_second": 1444.321,
+      "eval_steps_per_second": 91.395,
+      "step": 268
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 8.266988754272461,
+      "learning_rate": 1.5079418524244762e-05,
+      "loss": 0.4418,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.5111855268478394,
+      "eval_matthews_correlation": 0.4979366411311316,
+      "eval_runtime": 0.8836,
+      "eval_samples_per_second": 1180.395,
+      "eval_steps_per_second": 74.694,
+      "step": 536
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1340,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 95606792627640.0,
+  "train_batch_size": 32,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.4055262883914262e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 32,
+    "seed": 29
+  }
+}

run-7/checkpoint-536/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34ca87411a20fbc048b2ded579321861753bc8a30c47fb67c9183b88b98abac5
+size 4984

run-7/checkpoint-536/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-7/checkpoint-804/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-7/checkpoint-804/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b727e0dea226c19a8f0ec429e971179ae3f2c92764880785326a6a96d3f04d1
+size 267832560

run-7/checkpoint-804/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d2b470feaab1917bbecba17fd9db1a0a12c9c464c6e5d390f5112ee52cfeb91b
+size 535727290

run-7/checkpoint-804/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:edf221826c4bdc590eced119bb710d55452db0873dc17295fc4107d038e0ee93
+size 14308

run-7/checkpoint-804/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bc7c877e8a1cf585af7e0ff0e3210412bdc3436ec385584fd354c056b1c99a2
+size 1064