kbberendsen committed on Feb 29

Commit

f4c38c7

•

1 Parent(s): a54873c

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

model.safetensors +1 -1
run-0/checkpoint-428/added_tokens.json +3 -0
run-0/checkpoint-428/config.json +35 -0
run-0/checkpoint-428/model.safetensors +3 -0
run-0/checkpoint-428/optimizer.pt +3 -0
run-0/checkpoint-428/rng_state.pth +3 -0
run-0/checkpoint-428/scheduler.pt +3 -0
run-0/checkpoint-428/special_tokens_map.json +15 -0
run-0/checkpoint-428/spm.model +3 -0
run-0/checkpoint-428/tokenizer.json +0 -0
run-0/checkpoint-428/tokenizer_config.json +58 -0
run-0/checkpoint-428/trainer_state.json +44 -0
run-0/checkpoint-428/training_args.bin +3 -0
run-0/checkpoint-642/added_tokens.json +3 -0
run-0/checkpoint-642/config.json +35 -0
run-0/checkpoint-642/model.safetensors +3 -0
run-0/checkpoint-642/optimizer.pt +3 -0
run-0/checkpoint-642/rng_state.pth +3 -0
run-0/checkpoint-642/scheduler.pt +3 -0
run-0/checkpoint-642/special_tokens_map.json +15 -0
run-0/checkpoint-642/spm.model +3 -0
run-0/checkpoint-642/tokenizer.json +0 -0
run-0/checkpoint-642/tokenizer_config.json +58 -0
run-0/checkpoint-642/trainer_state.json +60 -0
run-0/checkpoint-642/training_args.bin +3 -0
run-1/checkpoint-107/added_tokens.json +3 -0
run-1/checkpoint-107/config.json +35 -0
run-1/checkpoint-107/model.safetensors +3 -0
run-1/checkpoint-107/optimizer.pt +3 -0
run-1/checkpoint-107/rng_state.pth +3 -0
run-1/checkpoint-107/scheduler.pt +3 -0
run-1/checkpoint-107/special_tokens_map.json +15 -0
run-1/checkpoint-107/spm.model +3 -0
run-1/checkpoint-107/tokenizer.json +0 -0
run-1/checkpoint-107/tokenizer_config.json +58 -0
run-1/checkpoint-107/trainer_state.json +35 -0
run-1/checkpoint-107/training_args.bin +3 -0
run-1/checkpoint-214/added_tokens.json +3 -0
run-1/checkpoint-214/config.json +35 -0
run-1/checkpoint-214/model.safetensors +3 -0
run-1/checkpoint-214/optimizer.pt +3 -0
run-1/checkpoint-214/rng_state.pth +3 -0
run-1/checkpoint-214/scheduler.pt +3 -0
run-1/checkpoint-214/special_tokens_map.json +15 -0
run-1/checkpoint-214/spm.model +3 -0
run-1/checkpoint-214/tokenizer.json +0 -0
run-1/checkpoint-214/tokenizer_config.json +58 -0
run-1/checkpoint-214/trainer_state.json +44 -0
run-1/checkpoint-214/training_args.bin +3 -0
run-1/checkpoint-321/added_tokens.json +3 -0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdec6bb9c0432f0f0d3611082fb01d02612f7ac48e46f7729aad7ea87bc2536d
+oid sha256:c791636b6c132216c0275031a093f9924ff93001c41c5f02825e25ebde0b15b8
 size 1740304440

run-0/checkpoint-428/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

run-0/checkpoint-428/config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

run-0/checkpoint-428/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bdec6bb9c0432f0f0d3611082fb01d02612f7ac48e46f7729aad7ea87bc2536d
+size 1740304440

run-0/checkpoint-428/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:039ae4a93503f0e86604c57f74e58d3a981752399da500b5713c69afd840bb85
+size 3480840240

run-0/checkpoint-428/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:382cc1499ecec0a6997714e1b44973ee0f519ab12a71665011d86b399626389d
+size 14180

run-0/checkpoint-428/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2b0ead5cb401a93d13f1d908ee82433b3953069bc9dd819def7b68908c135a0
+size 1064

run-0/checkpoint-428/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-0/checkpoint-428/spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

run-0/checkpoint-428/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-428/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

run-0/checkpoint-428/trainer_state.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "deberta-v3-large-finetuned-cola-midterm/run-0/checkpoint-214",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 428,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.629180371761322,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 8.504,
+      "eval_samples_per_second": 122.648,
+      "eval_steps_per_second": 7.761,
+      "step": 214
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6256847977638245,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 8.3756,
+      "eval_samples_per_second": 124.528,
+      "eval_steps_per_second": 7.88,
+      "step": 428
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 642,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 7.918203646398832e-05,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 4,
+    "seed": 10
+  }
+}

run-0/checkpoint-428/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d79fe4a0a51daabafa98035e12f3a934594a2c4997a8e788c80ce53c66ff9ac
+size 4984

run-0/checkpoint-642/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

run-0/checkpoint-642/config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

run-0/checkpoint-642/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:954bcbe4c078746932c0591bb725b2fa501e48671e9e19c15ab3c779abc09d5c
+size 1740304440

run-0/checkpoint-642/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed6cf6f89ade3563eb1a705c49a68a0efad33065529a8ea4b9799940f06ff134
+size 3480840240

run-0/checkpoint-642/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec84766a430335b58cc88475667a12f8795011e2074b25b7dad6e61ae234d316
+size 14180

run-0/checkpoint-642/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af3cbe862299694ac340036533eb6d8265375010b27bf50b94a4393662876533
+size 1064

run-0/checkpoint-642/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-0/checkpoint-642/spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

run-0/checkpoint-642/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-0/checkpoint-642/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

run-0/checkpoint-642/trainer_state.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "best_metric": 0.0,
+  "best_model_checkpoint": "deberta-v3-large-finetuned-cola-midterm/run-0/checkpoint-214",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 642,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.629180371761322,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 8.504,
+      "eval_samples_per_second": 122.648,
+      "eval_steps_per_second": 7.761,
+      "step": 214
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6256847977638245,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 8.3756,
+      "eval_samples_per_second": 124.528,
+      "eval_steps_per_second": 7.88,
+      "step": 428
+    },
+    {
+      "epoch": 2.34,
+      "grad_norm": 4.587509632110596,
+      "learning_rate": 1.75137837661781e-05,
+      "loss": 0.6414,
+      "step": 500
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.6201117038726807,
+      "eval_matthews_correlation": 0.0,
+      "eval_runtime": 14.3558,
+      "eval_samples_per_second": 72.653,
+      "eval_steps_per_second": 4.597,
+      "step": 642
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 642,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 56862758467680.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 7.918203646398832e-05,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 4,
+    "seed": 10
+  }
+}

run-0/checkpoint-642/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d79fe4a0a51daabafa98035e12f3a934594a2c4997a8e788c80ce53c66ff9ac
+size 4984

run-1/checkpoint-107/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

run-1/checkpoint-107/config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

run-1/checkpoint-107/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbb97239ee420701eea12e8830ef6a3b67568c0245ba25d9b21546298a03272d
+size 1740304440

run-1/checkpoint-107/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9676e382729f07ecf07136cb23602c76a3092cfec665305ef55baedf288acd83
+size 3480840240

run-1/checkpoint-107/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:424d332af8f2f946ca851e4c26be91043e0d216824beee0b88ad7607cef38475
+size 14244

run-1/checkpoint-107/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09df6bdf5ddeda215d241b6da8cb2bb237d5a55482032c73214c2c2065aa975a
+size 1064

run-1/checkpoint-107/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-1/checkpoint-107/spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

run-1/checkpoint-107/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-107/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

run-1/checkpoint-107/trainer_state.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "best_metric": 0.6352032367163551,
+  "best_model_checkpoint": "deberta-v3-large-finetuned-cola-midterm/run-1/checkpoint-107",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 107,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.3989429175853729,
+      "eval_matthews_correlation": 0.6352032367163551,
+      "eval_runtime": 8.55,
+      "eval_samples_per_second": 121.988,
+      "eval_steps_per_second": 7.719,
+      "step": 107
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 535,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.359139262812914e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 8,
+    "seed": 19
+  }
+}

run-1/checkpoint-107/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35505d279768e84df3a864aadde8b9ddc830b1c914325af6898748074b37f0f9
+size 4984

run-1/checkpoint-214/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

run-1/checkpoint-214/config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-large",
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

run-1/checkpoint-214/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f501fe671ae27a7ec05642a22603319c16318b1623d9756f034244949a69636
+size 1740304440

run-1/checkpoint-214/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6c4e23ad84d3a34d9ed8a157848ebb058df2f2b627b094448f996b5f5ad56d6
+size 3480840240

run-1/checkpoint-214/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0e3a3f1d57224f08391b2e3d0d3fb832eb9751de6bd3dc7098d72105774823d3
+size 14244

run-1/checkpoint-214/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b02aa1dfe75cb263d8f4be8c11befd364dd06e5bd93e12cd10caaf48efeb05f
+size 1064

run-1/checkpoint-214/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

run-1/checkpoint-214/spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

run-1/checkpoint-214/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-1/checkpoint-214/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

run-1/checkpoint-214/trainer_state.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "best_metric": 0.6352032367163551,
+  "best_model_checkpoint": "deberta-v3-large-finetuned-cola-midterm/run-1/checkpoint-107",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 214,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.3989429175853729,
+      "eval_matthews_correlation": 0.6352032367163551,
+      "eval_runtime": 8.55,
+      "eval_samples_per_second": 121.988,
+      "eval_steps_per_second": 7.719,
+      "step": 107
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7654077410697937,
+      "eval_matthews_correlation": 0.6243387594128297,
+      "eval_runtime": 8.6801,
+      "eval_samples_per_second": 120.16,
+      "eval_steps_per_second": 7.604,
+      "step": 214
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 535,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 2.359139262812914e-05,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 8,
+    "seed": 19
+  }
+}

run-1/checkpoint-214/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:35505d279768e84df3a864aadde8b9ddc830b1c914325af6898748074b37f0f9
+size 4984

run-1/checkpoint-321/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}