Training in progress, step 500

Browse files

Files changed (15) hide show

config.json +26 -26
model.safetensors +1 -1
run-0/checkpoint-500/config.json +26 -26
run-0/checkpoint-500/model.safetensors +1 -1
run-0/checkpoint-500/optimizer.pt +2 -2
run-0/checkpoint-500/rng_state.pth +2 -2
run-0/checkpoint-500/scheduler.pt +1 -1
run-0/checkpoint-500/special_tokens_map.json +7 -7
run-0/checkpoint-500/tokenizer_config.json +55 -55
run-0/checkpoint-500/trainer_state.json +43 -31
run-0/checkpoint-500/training_args.bin +2 -2
runs/Jun05_16-09-47_Donghao-Win/events.out.tfevents.1717574992.Donghao-Win.8196.0 +3 -0
special_tokens_map.json +7 -7
tokenizer_config.json +55 -55
training_args.bin +2 -2

config.json CHANGED Viewed

@@ -1,26 +1,26 @@
-{
-  "_name_or_path": "bert-base-uncased",
-  "architectures": [
-    "BertForMultipleChoice"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
-  "torch_dtype": "float32",
-  "transformers_version": "4.40.1",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 30522
-}

+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMultipleChoice"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6ee4c4d9478e0e67fd932bb33dd4a77a7d04778a8dd602526c5c88c515b3cbc
 size 437955572

 version https://git-lfs.github.com/spec/v1
+oid sha256:a49923d6a27a25a1e44c5bd0774551290dcf1419f0a8f312d5924957b74dcebd
 size 437955572

run-0/checkpoint-500/config.json CHANGED Viewed

@@ -1,26 +1,26 @@
-{
-  "_name_or_path": "bert-base-uncased",
-  "architectures": [
-    "BertForMultipleChoice"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "classifier_dropout": null,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 512,
-  "model_type": "bert",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "position_embedding_type": "absolute",
-  "torch_dtype": "float32",
-  "transformers_version": "4.40.1",
-  "type_vocab_size": 2,
-  "use_cache": true,
-  "vocab_size": 30522
-}

+{
+  "_name_or_path": "bert-base-uncased",
+  "architectures": [
+    "BertForMultipleChoice"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-0/checkpoint-500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b69bca03697f591be1f91ed115752159e8cab342832a7fa0b7924bb5d33ec207
 size 437955572

 version https://git-lfs.github.com/spec/v1
+oid sha256:a49923d6a27a25a1e44c5bd0774551290dcf1419f0a8f312d5924957b74dcebd
 size 437955572

run-0/checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57a711048fbab27a48d38d6b53b6524caeb3d845e8813f0f1b800880bee4630b
-size 876026618

 version https://git-lfs.github.com/spec/v1
+oid sha256:449190f05ac53cccade6ffb7776a3231e9d4ef7cf6923803691bfc08ceac4c71
+size 876032250

run-0/checkpoint-500/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b06dd8120218bfe4328bbf5c9f119a6ff29368defd245db5bded935105bb4b16
-size 13990

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a5a554a6a0caa6bc70d99f85df9b76b92ec635bcf7bf27f721c0541d4244d39
+size 14244

run-0/checkpoint-500/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0fe64e97bdb858b23a46cd8fc0ae46d424130cbba859196a0e470187143d72f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cb5f2b859ba2016887b956774872190dbfe7a341916b206bfe258f8f6cbd2a0
 size 1064

run-0/checkpoint-500/special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,7 @@
-{
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
-}

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-500/tokenizer_config.json CHANGED Viewed

@@ -1,55 +1,55 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "100": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "101": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "102": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "103": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
-}

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-0/checkpoint-500/trainer_state.json CHANGED Viewed

@@ -1,31 +1,43 @@
-{
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 0.6702412868632708,
-  "eval_steps": 500,
-  "global_step": 500,
-  "is_hyper_param_search": true,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.6702412868632708,
-      "grad_norm": 4.722346305847168,
-      "learning_rate": 2.501649569639987e-06,
-      "loss": 0.6173,
-      "step": 500
-    }
-  ],
-  "logging_steps": 500,
-  "max_steps": 7460,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
-  "save_steps": 500,
-  "total_flos": 337338233002368.0,
-  "train_batch_size": 16,
-  "trial_name": null,
-  "trial_params": {
-    "learning_rate": 2.681365774355503e-06,
-    "per_device_train_batch_size": 16
-  }
-}

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.6702412868632708,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.6702412868632708,
+      "grad_norm": 5.81724739074707,
+      "learning_rate": 3.990521754275675e-05,
+      "loss": 0.5455,
+      "step": 500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 7460,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 336598239803328.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 4.2771971676575485e-05,
+    "per_device_train_batch_size": 16
+  }
+}

run-0/checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d395faff7fe5da348192feab053639cba754ab05cac8cf8d9e5530c47ddc855
-size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e48d5b36e733b257aff4aaa0a0b7dc42448b042d96f9c438a05b4afa15b9405
+size 5176

runs/Jun05_16-09-47_Donghao-Win/events.out.tfevents.1717574992.Donghao-Win.8196.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2ee0de2f098ae451270f607ea7b17e3feb6cbd496c6fc1b851a9bf84f6f2ad25
+size 5097

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,7 @@
-{
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
-}

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer_config.json CHANGED Viewed

@@ -1,55 +1,55 @@
-{
-  "added_tokens_decoder": {
-    "0": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "100": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "101": {
-      "content": "[CLS]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "102": {
-      "content": "[SEP]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "103": {
-      "content": "[MASK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "[CLS]",
-  "do_lower_case": true,
-  "mask_token": "[MASK]",
-  "model_max_length": 512,
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "strip_accents": null,
-  "tokenize_chinese_chars": true,
-  "tokenizer_class": "BertTokenizer",
-  "unk_token": "[UNK]"
-}

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6a3f23e8c0adcd3ef473b2fa25280f07a078cd15d870a418c0ade70640bca2b
-size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e48d5b36e733b257aff4aaa0a0b7dc42448b042d96f9c438a05b4afa15b9405
+size 5176