Training in progress, step 500
- adapter_model.safetensors +1 -1
- runs/May22_05-46-00_92dcc4555414/events.out.tfevents.1716356830.92dcc4555414.24.0 +2 -2
- wandb/debug-internal.log +0 -0
- wandb/run-20240522_054348-vgrzs6jq/files/output.log +38 -0
- wandb/run-20240522_054348-vgrzs6jq/files/wandb-summary.json +1 -1
- wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log +0 -0
- wandb/run-20240522_054348-vgrzs6jq/run-vgrzs6jq.wandb +2 -2
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:46c0cd3d256471a4a1a792427492e2a74010b85f101c1392de1e3caf0f5c8237
 size 1279323952
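Note: adapter_model.safetensors matches the filename convention PEFT uses for adapter (e.g. LoRA) checkpoints. As a minimal sketch only — assuming this repository is a PEFT adapter for meta-llama/Llama-2-7b-chat-hf, with a placeholder repo id standing in for the actual one — the adapter could be loaded on top of the base model like this:

# Minimal sketch: loading a PEFT adapter on top of the Llama-2 base model.
# "your-username/your-adapter-repo" is a placeholder, not the real repo id.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    torch_dtype="auto",
)
model = PeftModel.from_pretrained(base, "your-username/your-adapter-repo")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")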
runs/May22_05-46-00_92dcc4555414/events.out.tfevents.1716356830.92dcc4555414.24.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:08d15f9b34e4a9289b089a6426ec43338e227627f3c88d98a8b52cbc76a68229
+size 43137
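Note: events.out.tfevents.* is a TensorBoard event log written by the Trainer's TensorBoard integration. A minimal sketch for inspecting its scalars after downloading the run directory locally (the path is a placeholder; assumes the tensorboard package is installed):

# Minimal sketch: reading scalar metrics from a TensorBoard event file.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/May22_05-46-00_92dcc4555414")  # placeholder local path
acc.Reload()
for tag in acc.Tags()["scalars"]:
    last = acc.Scalars(tag)[-1]          # most recent logged point for this tag
    print(tag, last.step, last.value)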
wandb/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff.
wandb/run-20240522_054348-vgrzs6jq/files/output.log CHANGED
@@ -736,3 +736,41 @@ tokenizer config file saved in /kaggle/working/checkpoint-400/tokenizer_config.j
 Special tokens file saved in /kaggle/working/checkpoint-400/special_tokens_map.json
 tokenizer config file saved in /kaggle/working/tokenizer_config.json
 Special tokens file saved in /kaggle/working/special_tokens_map.json
+Deleting older checkpoint [/kaggle/working/checkpoint-300] due to args.save_total_limit
+***** Running Evaluation *****
+  Num examples = 1801
+  Batch size = 1
+Saving model checkpoint to /kaggle/working/checkpoint-500
+loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-chat-hf/snapshots/f5db02db724555f92da89c216ac04704f23d4590/config.json
+Model config LlamaConfig {
+  "_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 4096,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.39.3",
+  "use_cache": true,
+  "vocab_size": 32000
+}
+tokenizer config file saved in /kaggle/working/checkpoint-500/tokenizer_config.json
+Special tokens file saved in /kaggle/working/checkpoint-500/special_tokens_map.json
+tokenizer config file saved in /kaggle/working/tokenizer_config.json
+Special tokens file saved in /kaggle/working/special_tokens_map.json
wandb/run-20240522_054348-vgrzs6jq/files/wandb-summary.json CHANGED
@@ -1 +1 @@
-{"train/loss": 0.
+{"train/loss": 0.3214, "train/grad_norm": 0.44836246967315674, "train/learning_rate": 1.8213704991625794e-05, "train/rewards/chosen": -13.138799667358398, "train/rewards/rejected": -34.78124237060547, "train/rewards/accuracies": 0.949999988079071, "train/rewards/margins": 21.642440795898438, "train/logps/rejected": -5781.595703125, "train/logps/chosen": -3064.06201171875, "train/logits/rejected": -3.839373826980591, "train/logits/chosen": -3.402714490890503, "train/epoch": 0.28, "train/global_step": 500, "_timestamp": 1716393908.4685614, "_runtime": 37279.8330783844, "_step": 54, "eval/loss": 0.2106475681066513, "eval/runtime": 5069.4045, "eval/samples_per_second": 0.355, "eval/steps_per_second": 0.355, "eval/rewards/chosen": -9.335017204284668, "eval/rewards/rejected": -35.3092041015625, "eval/rewards/accuracies": 0.9583564400672913, "eval/rewards/margins": 25.974185943603516, "eval/logps/rejected": -5814.42333984375, "eval/logps/chosen": -2537.174560546875, "eval/logits/rejected": -3.584986686706543, "eval/logits/chosen": -3.028712272644043}
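Note: the rewards/* keys follow the logging convention of preference-optimization trainers such as trl's DPOTrainer, where rewards/margins is rewards/chosen minus rewards/rejected. The summary above is consistent with that (a quick check using the logged values; small float32 rounding aside):

# Quick consistency check on the logged reward margins (values copied from the summary above).
train_margin = -13.138799667358398 - (-34.78124237060547)
eval_margin = -9.335017204284668 - (-35.3092041015625)
print(round(train_margin, 4))  # 21.6424, consistent with train/rewards/margins
print(round(eval_margin, 4))   # 25.9742, consistent with eval/rewards/margins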
wandb/run-20240522_054348-vgrzs6jq/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff.
wandb/run-20240522_054348-vgrzs6jq/run-vgrzs6jq.wandb CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:548b3f2d3fd3aa3693966ecd998c8681a1792787f420961b8a80ec558ad29e06
+size 1354035