End of training

Browse files

Files changed (7) hide show

README.md +59 -0
adapter_model.safetensors +1 -1
all_results.json +8 -0
runs/Mar30_23-17-37_ae9751f0a8f7/events.out.tfevents.1711840657.ae9751f0a8f7.1547.1 +2 -2
runs/Mar30_23-17-37_ae9751f0a8f7/events.out.tfevents.1711847454.ae9751f0a8f7.1547.2 +3 -0
train_results.json +8 -0
trainer_state.json +226 -0

README.md ADDED Viewed

	@@ -0,0 +1,59 @@

+---
+library_name: peft
+tags:
+- generated_from_trainer
+base_model: meta-llama/Llama-2-7b-chat-hf
+model-index:
+- name: fine_tuned_llama2_7b
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# fine_tuned_llama2_7b
+This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.3496
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 1
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 4
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+- mixed_precision_training: Native AMP
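+### Training results
+Training ran for 1 epoch (2885 optimizer steps, about 6766 seconds) and finished with an average training loss of 2.3624.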
+### Framework versions
+- PEFT 0.10.0
+- Transformers 4.38.2
+- Pytorch 2.2.1+cu121
+- Datasets 2.18.0
+- Tokenizers 0.15.2

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80b3c689b19e8c67e4b5ca8233a24cb45bfc80bd4b028b9bbe5637a14c4437c0
 size 1279323952

 version https://git-lfs.github.com/spec/v1
+oid sha256:a46347461bae1c3fbf43643670e8c95fe56bbfb17586a9cc2f3acaa76b05e0f4
 size 1279323952

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 1.1131515504795648e+16,
+    "train_loss": 2.3624265621191913,
+    "train_runtime": 6766.2575,
+    "train_samples_per_second": 1.706,
+    "train_steps_per_second": 0.426
+}

runs/Mar30_23-17-37_ae9751f0a8f7/events.out.tfevents.1711840657.ae9751f0a8f7.1547.1 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ef3246830d1985361ab539aa2fc5cee259ca80ffe7b097b1787683795fc3165
-size 10330

 version https://git-lfs.github.com/spec/v1
+oid sha256:13c5f3497a00be5f3c0eda5194b4cecc7708f3ea890be734a0c173c64a9d66b9
+size 11317

runs/Mar30_23-17-37_ae9751f0a8f7/events.out.tfevents.1711847454.ae9751f0a8f7.1547.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06431d4495e3d730f284d84b7e79ba36e022c244928527eda05b3070b5be38cf
+size 359

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "total_flos": 1.1131515504795648e+16,
+    "train_loss": 2.3624265621191913,
+    "train_runtime": 6766.2575,
+    "train_samples_per_second": 1.706,
+    "train_steps_per_second": 0.426
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,226 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9997401022264576,
+  "eval_steps": 500,
+  "global_step": 2885,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03,
+      "grad_norm": 22.53300666809082,
+      "learning_rate": 6.71280276816609e-06,
+      "loss": 3.1491,
+      "step": 100
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 36.242984771728516,
+      "learning_rate": 1.356401384083045e-05,
+      "loss": 2.6131,
+      "step": 200
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 25.216327667236328,
+      "learning_rate": 1.999973639055537e-05,
+      "loss": 2.5726,
+      "step": 300
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 14.273802757263184,
+      "learning_rate": 1.9917836961775225e-05,
+      "loss": 2.4989,
+      "step": 400
+    },
+    {
+      "epoch": 0.17,
+      "grad_norm": 27.216812133789062,
+      "learning_rate": 1.969086765436979e-05,
+      "loss": 2.5906,
+      "step": 500
+    },
+    {
+      "epoch": 0.21,
+      "grad_norm": 18.74100112915039,
+      "learning_rate": 1.9322148386785378e-05,
+      "loss": 2.4275,
+      "step": 600
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 20.627084732055664,
+      "learning_rate": 1.8817072478109763e-05,
+      "loss": 2.5103,
+      "step": 700
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 15.611855506896973,
+      "learning_rate": 1.818302775908169e-05,
+      "loss": 2.3706,
+      "step": 800
+    },
+    {
+      "epoch": 0.31,
+      "grad_norm": 25.303524017333984,
+      "learning_rate": 1.7429288509041197e-05,
+      "loss": 2.3601,
+      "step": 900
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 20.18657875061035,
+      "learning_rate": 1.6566879799477148e-05,
+      "loss": 2.5054,
+      "step": 1000
+    },
+    {
+      "epoch": 0.38,
+      "grad_norm": 17.65004539489746,
+      "learning_rate": 1.560841622844192e-05,
+      "loss": 2.3717,
+      "step": 1100
+    },
+    {
+      "epoch": 0.42,
+      "grad_norm": 19.5482177734375,
+      "learning_rate": 1.4578679381126853e-05,
+      "loss": 2.3772,
+      "step": 1200
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 14.92688274383545,
+      "learning_rate": 1.3471954275891059e-05,
+      "loss": 2.2991,
+      "step": 1300
+    },
+    {
+      "epoch": 0.49,
+      "grad_norm": 10.425432205200195,
+      "learning_rate": 1.2314444308256605e-05,
+      "loss": 2.2865,
+      "step": 1400
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 16.403301239013672,
+      "learning_rate": 1.1123080572287608e-05,
+      "loss": 2.2595,
+      "step": 1500
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 11.935959815979004,
+      "learning_rate": 9.915289346843219e-06,
+      "loss": 2.3662,
+      "step": 1600
+    },
+    {
+      "epoch": 0.59,
+      "grad_norm": 18.410987854003906,
+      "learning_rate": 8.708737198449509e-06,
+      "loss": 2.2021,
+      "step": 1700
+    },
+    {
+      "epoch": 0.62,
+      "grad_norm": 15.293601036071777,
+      "learning_rate": 7.521072569442963e-06,
+      "loss": 2.2545,
+      "step": 1800
+    },
+    {
+      "epoch": 0.66,
+      "grad_norm": 16.34610939025879,
+      "learning_rate": 6.369667631219584e-06,
+      "loss": 2.3199,
+      "step": 1900
+    },
+    {
+      "epoch": 0.69,
+      "grad_norm": 15.948208808898926,
+      "learning_rate": 5.2713641785457504e-06,
+      "loss": 2.2029,
+      "step": 2000
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 27.17706298828125,
+      "learning_rate": 4.242227281777747e-06,
+      "loss": 2.2861,
+      "step": 2100
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 19.407489776611328,
+      "learning_rate": 3.297310300360622e-06,
+      "loss": 2.2157,
+      "step": 2200
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 11.622710227966309,
+      "learning_rate": 2.450434694793621e-06,
+      "loss": 2.2724,
+      "step": 2300
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 16.701732635498047,
+      "learning_rate": 1.7139878577898772e-06,
+      "loss": 2.1622,
+      "step": 2400
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 10.720149040222168,
+      "learning_rate": 1.0987419217881333e-06,
+      "loss": 2.2026,
+      "step": 2500
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 14.398381233215332,
+      "learning_rate": 6.136961931496943e-07,
+      "loss": 2.2619,
+      "step": 2600
+    },
+    {
+      "epoch": 0.94,
+      "grad_norm": 16.95086669921875,
+      "learning_rate": 2.6594551778223896e-07,
+      "loss": 2.2626,
+      "step": 2700
+    },
+    {
+      "epoch": 0.97,
+      "grad_norm": 12.132495880126953,
+      "learning_rate": 6.057650362879753e-08,
+      "loss": 2.1139,
+      "step": 2800
+    },
+    {
+      "epoch": 1.0,
+      "step": 2885,
+      "total_flos": 1.1131515504795648e+16,
+      "train_loss": 2.3624265621191913,
+      "train_runtime": 6766.2575,
+      "train_samples_per_second": 1.706,
+      "train_steps_per_second": 0.426
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 2885,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "total_flos": 1.1131515504795648e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}