SneakyLemon/Llama3LoraCauseEffect

Browse files

Files changed (6) hide show

README.md +63 -26
adapter_config.json +3 -5
adapter_model.safetensors +2 -2
runs/Jun19_19-19-02_1cfbeb3fcc7d/events.out.tfevents.1718824742.1cfbeb3fcc7d.1573.0 +3 -0
runs/Jun19_19-19-28_1cfbeb3fcc7d/events.out.tfevents.1718824769.1cfbeb3fcc7d.1573.1 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4487
-- F1: 0.8063
 ## Model description
@@ -38,40 +38,77 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 128
 - eval_batch_size: 64
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 35
-- num_epochs: 3
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss | F1     |
 |:-------------:|:------:|:----:|:---------------:|:------:|
-| 0.8917        | 0.1368 | 16   | 0.9228          | 0.5606 |
-| 0.8219        | 0.2735 | 32   | 0.7617          | 0.6112 |
-| 0.7154        | 0.4103 | 48   | 0.6455          | 0.6687 |
-| 0.6278        | 0.5470 | 64   | 0.5976          | 0.6955 |
-| 0.5923        | 0.6838 | 80   | 0.5443          | 0.7327 |
-| 0.5417        | 0.8205 | 96   | 0.5212          | 0.7479 |
-| 0.5094        | 0.9573 | 112  | 0.5087          | 0.7586 |
-| 0.4866        | 1.0940 | 128  | 0.4835          | 0.7719 |
-| 0.4743        | 1.2308 | 144  | 0.5172          | 0.7609 |
-| 0.4887        | 1.3675 | 160  | 0.4905          | 0.7718 |
-| 0.452         | 1.5043 | 176  | 0.4706          | 0.7817 |
-| 0.4592        | 1.6410 | 192  | 0.4658          | 0.7795 |
-| 0.4372        | 1.7778 | 208  | 0.4726          | 0.7782 |
-| 0.4387        | 1.9145 | 224  | 0.4769          | 0.7775 |
-| 0.4242        | 2.0513 | 240  | 0.4526          | 0.7929 |
-| 0.3881        | 2.1880 | 256  | 0.4541          | 0.7975 |
-| 0.4081        | 2.3248 | 272  | 0.4524          | 0.8002 |
-| 0.3768        | 2.4615 | 288  | 0.4609          | 0.7931 |
-| 0.3838        | 2.5983 | 304  | 0.4511          | 0.8037 |
-| 0.3888        | 2.7350 | 320  | 0.4483          | 0.8011 |
-| 0.3791        | 2.8718 | 336  | 0.4487          | 0.8063 |
 ### Framework versions

 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.4468
+- F1: 0.8129
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
+- train_batch_size: 64
 - eval_batch_size: 64
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 70
+- num_epochs: 4
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss | F1     |
 |:-------------:|:------:|:----:|:---------------:|:------:|
+| 1.3435        | 0.0684 | 16   | 1.1414          | 0.5363 |
+| 1.1308        | 0.1368 | 32   | 1.0700          | 0.5570 |
+| 0.9965        | 0.2051 | 48   | 0.9828          | 0.5715 |
+| 0.9948        | 0.2735 | 64   | 0.8699          | 0.5988 |
+| 0.7915        | 0.3419 | 80   | 0.7255          | 0.6388 |
+| 0.7498        | 0.4103 | 96   | 0.7282          | 0.5896 |
+| 0.6487        | 0.4786 | 112  | 0.6206          | 0.6762 |
+| 0.6269        | 0.5470 | 128  | 0.5863          | 0.6995 |
+| 0.5854        | 0.6154 | 144  | 0.5598          | 0.7262 |
+| 0.591         | 0.6838 | 160  | 0.5459          | 0.7346 |
+| 0.5443        | 0.7521 | 176  | 0.5346          | 0.7422 |
+| 0.5621        | 0.8205 | 192  | 0.5178          | 0.7610 |
+| 0.4917        | 0.8889 | 208  | 0.5109          | 0.7698 |
+| 0.5397        | 0.9573 | 224  | 0.4948          | 0.7709 |
+| 0.5147        | 1.0256 | 240  | 0.4932          | 0.7681 |
+| 0.4582        | 1.0940 | 256  | 0.4871          | 0.7760 |
+| 0.4912        | 1.1624 | 272  | 0.4807          | 0.7822 |
+| 0.4322        | 1.2308 | 288  | 0.5000          | 0.7757 |
+| 0.4918        | 1.2991 | 304  | 0.4779          | 0.7850 |
+| 0.4611        | 1.3675 | 320  | 0.4709          | 0.7872 |
+| 0.4636        | 1.4359 | 336  | 0.4753          | 0.7804 |
+| 0.4458        | 1.5043 | 352  | 0.4774          | 0.7829 |
+| 0.4386        | 1.5726 | 368  | 0.4629          | 0.7983 |
+| 0.4551        | 1.6410 | 384  | 0.4600          | 0.7941 |
+| 0.465         | 1.7094 | 400  | 0.4549          | 0.7967 |
+| 0.4501        | 1.7778 | 416  | 0.4539          | 0.7966 |
+| 0.4083        | 1.8462 | 432  | 0.4633          | 0.7968 |
+| 0.4259        | 1.9145 | 448  | 0.4573          | 0.7959 |
+| 0.4371        | 1.9829 | 464  | 0.4506          | 0.8002 |
+| 0.4189        | 2.0513 | 480  | 0.4539          | 0.7901 |
+| 0.4324        | 2.1197 | 496  | 0.4698          | 0.7855 |
+| 0.4053        | 2.1880 | 512  | 0.4483          | 0.8042 |
+| 0.3535        | 2.2564 | 528  | 0.4515          | 0.8054 |
+| 0.3723        | 2.3248 | 544  | 0.4523          | 0.8091 |
+| 0.3865        | 2.3932 | 560  | 0.4634          | 0.7974 |
+| 0.3789        | 2.4615 | 576  | 0.4540          | 0.8031 |
+| 0.4015        | 2.5299 | 592  | 0.4434          | 0.8091 |
+| 0.3528        | 2.5983 | 608  | 0.4484          | 0.8085 |
+| 0.3634        | 2.6667 | 624  | 0.4440          | 0.8077 |
+| 0.3533        | 2.7350 | 640  | 0.4563          | 0.8004 |
+| 0.3757        | 2.8034 | 656  | 0.4498          | 0.8013 |
+| 0.3831        | 2.8718 | 672  | 0.4478          | 0.7993 |
+| 0.3851        | 2.9402 | 688  | 0.4411          | 0.8096 |
+| 0.3672        | 3.0085 | 704  | 0.4427          | 0.8115 |
+| 0.3356        | 3.0769 | 720  | 0.4492          | 0.8085 |
+| 0.3316        | 3.1453 | 736  | 0.4577          | 0.8108 |
+| 0.3297        | 3.2137 | 752  | 0.4546          | 0.8040 |
+| 0.3569        | 3.2821 | 768  | 0.4502          | 0.8070 |
+| 0.3517        | 3.3504 | 784  | 0.4473          | 0.8110 |
+| 0.3173        | 3.4188 | 800  | 0.4515          | 0.8092 |
+| 0.3369        | 3.4872 | 816  | 0.4536          | 0.8041 |
+| 0.2986        | 3.5556 | 832  | 0.4500          | 0.8115 |
+| 0.3565        | 3.6239 | 848  | 0.4480          | 0.8129 |
+| 0.3417        | 3.6923 | 864  | 0.4458          | 0.8120 |
+| 0.3321        | 3.7607 | 880  | 0.4477          | 0.8128 |
+| 0.3324        | 3.8291 | 896  | 0.4475          | 0.8123 |
+| 0.338         | 3.8974 | 912  | 0.4468          | 0.8143 |
+| 0.3419        | 3.9658 | 928  | 0.4468          | 0.8129 |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -10,15 +10,13 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 256,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
-  "modules_to_save": [
-    "score.weight"
-  ],
   "peft_type": "LORA",
-  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 128,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
+  "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14a3571a063afc9ad13aec07b18b399c9a9222fc98b135282c2b2d62f28c598c
-size 1268802808

 version https://git-lfs.github.com/spec/v1
+oid sha256:c91b9031eed3b152fe69bf4a30373bedfdf0b187105a5c4c3b963d5119100c3b
+size 1159750648

runs/Jun19_19-19-02_1cfbeb3fcc7d/events.out.tfevents.1718824742.1cfbeb3fcc7d.1573.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b461d19a3111ed080b416e9fdff22f60e58bcb3c647b1f3bf565b0e705fe20f
+size 5364

runs/Jun19_19-19-28_1cfbeb3fcc7d/events.out.tfevents.1718824769.1cfbeb3fcc7d.1573.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ce071f459374637dcdde1aa71026964af43695cd1285fec191a4f161fbadfee
+size 36321

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35e5048b818c7be80a0fa915e212257550fcf5cc613bb5820fde12145936c406
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bada85ef55fb7462258f056b2e561ab9770cb07836281699caa38436d2312cc
 size 5048