SneakyLemon/Llama3LoraCauseEffect

Browse files

Files changed (12)

README.md +26 -19
adapter_config.json +4 -4
adapter_model.safetensors +2 -2
runs/Jun14_14-46-55_0e3c5c5304b5/events.out.tfevents.1718376416.0e3c5c5304b5.2041.0 +3 -0
runs/Jun14_14-47-55_0e3c5c5304b5/events.out.tfevents.1718376476.0e3c5c5304b5.2041.1 +3 -0
runs/Jun14_14-51-36_0e3c5c5304b5/events.out.tfevents.1718376696.0e3c5c5304b5.2041.2 +3 -0
runs/Jun14_14-51-44_0e3c5c5304b5/events.out.tfevents.1718376705.0e3c5c5304b5.2041.3 +3 -0
runs/Jun14_14-52-22_0e3c5c5304b5/events.out.tfevents.1718376742.0e3c5c5304b5.2041.4 +3 -0
runs/Jun14_14-52-45_0e3c5c5304b5/events.out.tfevents.1718376766.0e3c5c5304b5.2041.5 +3 -0
runs/Jun14_14-54-00_0e3c5c5304b5/events.out.tfevents.1718376841.0e3c5c5304b5.2041.6 +3 -0
runs/Jun14_14-54-20_0e3c5c5304b5/events.out.tfevents.1718376861.0e3c5c5304b5.2041.7 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.5408
-- F1: 0.7432
 ## Model description
@@ -38,33 +38,40 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 64
 - eval_batch_size: 64
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 50
-- num_epochs: 1
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss | F1     |
 |:-------------:|:------:|:----:|:---------------:|:------:|
-| 1.7866        | 0.0684 | 16   | 1.4251          | 0.4727 |
-| 1.2951        | 0.1368 | 32   | 1.2220          | 0.4906 |
-| 1.1455        | 0.2051 | 48   | 1.0200          | 0.5153 |
-| 0.9479        | 0.2735 | 64   | 0.8454          | 0.5595 |
-| 0.7627        | 0.3419 | 80   | 0.7389          | 0.6079 |
-| 0.7078        | 0.4103 | 96   | 0.7002          | 0.6453 |
-| 0.6751        | 0.4786 | 112  | 0.6589          | 0.6638 |
-| 0.6441        | 0.5470 | 128  | 0.6116          | 0.6877 |
-| 0.5576        | 0.6154 | 144  | 0.5869          | 0.7088 |
-| 0.581         | 0.6838 | 160  | 0.5639          | 0.7300 |
-| 0.5661        | 0.7521 | 176  | 0.5626          | 0.7287 |
-| 0.5795        | 0.8205 | 192  | 0.5505          | 0.7399 |
-| 0.5344        | 0.8889 | 208  | 0.5528          | 0.7326 |
-| 0.5385        | 0.9573 | 224  | 0.5408          | 0.7432 |
 ### Framework versions

 This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.4487
+- F1: 0.8063
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
+- train_batch_size: 128
 - eval_batch_size: 64
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 35
+- num_epochs: 3
 - mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss | F1     |
 |:-------------:|:------:|:----:|:---------------:|:------:|
+| 0.8917        | 0.1368 | 16   | 0.9228          | 0.5606 |
+| 0.8219        | 0.2735 | 32   | 0.7617          | 0.6112 |
+| 0.7154        | 0.4103 | 48   | 0.6455          | 0.6687 |
+| 0.6278        | 0.5470 | 64   | 0.5976          | 0.6955 |
+| 0.5923        | 0.6838 | 80   | 0.5443          | 0.7327 |
+| 0.5417        | 0.8205 | 96   | 0.5212          | 0.7479 |
+| 0.5094        | 0.9573 | 112  | 0.5087          | 0.7586 |
+| 0.4866        | 1.0940 | 128  | 0.4835          | 0.7719 |
+| 0.4743        | 1.2308 | 144  | 0.5172          | 0.7609 |
+| 0.4887        | 1.3675 | 160  | 0.4905          | 0.7718 |
+| 0.452         | 1.5043 | 176  | 0.4706          | 0.7817 |
+| 0.4592        | 1.6410 | 192  | 0.4658          | 0.7795 |
+| 0.4372        | 1.7778 | 208  | 0.4726          | 0.7782 |
+| 0.4387        | 1.9145 | 224  | 0.4769          | 0.7775 |
+| 0.4242        | 2.0513 | 240  | 0.4526          | 0.7929 |
+| 0.3881        | 2.1880 | 256  | 0.4541          | 0.7975 |
+| 0.4081        | 2.3248 | 272  | 0.4524          | 0.8002 |
+| 0.3768        | 2.4615 | 288  | 0.4609          | 0.7931 |
+| 0.3838        | 2.5983 | 304  | 0.4511          | 0.8037 |
+| 0.3888        | 2.7350 | 320  | 0.4483          | 0.8011 |
+| 0.3791        | 2.8718 | 336  | 0.4487          | 0.8063 |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -10,7 +10,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha": 128,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -18,12 +18,12 @@
     "score.weight"
   ],
   "peft_type": "LORA",
-  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj"
   ],
   "task_type": "SEQ_CLS",
   "use_dora": false,

   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
+  "lora_alpha": 256,
   "lora_dropout": 0.1,
   "megatron_config": null,
   "megatron_core": "megatron.core",
     "score.weight"
   ],
   "peft_type": "LORA",
+  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "SEQ_CLS",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9df184ac2ab70cd0516e96972eb137aed3289badce24899f175151d97a09837
-size 1159750648

 version https://git-lfs.github.com/spec/v1
+oid sha256:14a3571a063afc9ad13aec07b18b399c9a9222fc98b135282c2b2d62f28c598c
+size 1268802808

runs/Jun14_14-46-55_0e3c5c5304b5/events.out.tfevents.1718376416.0e3c5c5304b5.2041.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c293a85d73ab75be928e04039566831c4d322d8ac2af884e403aad08fea98a4
+size 5364

runs/Jun14_14-47-55_0e3c5c5304b5/events.out.tfevents.1718376476.0e3c5c5304b5.2041.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f98954ec707cf67fbe5c91b11626d58d98fd390b281083e2a643d96a27b01995
+size 5931

runs/Jun14_14-51-36_0e3c5c5304b5/events.out.tfevents.1718376696.0e3c5c5304b5.2041.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e89fc539d379a8c715999a428cbb1c03feaaef2c6c69e8dc0be248b17f61ba18
+size 5413

runs/Jun14_14-51-44_0e3c5c5304b5/events.out.tfevents.1718376705.0e3c5c5304b5.2041.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:55f7b47d9328a256c4b59886fa9b6cdde66870087b8da1f6d02decf4e7d991f5
+size 5414

runs/Jun14_14-52-22_0e3c5c5304b5/events.out.tfevents.1718376742.0e3c5c5304b5.2041.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f3a6558c304a5e163faf7e816a617d7cf1630958d33dd2baa24b6549cb4e9e76
+size 5414

runs/Jun14_14-52-45_0e3c5c5304b5/events.out.tfevents.1718376766.0e3c5c5304b5.2041.5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec27334ba4cdc0649583d81c2455d1c857fd5feb6d3bb5761773a1adf7cfd5bb
+size 5414

runs/Jun14_14-54-00_0e3c5c5304b5/events.out.tfevents.1718376841.0e3c5c5304b5.2041.6 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6f16d4aafa04b7fc19214ee48156ce2cb8dab45d410c8bebcbd6be2bfe7ef690
+size 5414

runs/Jun14_14-54-20_0e3c5c5304b5/events.out.tfevents.1718376861.0e3c5c5304b5.2041.7 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba5ae507184e40d70c5eb7b902d27054e2fb7286fa084ffd5307109b7799f857
+size 16786

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3827c93a3519ac622377eeebe4294776a36eb7406f03ab2f20d9dffda97171e3
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:35e5048b818c7be80a0fa915e212257550fcf5cc613bb5820fde12145936c406
 size 5048