msab97 committed (verified)
Commit 927ff5b · Parent(s): 513ce1f

End of training

README.md CHANGED

@@ -18,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [meta-llama/Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.4647
-- Accuracy: 0.8284
-- F1 Macro: 0.7893
+- Loss: 0.3923
+- Accuracy: 0.8414
+- F1 Macro: 0.8365
 
 ## Model description
 
@@ -55,18 +55,17 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 Macro |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|:--------:|
-| 0.9663        | 1.0   | 461  | 0.9493          | 0.6278   | 0.4609   |
-| 0.5136        | 2.0   | 922  | 0.5921          | 0.7721   | 0.6943   |
-| 0.4578        | 3.0   | 1383 | 0.5004          | 0.8128   | 0.7707   |
-| 0.4147        | 4.0   | 1844 | 0.4632          | 0.8313   | 0.7930   |
-| 0.4134        | 5.0   | 2305 | 0.4603          | 0.8302   | 0.7950   |
-| 0.3137        | 6.0   | 2766 | 0.4506          | 0.8334   | 0.8007   |
+| 1.8719        | 1.0   | 454  | 0.8635          | 0.6562   | 0.6261   |
+| 0.9455        | 2.0   | 908  | 0.4734          | 0.8168   | 0.8068   |
+| 0.7437        | 3.0   | 1362 | 0.4071          | 0.8366   | 0.8305   |
+| 0.7825        | 4.0   | 1816 | 0.3959          | 0.8433   | 0.8391   |
+| 0.6047        | 5.0   | 2270 | 0.3910          | 0.8400   | 0.8341   |
 
 
 ### Framework versions
 
-- PEFT 0.13.2
-- Transformers 4.46.3
+- PEFT 0.14.0
+- Transformers 4.47.1
 - Pytorch 2.5.1+cu121
-- Datasets 3.1.0
-- Tokenizers 0.20.3
+- Datasets 3.2.0
+- Tokenizers 0.21.0
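For readers of the updated card: below is a minimal sketch of loading this LoRA adapter for sequence classification under the framework versions pinned above. The repo id is a placeholder (the actual repository name does not appear in this diff), and the input text is illustrative.

```python
# Minimal sketch: load the LoRA adapter on top of Llama-3.2-1B for
# sequence classification, assuming PEFT 0.14.0 / Transformers 4.47.1
# as pinned in the card.
import torch
from peft import AutoPeftModelForSequenceClassification
from transformers import AutoTokenizer

adapter_id = "msab97/your-adapter-repo"  # PLACEHOLDER: real repo id not shown in this diff

# Pass num_labels=... here if the classifier head size is not
# recoverable from the checkpoint.
model = AutoPeftModelForSequenceClassification.from_pretrained(adapter_id)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")

inputs = tokenizer("Some text to classify", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
print(logits.argmax(dim=-1).item())  # predicted class index
```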
adapter_config.json CHANGED

@@ -3,6 +3,8 @@
   "auto_mapping": null,
   "base_model_name_or_path": "meta-llama/Llama-3.2-1B",
   "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
@@ -11,6 +13,7 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 8,
+  "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -24,9 +27,9 @@
   "revision": null,
   "target_modules": [
     "k_proj",
-    "q_proj",
     "o_proj",
-    "v_proj"
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "SEQ_CLS",
   "use_dora": false,
adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2bcd6c60001dbc2237b3505e76e426e0c04ec24b290b237796366b11b88369f
-size 13705936
+oid sha256:c9570697e31ab5a34151d097d327027761065671d7f15bdf09ea2e268ce59e17
+size 13689552
training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88edd027c86f02ac7eeed3312ae46b879bba5fa6513a61b33b80af8e429a3603
-size 5240
+oid sha256:228d077441d5e7cbe1b2437a6229a57c88d77117b635799e27470c0e6ef6e9e6
+size 5304
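The two binary files are stored as Git LFS pointers, so the diff records only new sha256 oids and sizes. A downloaded artifact can be checked against its pointer as in the sketch below; the local file path is an assumption.

```python
# Verify a downloaded file against the sha256 oid in its Git LFS pointer.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid from the adapter_model.safetensors pointer above
expected = "c9570697e31ab5a34151d097d327027761065671d7f15bdf09ea2e268ce59e17"
assert sha256_of("adapter_model.safetensors") == expected, "hash mismatch"
```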