Chris Alexiuk committed
Commit 243a02c
1 Parent(s): d8eee1e

ai-maker-space/llama2-instruct-tune-500s

README.md CHANGED
@@ -1,7 +1,12 @@
 ---
-base_model: NousResearch/Llama-2-7b-hf
+library_name: peft
 tags:
+- trl
+- sft
 - generated_from_trainer
+datasets:
+- generator
+base_model: NousResearch/Llama-2-7b-hf
 model-index:
 - name: llama2_instruct_generation
   results: []
@@ -12,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # llama2_instruct_generation
 
-This model is a fine-tuned version of [NousResearch/Llama-2-7b-hf](https://huggingface.co/NousResearch/Llama-2-7b-hf) on an unknown dataset.
+This model is a fine-tuned version of [NousResearch/Llama-2-7b-hf](https://huggingface.co/NousResearch/Llama-2-7b-hf) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.6733
+- Loss: 1.6759
 
 ## Model description
 
@@ -46,36 +51,37 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.9478 | 0.0 | 20 | 1.8129 |
-| 1.7992 | 0.0 | 40 | 1.7797 |
-| 1.8765 | 0.0 | 60 | 1.7642 |
-| 1.8488 | 0.01 | 80 | 1.7527 |
-| 1.8512 | 0.01 | 100 | 1.7384 |
-| 1.8856 | 0.01 | 120 | 1.7136 |
-| 1.8429 | 0.01 | 140 | 1.7035 |
-| 1.8258 | 0.01 | 160 | 1.6970 |
-| 1.8125 | 0.01 | 180 | 1.6923 |
-| 1.7902 | 0.01 | 200 | 1.6898 |
-| 1.8622 | 0.02 | 220 | 1.6895 |
-| 1.8823 | 0.02 | 240 | 1.6867 |
-| 1.7728 | 0.02 | 260 | 1.6837 |
-| 1.6331 | 0.02 | 280 | 1.6820 |
-| 1.8399 | 0.02 | 300 | 1.6821 |
-| 1.735 | 0.02 | 320 | 1.6810 |
-| 1.8192 | 0.02 | 340 | 1.6804 |
-| 1.7609 | 0.03 | 360 | 1.6771 |
-| 1.6517 | 0.03 | 380 | 1.6768 |
-| 1.6272 | 0.03 | 400 | 1.6738 |
-| 1.7947 | 0.03 | 420 | 1.6751 |
-| 1.7794 | 0.03 | 440 | 1.6740 |
-| 1.6371 | 0.03 | 460 | 1.6726 |
-| 1.7126 | 0.03 | 480 | 1.6730 |
-| 1.7321 | 0.04 | 500 | 1.6733 |
+| 1.8994 | 0.0 | 20 | 1.8109 |
+| 1.8521 | 0.01 | 40 | 1.7830 |
+| 1.8745 | 0.01 | 60 | 1.7694 |
+| 1.8092 | 0.01 | 80 | 1.7576 |
+| 1.8042 | 0.01 | 100 | 1.7436 |
+| 1.9305 | 0.02 | 120 | 1.7090 |
+| 1.7965 | 0.02 | 140 | 1.7034 |
+| 1.8457 | 0.02 | 160 | 1.6977 |
+| 1.823 | 0.02 | 180 | 1.6943 |
+| 1.7997 | 0.03 | 200 | 1.6922 |
+| 1.7614 | 0.03 | 220 | 1.6895 |
+| 1.7701 | 0.03 | 240 | 1.6886 |
+| 1.8093 | 0.04 | 260 | 1.6877 |
+| 1.8101 | 0.04 | 280 | 1.6847 |
+| 1.8109 | 0.04 | 300 | 1.6834 |
+| 1.7523 | 0.04 | 320 | 1.6807 |
+| 1.7575 | 0.05 | 340 | 1.6802 |
+| 1.8497 | 0.05 | 360 | 1.6783 |
+| 1.8347 | 0.05 | 380 | 1.6781 |
+| 1.8019 | 0.05 | 400 | 1.6766 |
+| 1.7267 | 0.06 | 420 | 1.6770 |
+| 1.7849 | 0.06 | 440 | 1.6767 |
+| 1.7727 | 0.06 | 460 | 1.6748 |
+| 1.7796 | 0.07 | 480 | 1.6744 |
+| 1.7963 | 0.07 | 500 | 1.6759 |
 
 
 ### Framework versions
 
-- Transformers 4.35.1
-- Pytorch 2.1.0+cu118
-- Datasets 2.14.6
-- Tokenizers 0.14.1
+- PEFT 0.7.1
+- Transformers 4.36.2
+- Pytorch 2.1.0+cu121
+- Datasets 2.16.1
+- Tokenizers 0.15.0
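With `library_name: peft` now declared in the card metadata, this repo holds a LoRA adapter rather than full model weights, so it loads through PEFT's auto classes. A minimal inference sketch, assuming the repo id from the commit header (`ai-maker-space/llama2-instruct-tune-500s`) is where the adapter lives; the prompt template is a placeholder, since the card does not document one:

```python
# Minimal sketch of loading this LoRA adapter for inference. Assumes `peft`,
# `transformers`, and `accelerate` are installed; the repo id comes from the
# commit header and the prompt format is a guess, not documented by the card.
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

repo_id = "ai-maker-space/llama2-instruct-tune-500s"

# AutoPeftModelForCausalLM reads adapter_config.json, downloads the base model
# (NousResearch/Llama-2-7b-hf), and attaches the LoRA weights on top of it.
model = AutoPeftModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

prompt = "### Instruction:\nExplain what a LoRA adapter is.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```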
adapter_config.json CHANGED
@@ -8,16 +8,19 @@
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
+  "loftq_config": {},
   "lora_alpha": 16,
   "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "q_proj"
+    "q_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
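For reference, the adapter settings above correspond to a `peft.LoraConfig` like the sketch below (values copied from the JSON; the new `loftq_config`/`megatron_*` keys appear to be fields that newer PEFT releases serialize by default, not behavior changes). The `trl`/`sft` tags added to the card suggest a config like this was passed to TRL's `SFTTrainer` as `peft_config`.

```python
# A sketch of the LoraConfig that would serialize to the adapter_config.json
# above. Values are copied from the diff; all other fields are PEFT defaults.
from peft import LoraConfig

lora_config = LoraConfig(
    r=64,                                 # LoRA rank
    lora_alpha=16,                        # scaling factor (alpha / r = 0.25)
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],  # attention query/value projections
    task_type="CAUSAL_LM",
)
```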
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e44ce263e6fd885f50d82ca515b9325375b43ee36ededb75acf161ce88bc2e41
-size 48
+oid sha256:bb9279cb010e4c54dff6a2480dfa4fc6d285a9df96a097c6631032b2b3628018
+size 134235048
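The size change (a 48-byte placeholder replaced by a ~134 MB file) means this commit uploads the actual LoRA weights. A sketch for inspecting a locally downloaded copy, assuming the `safetensors` package is installed:

```python
# Sketch: list the tensors stored in the adapter file. Assumes the file has
# been fetched locally, e.g. with huggingface_hub.hf_hub_download.
from safetensors import safe_open

with safe_open("adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        t = f.get_tensor(name)
        print(name, tuple(t.shape), t.dtype)
```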
runs/Jan18_22-19-50_09c86763fe3a/events.out.tfevents.1705616457.09c86763fe3a.251.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1038dc4f67fab820ca160b34cd00e5d70dab3cd5a5fc731990dc41e155056517
+size 19749
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
 {
+  "add_bos_token": true,
+  "add_eos_token": false,
   "added_tokens_decoder": {
     "0": {
       "content": "<unk>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95aa17d15e436395bb17415ed4cb606077c6ea609cce8bb1f8da04d026217b9d
-size 4600
+oid sha256:a4f88f7da09dbe9f6769612e6c5e2adb43e29836217817946e1acd3840967886
+size 4728
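`training_args.bin` is a pickled `transformers.TrainingArguments`, so the size change simply reflects re-serialization under the newer Transformers version. A sketch for inspecting it locally (it is a pickle, so only load files from sources you trust):

```python
# Sketch: restore the TrainingArguments object from training_args.bin.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.max_steps, args.per_device_train_batch_size)
```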