aengusl committed on
Commit
08947a6
1 Parent(s): 5e11600

Model save

Browse files
README.md CHANGED
@@ -5,9 +5,9 @@ tags:
5
  - trl
6
  - sft
7
  - generated_from_trainer
 
8
  datasets:
9
  - generator
10
- base_model: meta-llama/Llama-2-7b-chat-hf
11
  model-index:
12
  - name: llama2-7b-sft-lora
13
  results: []
@@ -42,18 +42,24 @@ The following hyperparameters were used during training:
42
  - eval_batch_size: 8
43
  - seed: 4
44
  - distributed_type: multi-GPU
45
- - num_devices: 4
46
  - gradient_accumulation_steps: 4
47
- - total_train_batch_size: 64
48
- - total_eval_batch_size: 32
49
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: cosine
51
  - num_epochs: 1
52
 
53
  ### Framework versions
54
 
55
- - PEFT 0.8.2
56
- - Transformers 4.37.2
57
- - Pytorch 2.2.0+cu121
58
- - Datasets 2.16.1
59
- - Tokenizers 0.15.1
 
 
 
 
 
 
 
5
  - trl
6
  - sft
7
  - generated_from_trainer
8
+ base_model: meta-llama/Llama-2-7b-chat-hf
9
  datasets:
10
  - generator
 
11
  model-index:
12
  - name: llama2-7b-sft-lora
13
  results: []
 
42
  - eval_batch_size: 8
43
  - seed: 4
44
  - distributed_type: multi-GPU
45
+ - num_devices: 2
46
  - gradient_accumulation_steps: 4
47
+ - total_train_batch_size: 32
48
+ - total_eval_batch_size: 16
49
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
50
  - lr_scheduler_type: cosine
51
  - num_epochs: 1
52
 
53
  ### Framework versions
54
 
55
+ - Transformers 4.41.0
56
+ - Pytorch 2.1.0+cu121
57
+ - Datasets 2.14.6
58
+ - Tokenizers 0.19.1
59
+ ## Training procedure
60
+
61
+
62
+ ### Framework versions
63
+
64
+
65
+ - PEFT 0.6.1
adapter_config.json CHANGED
@@ -8,24 +8,20 @@
8
  "init_lora_weights": true,
9
  "layers_pattern": null,
10
  "layers_to_transform": null,
11
- "loftq_config": {},
12
  "lora_alpha": 16,
13
  "lora_dropout": 0.1,
14
- "megatron_config": null,
15
- "megatron_core": "megatron.core",
16
  "modules_to_save": null,
17
  "peft_type": "LORA",
18
  "r": 64,
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "v_proj",
23
  "q_proj",
24
- "k_proj",
25
- "o_proj",
26
  "up_proj",
27
- "down_proj"
28
  ],
29
- "task_type": "CAUSAL_LM",
30
- "use_rslora": false
31
  }
 
8
  "init_lora_weights": true,
9
  "layers_pattern": null,
10
  "layers_to_transform": null,
 
11
  "lora_alpha": 16,
12
  "lora_dropout": 0.1,
 
 
13
  "modules_to_save": null,
14
  "peft_type": "LORA",
15
  "r": 64,
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "k_proj",
20
+ "down_proj",
21
  "v_proj",
22
  "q_proj",
 
 
23
  "up_proj",
24
+ "o_proj"
25
  ],
26
+ "task_type": "CAUSAL_LM"
 
27
  }
runs/May20_14-41-33_04bac4264f87/events.out.tfevents.1716216487.04bac4264f87.3853.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36b1bd1bf287b0eaf3ea6ed62a68c8ce7b745fe4d536f214ea02b53c594e7735
3
+ size 4955
step_0/README.md CHANGED
@@ -81,7 +81,7 @@ Use the code below to get started with the model.
81
 
82
  [More Information Needed]
83
 
84
- ### Training Procedure
85
 
86
  <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
 
@@ -197,8 +197,10 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
197
  ## Model Card Contact
198
 
199
  [More Information Needed]
 
200
 
201
 
202
  ### Framework versions
203
 
204
- - PEFT 0.8.2
 
 
81
 
82
  [More Information Needed]
83
 
84
+ ### Training Procedure
85
 
86
  <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
 
 
197
  ## Model Card Contact
198
 
199
  [More Information Needed]
200
+ ## Training procedure
201
 
202
 
203
  ### Framework versions
204
 
205
+
206
+ - PEFT 0.6.1
step_0/adapter_config.json CHANGED
@@ -8,24 +8,20 @@
8
  "init_lora_weights": true,
9
  "layers_pattern": null,
10
  "layers_to_transform": null,
11
- "loftq_config": {},
12
  "lora_alpha": 16,
13
  "lora_dropout": 0.1,
14
- "megatron_config": null,
15
- "megatron_core": "megatron.core",
16
  "modules_to_save": null,
17
  "peft_type": "LORA",
18
  "r": 64,
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "v_proj",
23
  "q_proj",
24
- "k_proj",
25
- "o_proj",
26
  "up_proj",
27
- "down_proj"
28
  ],
29
- "task_type": "CAUSAL_LM",
30
- "use_rslora": false
31
  }
 
8
  "init_lora_weights": true,
9
  "layers_pattern": null,
10
  "layers_to_transform": null,
 
11
  "lora_alpha": 16,
12
  "lora_dropout": 0.1,
 
 
13
  "modules_to_save": null,
14
  "peft_type": "LORA",
15
  "r": 64,
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "k_proj",
20
+ "down_proj",
21
  "v_proj",
22
  "q_proj",
 
 
23
  "up_proj",
24
+ "o_proj"
25
  ],
26
+ "task_type": "CAUSAL_LM"
 
27
  }
step_0/tokenizer.json CHANGED
@@ -134,6 +134,7 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
+ "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
step_0/tokenizer_config.json CHANGED
@@ -31,7 +31,6 @@
31
  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
34
- "legacy": false,
35
  "model_max_length": 2048,
36
  "pad_token": "</s>",
37
  "padding_side": "right",
 
31
  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
 
34
  "model_max_length": 2048,
35
  "pad_token": "</s>",
36
  "padding_side": "right",
step_0/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a17890ae8adc5c75c8c626de8ca0b3b549dff571a11f82a797bc2d7a5b3bdbe
3
- size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e053cf93f1736083927827130a127d281aa30f3d7b5c8bd7f0ef7a5ad298352
3
+ size 6136
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
+ "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -31,7 +31,6 @@
31
  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
34
- "legacy": false,
35
  "model_max_length": 2048,
36
  "pad_token": "</s>",
37
  "padding_side": "right",
 
31
  "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}",
32
  "clean_up_tokenization_spaces": false,
33
  "eos_token": "</s>",
 
34
  "model_max_length": 2048,
35
  "pad_token": "</s>",
36
  "padding_side": "right",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a17890ae8adc5c75c8c626de8ca0b3b549dff571a11f82a797bc2d7a5b3bdbe
3
- size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e053cf93f1736083927827130a127d281aa30f3d7b5c8bd7f0ef7a5ad298352
3
+ size 6136