Commit 11cd393 (1 parent: 4a80d88), committed by mebinjoy

End of training
README.md CHANGED
@@ -1,21 +1,22 @@
 ---
+license: unknown
 library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
-base_model: ybelkada/falcon-7b-sharded-bf16
+base_model: tiiuae/falcon-11B
 model-index:
-- name: results
+- name: falcon2_guanaco
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# results
+# falcon2_guanaco
 
-This model is a fine-tuned version of [ybelkada/falcon-7b-sharded-bf16](https://huggingface.co/ybelkada/falcon-7b-sharded-bf16) on an unknown dataset.
+This model is a fine-tuned version of [tiiuae/falcon-11B](https://huggingface.co/tiiuae/falcon-11B) on an unknown dataset.
 
 ## Model description
 
@@ -43,7 +44,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
 - lr_scheduler_warmup_ratio: 0.03
-- training_steps: 10
+- training_steps: 500
 - mixed_precision_training: Native AMP
 
 ### Training results
@@ -52,8 +53,8 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
-- PEFT 0.7.2.dev0
-- Transformers 4.36.2
-- Pytorch 2.1.0+cu121
-- Datasets 2.16.1
-- Tokenizers 0.15.0
+- PEFT 0.11.2.dev0
+- Transformers 4.41.1
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.1
+- Tokenizers 0.19.1
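
The updated card points at tiiuae/falcon-11B as the base model. Below is a minimal sketch of loading the resulting adapter for inference with PEFT; the repo id "mebinjoy/falcon2_guanaco" is an assumption pieced together from the committer and model name in this diff, not something the diff itself states.

```python
# Minimal loading sketch. The adapter repo id is an assumption; replace
# it with the actual Hub repo this commit was pushed to.
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

adapter_repo = "mebinjoy/falcon2_guanaco"  # assumed repo id

# Downloads tiiuae/falcon-11B (the base_model recorded in adapter_config.json)
# and applies the LoRA weights on top of it.
model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_repo,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(adapter_repo)

inputs = tokenizer("User: \nWhat is LoRA?\nFalcon:", return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```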
adapter_config.json CHANGED
@@ -1,11 +1,12 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
+  "base_model_name_or_path": "tiiuae/falcon-11B",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
+  "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
@@ -20,10 +21,11 @@
   "revision": null,
   "target_modules": [
     "query_key_value",
-    "dense_4h_to_h",
     "dense",
-    "dense_h_to_4h"
+    "dense_h_to_4h",
+    "dense_4h_to_h"
   ],
   "task_type": "CAUSAL_LM",
+  "use_dora": false,
   "use_rslora": false
 }
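
The retargeted adapter trains all four Falcon projection matrices, and the new use_dora and layer_replication fields come with the PEFT 0.11 config schema. A sketch of the corresponding LoraConfig follows; r and lora_alpha are not visible in the hunks above, so those values are placeholders.

```python
# Sketch of the LoraConfig implied by the new adapter_config.json.
# r and lora_alpha do not appear in the visible hunks; the values below
# are placeholders, not the trained settings.
from peft import LoraConfig

peft_config = LoraConfig(
    r=16,           # placeholder
    lora_alpha=32,  # placeholder
    bias="none",
    target_modules=[
        "query_key_value",  # fused attention projection
        "dense",            # attention output projection
        "dense_h_to_4h",    # MLP up-projection
        "dense_4h_to_h",    # MLP down-projection
    ],
    task_type="CAUSAL_LM",
    use_dora=False,
    use_rslora=False,
)
```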
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be6c5ddef636364bef3e0b130c7248ba8b25b9fddffdbe6ab2ae90cca57eec75
-size 522227376
+oid sha256:ac31ef1ec252339091ab4d16b5ca8b5a05c6c02684b29c656fe2f9446d4b5339
+size 912329728
runs/May28_02-50-04_8ac7dcedda8d/events.out.tfevents.1716864618.8ac7dcedda8d.217.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5741b5a8e970336f3ecf70d0dc836f0c1216ae05ef10cbe9d7f77c276ec45208
+size 16885
special_tokens_map.json CHANGED
@@ -12,6 +12,13 @@
     ">>SUFFIX<<",
     ">>MIDDLE<<"
   ],
+  "bos_token": {
+    "content": ">>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "eos_token": {
     "content": "<|endoftext|>",
     "lstrip": false,
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
+  "truncation": {
+    "direction": "Right",
+    "max_length": 512,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {
@@ -110,6 +115,15 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 500,
+      "content": ">>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,
@@ -155,6 +169,7 @@
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
+    "ignore_merges": false,
     "vocab": {
       ">>TITLE<<": 0,
       ">>ABSTRACT<<": 1,
tokenizer_config.json CHANGED
@@ -96,6 +96,14 @@
       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "500": {
+      "content": ">>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
     }
   },
   "additional_special_tokens": [
@@ -111,9 +119,17 @@
     ">>SUFFIX<<",
     ">>MIDDLE<<"
   ],
+  "bos_token": ">>",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ 'User: \n' + message['content'] }}\n{% elif message['role'] == 'system' %}\n{{ 'System: ' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ 'Falcon:\n' + message['content']}}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ 'Falcon:' }}\n{% endif %}\n{% endfor %}",
   "clean_up_tokenization_spaces": true,
+  "device_map": "cuda:2",
   "eos_token": "<|endoftext|>",
-  "model_max_length": 2048,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|endoftext|>",
+  "padding_side": "left",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
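
The added chat_template renders conversations into the User:/System:/Falcon: format used at training time. A sketch of applying it follows; the template string is copied verbatim from the hunk above and assigned explicitly, so the example does not depend on what the Hub repo currently serves:

```python
# Sketch: render a conversation with the chat template added in this commit.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("tiiuae/falcon-11B")
tok.chat_template = (
    "{% for message in messages %}\n"
    "{% if message['role'] == 'user' %}\n"
    "{{ 'User: \n' + message['content'] }}\n"
    "{% elif message['role'] == 'system' %}\n"
    "{{ 'System: ' + message['content'] }}\n"
    "{% elif message['role'] == 'assistant' %}\n"
    "{{ 'Falcon:\n' + message['content']}}\n"
    "{% endif %}\n"
    "{% if loop.last and add_generation_prompt %}\n"
    "{{ 'Falcon:' }}\n"
    "{% endif %}\n"
    "{% endfor %}"
)

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize LoRA in one sentence."},
]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # ends with "Falcon:" as the generation cue
```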
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aabcc1d61edf5c2326c342c0809acf4d925cf2a4c2f09ef4a0e9c259c76caf83
-size 4728
+oid sha256:01c5d62e82cd4c9f81d64135c00c4f3635a978975b236ee7c8b859ffdf0e66e2
+size 5112
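
The size change in training_args.bin most likely reflects the newer Transformers version's TrainingArguments schema. The file is a pickled object, so it can be inspected after download; a sketch (torch.load unpickles arbitrary code, so only do this with a trusted repo):

```python
# Sketch: inspect the pickled TrainingArguments saved alongside the run.
# torch.load executes pickled code, so only use it on trusted files.
import torch

args = torch.load("training_args.bin")
print(args.max_steps, args.lr_scheduler_type, args.warmup_ratio)
```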