deronDi committed
Commit
c54d702
1 Parent(s): 56195fa

End of training

README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-base_model: ybelkada/falcon-7b-sharded-bf16
+base_model: meta-llama/Llama-2-7b-hf
 tags:
 - generated_from_trainer
 model-index:
@@ -12,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # results
 
-This model is a fine-tuned version of [ybelkada/falcon-7b-sharded-bf16](https://huggingface.co/ybelkada/falcon-7b-sharded-bf16) on an unknown dataset.
+This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unknown dataset.
 
 ## Model description
 
@@ -32,23 +32,19 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 4
+- train_batch_size: 2
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 16
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 4
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
 - lr_scheduler_warmup_ratio: 0.03
-- training_steps: 120
-
-### Training results
-
-
+- training_steps: 1030
 
 ### Framework versions
 
-- Transformers 4.34.1
+- Transformers 4.35.0
 - Pytorch 2.1.0+cu118
 - Datasets 2.14.6
 - Tokenizers 0.14.1
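For reference, the updated hyperparameters correspond roughly to the following `transformers.TrainingArguments` — a minimal sketch, assuming a standard `Trainer` setup; `output_dir` is a placeholder, and the Adam betas/epsilon listed above are the library defaults:

```python
from transformers import TrainingArguments

# Sketch of the hyperparameters from the updated README.
training_args = TrainingArguments(
    output_dir="results",             # placeholder; not recorded in this commit
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,    # 2 x 2 = total_train_batch_size of 4
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    max_steps=1030,
    seed=42,                          # Adam betas=(0.9,0.999), eps=1e-08 are defaults
)
```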
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "ybelkada/falcon-7b-sharded-bf16",
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -16,10 +16,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "dense",
-    "dense_4h_to_h",
-    "dense_h_to_4h",
-    "query_key_value"
+    "v_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
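Expressed as a `peft.LoraConfig`, the new adapter setup looks roughly like the sketch below: LoRA now targets LLaMA's attention projections instead of Falcon's fused dense layers. The diff hides the adapter's `r` and `lora_alpha`, so those values are placeholders:

```python
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,            # placeholder; not visible in this diff
    lora_alpha=32,   # placeholder; not visible in this diff
    target_modules=["v_proj", "q_proj"],  # LLaMA attention projections
    bias="none",
    task_type="CAUSAL_LM",
)
```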
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ba000da6875b4795731aa7c61f30bc14e9b2654d8f5123b53e820abcf40758b
+size 134235048
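This is the LFS pointer for the new LoRA weights (about 134 MB). A sketch of loading them with `peft`, assuming the adapter is published under the hypothetical repo id `deronDi/results`:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", torch_dtype=torch.float16
)
# "deronDi/results" is a hypothetical repo id for this adapter.
model = PeftModel.from_pretrained(base, "deronDi/results")
```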
runs/Nov12_22-38-30_ba12b1a0542b/events.out.tfevents.1699828750.ba12b1a0542b.5474.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c7b426ba8c227006b0b45b3564a9091174ea0eba7b493f59c05b2ae25ca0712
+size 4609
runs/Nov12_22-41-40_ba12b1a0542b/events.out.tfevents.1699828917.ba12b1a0542b.5474.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e4a0c80a2015fcf456b4f7e1543588832881957aba8260bbb561cffb131ff5e
+size 4184
runs/Nov12_22-44-38_ba12b1a0542b/events.out.tfevents.1699829107.ba12b1a0542b.8201.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdb8ac3d1fc9777e6ae50a1573023c4e7cab406df7a164ef10392f0ea11c80dc
+size 6150
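These three `runs/` entries are TensorBoard event logs. One way to inspect them locally, assuming `tensorboard` is installed and the files are downloaded to the same relative paths:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point at one of the run directories from this commit.
ea = EventAccumulator("runs/Nov12_22-44-38_ba12b1a0542b")
ea.Reload()
print(ea.Tags())  # lists the scalar tags (e.g. training loss) recorded in the log
```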
special_tokens_map.json CHANGED
@@ -1,17 +1,24 @@
 {
-  "additional_special_tokens": [
-    ">>TITLE<<",
-    ">>ABSTRACT<<",
-    ">>INTRODUCTION<<",
-    ">>SUMMARY<<",
-    ">>COMMENT<<",
-    ">>ANSWER<<",
-    ">>QUESTION<<",
-    ">>DOMAIN<<",
-    ">>PREFIX<<",
-    ">>SUFFIX<<",
-    ">>MIDDLE<<"
-  ],
-  "eos_token": "<|endoftext|>",
-  "pad_token": "<|endoftext|>"
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,8 +1,7 @@
 {
-  "add_prefix_space": false,
   "added_tokens_decoder": {
     "0": {
-      "content": ">>TITLE<<",
+      "content": "<unk>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -10,7 +9,7 @@
       "special": true
     },
     "1": {
-      "content": ">>ABSTRACT<<",
+      "content": "<s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -18,79 +17,7 @@
       "special": true
     },
     "2": {
-      "content": ">>INTRODUCTION<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "3": {
-      "content": ">>SUMMARY<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "4": {
-      "content": ">>COMMENT<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "5": {
-      "content": ">>ANSWER<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "6": {
-      "content": ">>QUESTION<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "7": {
-      "content": ">>DOMAIN<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "8": {
-      "content": ">>PREFIX<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "9": {
-      "content": ">>SUFFIX<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "10": {
-      "content": ">>MIDDLE<<",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "11": {
-      "content": "<|endoftext|>",
+      "content": "</s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -98,22 +25,15 @@
       "special": true
     }
   },
-  "additional_special_tokens": [
-    ">>TITLE<<",
-    ">>ABSTRACT<<",
-    ">>INTRODUCTION<<",
-    ">>SUMMARY<<",
-    ">>COMMENT<<",
-    ">>ANSWER<<",
-    ">>QUESTION<<",
-    ">>DOMAIN<<",
-    ">>PREFIX<<",
-    ">>SUFFIX<<",
-    ">>MIDDLE<<"
-  ],
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
-  "model_max_length": 2048,
-  "pad_token": "<|endoftext|>",
-  "tokenizer_class": "PreTrainedTokenizerFast"
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }
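With the updated files, loading the tokenizer should resolve to the Llama tokenizer with `</s>` already configured as the pad token, so no manual `pad_token` assignment should be needed; a sketch, again using the hypothetical repo id `deronDi/results`:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("deronDi/results")  # hypothetical repo id
print(tok.pad_token)     # "</s>", per the updated special_tokens_map.json
print(tok.padding_side)  # "right", per the updated tokenizer_config.json
```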
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e88ad96b915f87c50a4b8a175c2f0def18cc770eea678e2b61bd7df771c1ef72
-size 4536
+oid sha256:cc0470d89154ed8e29709073130794717ffd9eb766494900f343c6cd00361ce9
+size 4600