aengusl committed
Commit d3ca853
1 Parent(s): c387004

Model save

README.md CHANGED
@@ -48,6 +48,6 @@ The following hyperparameters were used during training:
 ### Framework versions
 
 - Transformers 4.35.0
-- Pytorch 2.2.0+cu121
-- Datasets 2.14.7
+- Pytorch 2.1.0+cu121
+- Datasets 2.14.6
 - Tokenizers 0.14.1
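If reproducing this run, the pins above can be sanity-checked at import time. A minimal sketch (not part of this repository); the `startswith` check allows for the `+cu121` build suffix:

```python
# Sanity-check the environment against the versions in the updated model card.
# Illustrative snippet, not part of this repository.
import datasets
import tokenizers
import torch
import transformers

assert transformers.__version__ == "4.35.0"
assert torch.__version__.startswith("2.1.0")  # e.g. "2.1.0+cu121"
assert datasets.__version__ == "2.14.6"
assert tokenizers.__version__ == "0.14.1"
```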
adapter_config.json CHANGED
@@ -8,24 +8,20 @@
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
-  "loftq_config": {},
   "lora_alpha": 16,
   "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "k_proj",
+    "q_proj",
     "o_proj",
     "up_proj",
-    "q_proj",
-    "v_proj"
+    "down_proj",
+    "v_proj",
+    "k_proj"
   ],
-  "task_type": "CAUSAL_LM",
-  "use_rslora": false
+  "task_type": "CAUSAL_LM"
 }
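For reference, this file is the JSON that PEFT's `LoraConfig` serializes when the adapter is created. The dropped keys (`loftq_config`, `megatron_config`, `megatron_core`, `use_rslora`) only exist in newer PEFT releases, which is consistent with the PEFT 0.8.2 → 0.6.1 change recorded in `step_0/README.md` further down. A minimal sketch of a config that would serialize to the updated JSON, assuming peft 0.6.x (`adapter_dir` is a placeholder path):

```python
# Sketch only: a LoraConfig matching the updated adapter_config.json,
# assuming peft 0.6.x. "adapter_dir" is a hypothetical output path.
from peft import LoraConfig

config = LoraConfig(
    r=64,            # LoRA rank, as in the diff
    lora_alpha=16,   # scaling numerator
    lora_dropout=0.1,
    target_modules=["q_proj", "o_proj", "up_proj", "down_proj", "v_proj", "k_proj"],
    task_type="CAUSAL_LM",
)
config.save_pretrained("adapter_dir")  # writes adapter_config.json there
```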
runs/May16_17-06-17_cdc34a7ce3be/events.out.tfevents.1715879257.cdc34a7ce3be.62268.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41e5b6791427747363f2bd22c96b74db429e177534dadbee660f96713c5a4c62
+size 4308
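The new TensorBoard event file is stored, as usual on the Hub, as a Git LFS pointer: three `key value` lines giving the spec version, the SHA-256 of the actual object, and its size in bytes. A toy parser, purely for illustration (the authoritative format lives at https://git-lfs.github.com/spec/v1):

```python
# Toy parser for a Git LFS pointer file such as the one added above.
# Illustrative only; not the official LFS client.
def parse_lfs_pointer(text: str) -> dict[str, str]:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:41e5b6791427747363f2bd22c96b74db429e177534dadbee660f96713c5a4c62\n"
    "size 4308\n"
)
assert parse_lfs_pointer(pointer)["size"] == "4308"
```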
step_0/README.md CHANGED
@@ -18,7 +18,6 @@ base_model: meta-llama/Llama-2-7b-chat-hf
 
 
 - **Developed by:** [More Information Needed]
-- **Funded by [optional]:** [More Information Needed]
 - **Shared by [optional]:** [More Information Needed]
 - **Model type:** [More Information Needed]
 - **Language(s) (NLP):** [More Information Needed]
@@ -77,7 +76,7 @@ Use the code below to get started with the model.
 
 ### Training Data
 
-<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+<!-- This should link to a Data Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
 
 [More Information Needed]
 
@@ -108,7 +107,7 @@ Use the code below to get started with the model.
 
 #### Testing Data
 
-<!-- This should link to a Dataset Card if possible. -->
+<!-- This should link to a Data Card if possible. -->
 
 [More Information Needed]
 
@@ -199,6 +198,10 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 [More Information Needed]
 
 
+## Training procedure
+
+
 ### Framework versions
 
-- PEFT 0.8.2
+
+- PEFT 0.6.1
step_0/adapter_config.json CHANGED
@@ -8,24 +8,20 @@
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
-  "loftq_config": {},
   "lora_alpha": 16,
   "lora_dropout": 0.1,
-  "megatron_config": null,
-  "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "k_proj",
+    "q_proj",
     "o_proj",
     "up_proj",
-    "q_proj",
-    "v_proj"
+    "down_proj",
+    "v_proj",
+    "k_proj"
   ],
-  "task_type": "CAUSAL_LM",
-  "use_rslora": false
+  "task_type": "CAUSAL_LM"
 }
step_0/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f31f86312a87c59ce516aaa40473d613fb013c8021c43eed2f88a3e4e0ffda1
+oid sha256:66247858b5728ead4dbdf5ac24a724ae24015e0ac1489f89e2b67562e415e726
 size 5560
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f31f86312a87c59ce516aaa40473d613fb013c8021c43eed2f88a3e4e0ffda1
+oid sha256:66247858b5728ead4dbdf5ac24a724ae24015e0ac1489f89e2b67562e415e726
 size 5560