nvan15 commited on
Commit
9cf4652
·
verified ·
1 Parent(s): e6dd826

Batch upload part 17

Browse files
Files changed (50) hide show
  1. nl_tasks/exp100/run_ex02/ft/adapter_config.json +18 -0
  2. nl_tasks/exp100/run_ex02/ft/special_tokens_map.json +24 -0
  3. nl_tasks/exp100/run_ex02/ft/tokenizer.json +0 -0
  4. nl_tasks/exp100/run_ex02/ft/tokenizer.model +3 -0
  5. nl_tasks/exp100/run_ex02/ft/tokenizer_config.json +43 -0
  6. nl_tasks/exp100/run_ex02/ft2/adapter_config.json +18 -0
  7. nl_tasks/exp100/run_ex02/ft2/adapter_model.bin +3 -0
  8. nl_tasks/exp100/run_ex03/ft/adapter_config.json +18 -0
  9. nl_tasks/exp100/run_ex03/ft/special_tokens_map.json +24 -0
  10. nl_tasks/exp100/run_ex03/ft/tokenizer.json +0 -0
  11. nl_tasks/exp100/run_ex03/ft/tokenizer.model +3 -0
  12. nl_tasks/exp100/run_ex03/ft/tokenizer_config.json +43 -0
  13. nl_tasks/exp100/run_ex03/ft2/adapter_config.json +18 -0
  14. nl_tasks/exp100/run_ex03/ft2/adapter_model.bin +3 -0
  15. nl_tasks/exp100/run_ex03/trainer_state.json +260 -0
  16. nl_tasks/exp100/run_ex04/ft/adapter_config.json +18 -0
  17. nl_tasks/exp100/run_ex04/ft/special_tokens_map.json +24 -0
  18. nl_tasks/exp100/run_ex04/ft/tokenizer.json +0 -0
  19. nl_tasks/exp100/run_ex04/ft/tokenizer.model +3 -0
  20. nl_tasks/exp100/run_ex04/ft/tokenizer_config.json +43 -0
  21. nl_tasks/exp100/run_ex04/ft2/adapter_config.json +18 -0
  22. nl_tasks/exp100/run_ex04/ft2/adapter_model.bin +3 -0
  23. nl_tasks/exp100/run_ex04/trainer_state.json +260 -0
  24. nl_tasks/exp100/run_ex05/ft/adapter_config.json +18 -0
  25. nl_tasks/exp100/run_ex05/ft/special_tokens_map.json +24 -0
  26. nl_tasks/exp100/run_ex05/ft/tokenizer.json +0 -0
  27. nl_tasks/exp100/run_ex05/ft/tokenizer.model +3 -0
  28. nl_tasks/exp100/run_ex05/ft/tokenizer_config.json +43 -0
  29. nl_tasks/exp100/run_ex05/ft2/adapter_config.json +18 -0
  30. nl_tasks/exp100/run_ex05/ft2/adapter_model.bin +3 -0
  31. nl_tasks/exp100/run_ex05/trainer_state.json +365 -0
  32. nl_tasks/exp100/run_ex06/ft/adapter_config.json +18 -0
  33. nl_tasks/exp100/run_ex06/ft/special_tokens_map.json +24 -0
  34. nl_tasks/exp100/run_ex06/ft/tokenizer.json +0 -0
  35. nl_tasks/exp100/run_ex06/ft/tokenizer.model +3 -0
  36. nl_tasks/exp100/run_ex06/ft/tokenizer_config.json +43 -0
  37. nl_tasks/exp100/run_ex06/ft2/adapter_config.json +18 -0
  38. nl_tasks/exp100/run_ex06/ft2/adapter_model.bin +3 -0
  39. nl_tasks/exp100/run_ex06/trainer_state.json +183 -0
  40. nl_tasks/exp100/run_ex07/ft/adapter_config.json +18 -0
  41. nl_tasks/exp100/run_ex07/ft/special_tokens_map.json +24 -0
  42. nl_tasks/exp100/run_ex07/ft/tokenizer.json +0 -0
  43. nl_tasks/exp100/run_ex07/ft/tokenizer.model +3 -0
  44. nl_tasks/exp100/run_ex07/ft/tokenizer_config.json +43 -0
  45. nl_tasks/exp100/run_ex07/ft2/adapter_config.json +18 -0
  46. nl_tasks/exp100/run_ex07/ft2/adapter_model.bin +3 -0
  47. nl_tasks/exp100/run_ex07/trainer_state.json +260 -0
  48. nl_tasks/exp100/run_ex08/ft/special_tokens_map.json +24 -0
  49. nl_tasks/exp100/run_ex08/ft/tokenizer_config.json +43 -0
  50. nl_tasks/exp100/run_ex08/trainer_state.json +183 -0
nl_tasks/exp100/run_ex02/ft/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": false,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "q_proj",
14
+ "v_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex02/ft/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
nl_tasks/exp100/run_ex02/ft/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nl_tasks/exp100/run_ex02/ft/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
nl_tasks/exp100/run_ex02/ft/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 512,
37
+ "pad_token": "<unk>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
nl_tasks/exp100/run_ex02/ft2/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": true,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "q_proj",
14
+ "v_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex02/ft2/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02608c9a9b5b0dcbf11d02401bcc8d4e1fd5f0bd460dee7c3adc0605897bd4e4
3
+ size 33602915
nl_tasks/exp100/run_ex03/ft/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": false,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "q_proj",
14
+ "v_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex03/ft/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
nl_tasks/exp100/run_ex03/ft/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nl_tasks/exp100/run_ex03/ft/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
nl_tasks/exp100/run_ex03/ft/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 512,
37
+ "pad_token": "<unk>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
nl_tasks/exp100/run_ex03/ft2/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": true,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "q_proj",
14
+ "v_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex03/ft2/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6d4bcfd74a9f9c4a5ca4334bde6a2536a60517a9efc3aa1ff8e80f64d3159d0
3
+ size 33602915
nl_tasks/exp100/run_ex03/trainer_state.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 6250,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.064,
14
+ "grad_norm": 0.11334197223186493,
15
+ "learning_rate": 0.009988082511541485,
16
+ "loss": 0.5559,
17
+ "step": 200
18
+ },
19
+ {
20
+ "epoch": 0.128,
21
+ "grad_norm": 0.061305977404117584,
22
+ "learning_rate": 0.00992740539380525,
23
+ "loss": 0.2971,
24
+ "step": 400
25
+ },
26
+ {
27
+ "epoch": 0.192,
28
+ "grad_norm": 0.06917975842952728,
29
+ "learning_rate": 0.00981595390941903,
30
+ "loss": 0.2748,
31
+ "step": 600
32
+ },
33
+ {
34
+ "epoch": 0.256,
35
+ "grad_norm": 0.06461716443300247,
36
+ "learning_rate": 0.009654876508343738,
37
+ "loss": 0.2628,
38
+ "step": 800
39
+ },
40
+ {
41
+ "epoch": 0.32,
42
+ "grad_norm": 0.04892360046505928,
43
+ "learning_rate": 0.00944583300997063,
44
+ "loss": 0.2518,
45
+ "step": 1000
46
+ },
47
+ {
48
+ "epoch": 0.384,
49
+ "grad_norm": 0.03902921453118324,
50
+ "learning_rate": 0.00919097749954009,
51
+ "loss": 0.244,
52
+ "step": 1200
53
+ },
54
+ {
55
+ "epoch": 0.448,
56
+ "grad_norm": 0.042787957936525345,
57
+ "learning_rate": 0.008892936131406222,
58
+ "loss": 0.2388,
59
+ "step": 1400
60
+ },
61
+ {
62
+ "epoch": 0.512,
63
+ "grad_norm": 0.03384287655353546,
64
+ "learning_rate": 0.008554780067873127,
65
+ "loss": 0.2374,
66
+ "step": 1600
67
+ },
68
+ {
69
+ "epoch": 0.576,
70
+ "grad_norm": 0.03241891786456108,
71
+ "learning_rate": 0.008179993832454205,
72
+ "loss": 0.234,
73
+ "step": 1800
74
+ },
75
+ {
76
+ "epoch": 0.64,
77
+ "grad_norm": 0.028277236968278885,
78
+ "learning_rate": 0.007772439403657747,
79
+ "loss": 0.2328,
80
+ "step": 2000
81
+ },
82
+ {
83
+ "epoch": 0.704,
84
+ "grad_norm": 0.03353444114327431,
85
+ "learning_rate": 0.007336316419293858,
86
+ "loss": 0.2262,
87
+ "step": 2200
88
+ },
89
+ {
90
+ "epoch": 0.768,
91
+ "grad_norm": 0.026780247688293457,
92
+ "learning_rate": 0.006876118901376725,
93
+ "loss": 0.2233,
94
+ "step": 2400
95
+ },
96
+ {
97
+ "epoch": 0.832,
98
+ "grad_norm": 0.027920261025428772,
99
+ "learning_rate": 0.006396588947549764,
100
+ "loss": 0.2206,
101
+ "step": 2600
102
+ },
103
+ {
104
+ "epoch": 0.896,
105
+ "grad_norm": 0.026285970583558083,
106
+ "learning_rate": 0.005902667866219604,
107
+ "loss": 0.2173,
108
+ "step": 2800
109
+ },
110
+ {
111
+ "epoch": 0.96,
112
+ "grad_norm": 0.027279643341898918,
113
+ "learning_rate": 0.005399445258926061,
114
+ "loss": 0.2156,
115
+ "step": 3000
116
+ },
117
+ {
118
+ "epoch": 1.024,
119
+ "grad_norm": 0.02697896584868431,
120
+ "learning_rate": 0.004892106574628014,
121
+ "loss": 0.2049,
122
+ "step": 3200
123
+ },
124
+ {
125
+ "epoch": 1.088,
126
+ "grad_norm": 0.027274351567029953,
127
+ "learning_rate": 0.004385879676331144,
128
+ "loss": 0.1914,
129
+ "step": 3400
130
+ },
131
+ {
132
+ "epoch": 1.152,
133
+ "grad_norm": 0.02400428242981434,
134
+ "learning_rate": 0.003885980970660839,
135
+ "loss": 0.1937,
136
+ "step": 3600
137
+ },
138
+ {
139
+ "epoch": 1.216,
140
+ "grad_norm": 0.02517438866198063,
141
+ "learning_rate": 0.0033975616554871714,
142
+ "loss": 0.1913,
143
+ "step": 3800
144
+ },
145
+ {
146
+ "epoch": 1.28,
147
+ "grad_norm": 0.022209836170077324,
148
+ "learning_rate": 0.0029256546394924123,
149
+ "loss": 0.1883,
150
+ "step": 4000
151
+ },
152
+ {
153
+ "epoch": 1.3439999999999999,
154
+ "grad_norm": 0.021254699677228928,
155
+ "learning_rate": 0.0024751226806475142,
156
+ "loss": 0.1868,
157
+ "step": 4200
158
+ },
159
+ {
160
+ "epoch": 1.408,
161
+ "grad_norm": 0.021861741319298744,
162
+ "learning_rate": 0.002050608278003736,
163
+ "loss": 0.1848,
164
+ "step": 4400
165
+ },
166
+ {
167
+ "epoch": 1.472,
168
+ "grad_norm": 0.02483428828418255,
169
+ "learning_rate": 0.0016564858331386562,
170
+ "loss": 0.1848,
171
+ "step": 4600
172
+ },
173
+ {
174
+ "epoch": 1.536,
175
+ "grad_norm": 0.01872268132865429,
176
+ "learning_rate": 0.0012968165742081217,
177
+ "loss": 0.1775,
178
+ "step": 4800
179
+ },
180
+ {
181
+ "epoch": 1.6,
182
+ "grad_norm": 0.0213455967605114,
183
+ "learning_rate": 0.0009753067070884736,
184
+ "loss": 0.182,
185
+ "step": 5000
186
+ },
187
+ {
188
+ "epoch": 1.6640000000000001,
189
+ "grad_norm": 0.025262294337153435,
190
+ "learning_rate": 0.0006952692248399689,
191
+ "loss": 0.1778,
192
+ "step": 5200
193
+ },
194
+ {
195
+ "epoch": 1.728,
196
+ "grad_norm": 0.023666556924581528,
197
+ "learning_rate": 0.0004595897690250567,
198
+ "loss": 0.1763,
199
+ "step": 5400
200
+ },
201
+ {
202
+ "epoch": 1.792,
203
+ "grad_norm": 0.020242227241396904,
204
+ "learning_rate": 0.0002706968946630728,
205
+ "loss": 0.1744,
206
+ "step": 5600
207
+ },
208
+ {
209
+ "epoch": 1.8559999999999999,
210
+ "grad_norm": 0.020292259752750397,
211
+ "learning_rate": 0.00013053704522556165,
212
+ "loss": 0.1776,
213
+ "step": 5800
214
+ },
215
+ {
216
+ "epoch": 1.92,
217
+ "grad_norm": 0.024162383750081062,
218
+ "learning_rate": 4.055449554200896e-05,
219
+ "loss": 0.1721,
220
+ "step": 6000
221
+ },
222
+ {
223
+ "epoch": 1.984,
224
+ "grad_norm": 0.02264169603586197,
225
+ "learning_rate": 1.6764692939641446e-06,
226
+ "loss": 0.1728,
227
+ "step": 6200
228
+ },
229
+ {
230
+ "epoch": 2.0,
231
+ "step": 6250,
232
+ "total_flos": 4.0647058784256e+18,
233
+ "train_loss": 0.22118227462768555,
234
+ "train_runtime": 5603.7872,
235
+ "train_samples_per_second": 35.69,
236
+ "train_steps_per_second": 1.115
237
+ }
238
+ ],
239
+ "logging_steps": 200,
240
+ "max_steps": 6250,
241
+ "num_input_tokens_seen": 0,
242
+ "num_train_epochs": 2,
243
+ "save_steps": 0,
244
+ "stateful_callbacks": {
245
+ "TrainerControl": {
246
+ "args": {
247
+ "should_epoch_stop": false,
248
+ "should_evaluate": false,
249
+ "should_log": false,
250
+ "should_save": true,
251
+ "should_training_stop": true
252
+ },
253
+ "attributes": {}
254
+ }
255
+ },
256
+ "total_flos": 4.0647058784256e+18,
257
+ "train_batch_size": 32,
258
+ "trial_name": null,
259
+ "trial_params": null
260
+ }
nl_tasks/exp100/run_ex04/ft/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": false,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "q_proj",
14
+ "v_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex04/ft/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
nl_tasks/exp100/run_ex04/ft/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nl_tasks/exp100/run_ex04/ft/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
nl_tasks/exp100/run_ex04/ft/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 512,
37
+ "pad_token": "<unk>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
nl_tasks/exp100/run_ex04/ft2/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": true,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "q_proj",
14
+ "v_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex04/ft2/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f3ae0266c1a6651903cc003f29f14607c131c865fab0338ef1685771b547d6b
3
+ size 33602915
nl_tasks/exp100/run_ex04/trainer_state.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 6250,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.064,
14
+ "grad_norm": 0.04368972033262253,
15
+ "learning_rate": 0.049940412557707425,
16
+ "loss": 0.869,
17
+ "step": 200
18
+ },
19
+ {
20
+ "epoch": 0.128,
21
+ "grad_norm": 0.028173571452498436,
22
+ "learning_rate": 0.049637026969026256,
23
+ "loss": 0.2959,
24
+ "step": 400
25
+ },
26
+ {
27
+ "epoch": 0.192,
28
+ "grad_norm": 0.023793907836079597,
29
+ "learning_rate": 0.049079769547095156,
30
+ "loss": 0.2789,
31
+ "step": 600
32
+ },
33
+ {
34
+ "epoch": 0.256,
35
+ "grad_norm": 0.029163537546992302,
36
+ "learning_rate": 0.048274382541718695,
37
+ "loss": 0.2695,
38
+ "step": 800
39
+ },
40
+ {
41
+ "epoch": 0.32,
42
+ "grad_norm": 0.026763763278722763,
43
+ "learning_rate": 0.047229165049853146,
44
+ "loss": 0.2608,
45
+ "step": 1000
46
+ },
47
+ {
48
+ "epoch": 0.384,
49
+ "grad_norm": 0.020129157230257988,
50
+ "learning_rate": 0.04595488749770045,
51
+ "loss": 0.2529,
52
+ "step": 1200
53
+ },
54
+ {
55
+ "epoch": 0.448,
56
+ "grad_norm": 0.02349601686000824,
57
+ "learning_rate": 0.0444646806570311,
58
+ "loss": 0.2478,
59
+ "step": 1400
60
+ },
61
+ {
62
+ "epoch": 0.512,
63
+ "grad_norm": 0.017336582764983177,
64
+ "learning_rate": 0.042773900339365636,
65
+ "loss": 0.2475,
66
+ "step": 1600
67
+ },
68
+ {
69
+ "epoch": 0.576,
70
+ "grad_norm": 0.017148617655038834,
71
+ "learning_rate": 0.040899969162271024,
72
+ "loss": 0.2448,
73
+ "step": 1800
74
+ },
75
+ {
76
+ "epoch": 0.64,
77
+ "grad_norm": 0.014803516678512096,
78
+ "learning_rate": 0.03886219701828874,
79
+ "loss": 0.2445,
80
+ "step": 2000
81
+ },
82
+ {
83
+ "epoch": 0.704,
84
+ "grad_norm": 0.012438619509339333,
85
+ "learning_rate": 0.03668158209646929,
86
+ "loss": 0.238,
87
+ "step": 2200
88
+ },
89
+ {
90
+ "epoch": 0.768,
91
+ "grad_norm": 0.014171008951961994,
92
+ "learning_rate": 0.034380594506883626,
93
+ "loss": 0.2348,
94
+ "step": 2400
95
+ },
96
+ {
97
+ "epoch": 0.832,
98
+ "grad_norm": 0.01251581683754921,
99
+ "learning_rate": 0.03198294473774882,
100
+ "loss": 0.2325,
101
+ "step": 2600
102
+ },
103
+ {
104
+ "epoch": 0.896,
105
+ "grad_norm": 0.011021828278899193,
106
+ "learning_rate": 0.029513339331098024,
107
+ "loss": 0.2287,
108
+ "step": 2800
109
+ },
110
+ {
111
+ "epoch": 0.96,
112
+ "grad_norm": 0.011575430631637573,
113
+ "learning_rate": 0.026997226294630305,
114
+ "loss": 0.2273,
115
+ "step": 3000
116
+ },
117
+ {
118
+ "epoch": 1.024,
119
+ "grad_norm": 0.011999037116765976,
120
+ "learning_rate": 0.024460532873140067,
121
+ "loss": 0.2178,
122
+ "step": 3200
123
+ },
124
+ {
125
+ "epoch": 1.088,
126
+ "grad_norm": 0.012413745746016502,
127
+ "learning_rate": 0.021929398381655724,
128
+ "loss": 0.2063,
129
+ "step": 3400
130
+ },
131
+ {
132
+ "epoch": 1.152,
133
+ "grad_norm": 0.012539232149720192,
134
+ "learning_rate": 0.019429904853304196,
135
+ "loss": 0.2096,
136
+ "step": 3600
137
+ },
138
+ {
139
+ "epoch": 1.216,
140
+ "grad_norm": 0.011288085952401161,
141
+ "learning_rate": 0.016987808277435856,
142
+ "loss": 0.2062,
143
+ "step": 3800
144
+ },
145
+ {
146
+ "epoch": 1.28,
147
+ "grad_norm": 0.01744219847023487,
148
+ "learning_rate": 0.014628273197462061,
149
+ "loss": 0.2029,
150
+ "step": 4000
151
+ },
152
+ {
153
+ "epoch": 1.3439999999999999,
154
+ "grad_norm": 0.011675246991217136,
155
+ "learning_rate": 0.01237561340323757,
156
+ "loss": 0.2011,
157
+ "step": 4200
158
+ },
159
+ {
160
+ "epoch": 1.408,
161
+ "grad_norm": 0.01028984971344471,
162
+ "learning_rate": 0.010253041390018681,
163
+ "loss": 0.1981,
164
+ "step": 4400
165
+ },
166
+ {
167
+ "epoch": 1.472,
168
+ "grad_norm": 0.009951326064765453,
169
+ "learning_rate": 0.008282429165693281,
170
+ "loss": 0.1974,
171
+ "step": 4600
172
+ },
173
+ {
174
+ "epoch": 1.536,
175
+ "grad_norm": 0.00813743844628334,
176
+ "learning_rate": 0.006484082871040609,
177
+ "loss": 0.1889,
178
+ "step": 4800
179
+ },
180
+ {
181
+ "epoch": 1.6,
182
+ "grad_norm": 0.009917319752275944,
183
+ "learning_rate": 0.004876533535442368,
184
+ "loss": 0.1921,
185
+ "step": 5000
186
+ },
187
+ {
188
+ "epoch": 1.6640000000000001,
189
+ "grad_norm": 0.010274921543896198,
190
+ "learning_rate": 0.003476346124199845,
191
+ "loss": 0.1873,
192
+ "step": 5200
193
+ },
194
+ {
195
+ "epoch": 1.728,
196
+ "grad_norm": 0.011271242052316666,
197
+ "learning_rate": 0.0022979488451252833,
198
+ "loss": 0.1844,
199
+ "step": 5400
200
+ },
201
+ {
202
+ "epoch": 1.792,
203
+ "grad_norm": 0.009762358851730824,
204
+ "learning_rate": 0.001353484473315364,
205
+ "loss": 0.1814,
206
+ "step": 5600
207
+ },
208
+ {
209
+ "epoch": 1.8559999999999999,
210
+ "grad_norm": 0.00860436912626028,
211
+ "learning_rate": 0.0006526852261278083,
212
+ "loss": 0.1838,
213
+ "step": 5800
214
+ },
215
+ {
216
+ "epoch": 1.92,
217
+ "grad_norm": 0.010398217476904392,
218
+ "learning_rate": 0.00020277247771004482,
219
+ "loss": 0.1775,
220
+ "step": 6000
221
+ },
222
+ {
223
+ "epoch": 1.984,
224
+ "grad_norm": 0.010314074344933033,
225
+ "learning_rate": 8.382346469820723e-06,
226
+ "loss": 0.1783,
227
+ "step": 6200
228
+ },
229
+ {
230
+ "epoch": 2.0,
231
+ "step": 6250,
232
+ "total_flos": 4.0647058784256e+18,
233
+ "train_loss": 0.2409750535583496,
234
+ "train_runtime": 5596.8758,
235
+ "train_samples_per_second": 35.734,
236
+ "train_steps_per_second": 1.117
237
+ }
238
+ ],
239
+ "logging_steps": 200,
240
+ "max_steps": 6250,
241
+ "num_input_tokens_seen": 0,
242
+ "num_train_epochs": 2,
243
+ "save_steps": 0,
244
+ "stateful_callbacks": {
245
+ "TrainerControl": {
246
+ "args": {
247
+ "should_epoch_stop": false,
248
+ "should_evaluate": false,
249
+ "should_log": false,
250
+ "should_save": true,
251
+ "should_training_stop": true
252
+ },
253
+ "attributes": {}
254
+ }
255
+ },
256
+ "total_flos": 4.0647058784256e+18,
257
+ "train_batch_size": 32,
258
+ "trial_name": null,
259
+ "trial_params": null
260
+ }
nl_tasks/exp100/run_ex05/ft/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": false,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "v_proj",
14
+ "q_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex05/ft/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
nl_tasks/exp100/run_ex05/ft/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nl_tasks/exp100/run_ex05/ft/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
nl_tasks/exp100/run_ex05/ft/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 512,
37
+ "pad_token": "<unk>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
nl_tasks/exp100/run_ex05/ft2/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": true,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "v_proj",
14
+ "q_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex05/ft2/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92a6b1824b00e9dee64d31ee13cebccc364da375813e22cd7363a5dba7c0f92d
3
+ size 33602915
nl_tasks/exp100/run_ex05/trainer_state.json ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 9375,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.064,
14
+ "grad_norm": 0.1212976798415184,
15
+ "learning_rate": 0.00999684221114305,
16
+ "loss": 0.6038,
17
+ "step": 200
18
+ },
19
+ {
20
+ "epoch": 0.128,
21
+ "grad_norm": 0.065467968583107,
22
+ "learning_rate": 0.009973376564462873,
23
+ "loss": 0.2948,
24
+ "step": 400
25
+ },
26
+ {
27
+ "epoch": 0.192,
28
+ "grad_norm": 0.06629683822393417,
29
+ "learning_rate": 0.009927125570277145,
30
+ "loss": 0.2742,
31
+ "step": 600
32
+ },
33
+ {
34
+ "epoch": 0.256,
35
+ "grad_norm": 0.06881999224424362,
36
+ "learning_rate": 0.009858301125867589,
37
+ "loss": 0.2629,
38
+ "step": 800
39
+ },
40
+ {
41
+ "epoch": 0.32,
42
+ "grad_norm": 0.14311932027339935,
43
+ "learning_rate": 0.009767218547973728,
44
+ "loss": 0.253,
45
+ "step": 1000
46
+ },
47
+ {
48
+ "epoch": 0.384,
49
+ "grad_norm": 0.052844878286123276,
50
+ "learning_rate": 0.009654295128180494,
51
+ "loss": 0.2444,
52
+ "step": 1200
53
+ },
54
+ {
55
+ "epoch": 0.448,
56
+ "grad_norm": 0.045355528593063354,
57
+ "learning_rate": 0.009520048221111679,
58
+ "loss": 0.2394,
59
+ "step": 1400
60
+ },
61
+ {
62
+ "epoch": 0.512,
63
+ "grad_norm": 0.03619716316461563,
64
+ "learning_rate": 0.009365092874188177,
65
+ "loss": 0.2379,
66
+ "step": 1600
67
+ },
68
+ {
69
+ "epoch": 0.576,
70
+ "grad_norm": 0.03572649136185646,
71
+ "learning_rate": 0.00919013900981014,
72
+ "loss": 0.2351,
73
+ "step": 1800
74
+ },
75
+ {
76
+ "epoch": 0.64,
77
+ "grad_norm": 0.02887910045683384,
78
+ "learning_rate": 0.008995988172872798,
79
+ "loss": 0.2343,
80
+ "step": 2000
81
+ },
82
+ {
83
+ "epoch": 0.704,
84
+ "grad_norm": 0.030390536412596703,
85
+ "learning_rate": 0.008783529858517077,
86
+ "loss": 0.2281,
87
+ "step": 2200
88
+ },
89
+ {
90
+ "epoch": 0.768,
91
+ "grad_norm": 0.02559584006667137,
92
+ "learning_rate": 0.008553737436939324,
93
+ "loss": 0.226,
94
+ "step": 2400
95
+ },
96
+ {
97
+ "epoch": 0.832,
98
+ "grad_norm": 0.028384791687130928,
99
+ "learning_rate": 0.008307663693930425,
100
+ "loss": 0.2239,
101
+ "step": 2600
102
+ },
103
+ {
104
+ "epoch": 0.896,
105
+ "grad_norm": 0.022031353786587715,
106
+ "learning_rate": 0.00804643600757522,
107
+ "loss": 0.2205,
108
+ "step": 2800
109
+ },
110
+ {
111
+ "epoch": 0.96,
112
+ "grad_norm": 0.02517508529126644,
113
+ "learning_rate": 0.007771251183209993,
114
+ "loss": 0.2194,
115
+ "step": 3000
116
+ },
117
+ {
118
+ "epoch": 1.024,
119
+ "grad_norm": 0.02236510068178177,
120
+ "learning_rate": 0.007483369970301455,
121
+ "loss": 0.2098,
122
+ "step": 3200
123
+ },
124
+ {
125
+ "epoch": 1.088,
126
+ "grad_norm": 0.02561621367931366,
127
+ "learning_rate": 0.0071841112863680005,
128
+ "loss": 0.1984,
129
+ "step": 3400
130
+ },
131
+ {
132
+ "epoch": 1.152,
133
+ "grad_norm": 0.024599742144346237,
134
+ "learning_rate": 0.006874846174406093,
135
+ "loss": 0.2025,
136
+ "step": 3600
137
+ },
138
+ {
139
+ "epoch": 1.216,
140
+ "grad_norm": 0.02521314099431038,
141
+ "learning_rate": 0.006556991521505633,
142
+ "loss": 0.2005,
143
+ "step": 3800
144
+ },
145
+ {
146
+ "epoch": 1.28,
147
+ "grad_norm": 0.019666949287056923,
148
+ "learning_rate": 0.006232003567432242,
149
+ "loss": 0.1988,
150
+ "step": 4000
151
+ },
152
+ {
153
+ "epoch": 1.3439999999999999,
154
+ "grad_norm": 0.020948218181729317,
155
+ "learning_rate": 0.005901371232916675,
156
+ "loss": 0.1985,
157
+ "step": 4200
158
+ },
159
+ {
160
+ "epoch": 1.408,
161
+ "grad_norm": 0.022395219653844833,
162
+ "learning_rate": 0.005566609298217553,
163
+ "loss": 0.1972,
164
+ "step": 4400
165
+ },
166
+ {
167
+ "epoch": 1.472,
168
+ "grad_norm": 0.020159346982836723,
169
+ "learning_rate": 0.005229251463209568,
170
+ "loss": 0.198,
171
+ "step": 4600
172
+ },
173
+ {
174
+ "epoch": 1.536,
175
+ "grad_norm": 0.016137801110744476,
176
+ "learning_rate": 0.004890843320792184,
177
+ "loss": 0.1915,
178
+ "step": 4800
179
+ },
180
+ {
181
+ "epoch": 1.6,
182
+ "grad_norm": 0.017183274030685425,
183
+ "learning_rate": 0.004552935275810833,
184
+ "loss": 0.1971,
185
+ "step": 5000
186
+ },
187
+ {
188
+ "epoch": 1.6640000000000001,
189
+ "grad_norm": 0.01746981218457222,
190
+ "learning_rate": 0.004217075441932357,
191
+ "loss": 0.1929,
192
+ "step": 5200
193
+ },
194
+ {
195
+ "epoch": 1.728,
196
+ "grad_norm": 0.01835208386182785,
197
+ "learning_rate": 0.0038848025490174254,
198
+ "loss": 0.191,
199
+ "step": 5400
200
+ },
201
+ {
202
+ "epoch": 1.792,
203
+ "grad_norm": 0.017558401450514793,
204
+ "learning_rate": 0.0035576388934845005,
205
+ "loss": 0.1889,
206
+ "step": 5600
207
+ },
208
+ {
209
+ "epoch": 1.8559999999999999,
210
+ "grad_norm": 0.016855215653777122,
211
+ "learning_rate": 0.003237083363963042,
212
+ "loss": 0.1914,
213
+ "step": 5800
214
+ },
215
+ {
216
+ "epoch": 1.92,
217
+ "grad_norm": 0.01803995855152607,
218
+ "learning_rate": 0.0029246045741886696,
219
+ "loss": 0.185,
220
+ "step": 6000
221
+ },
222
+ {
223
+ "epoch": 1.984,
224
+ "grad_norm": 0.019782407209277153,
225
+ "learning_rate": 0.0026216341346016613,
226
+ "loss": 0.1845,
227
+ "step": 6200
228
+ },
229
+ {
230
+ "epoch": 2.048,
231
+ "grad_norm": 0.018053608015179634,
232
+ "learning_rate": 0.0023295600934747397,
233
+ "loss": 0.1674,
234
+ "step": 6400
235
+ },
236
+ {
237
+ "epoch": 2.112,
238
+ "grad_norm": 0.021936679258942604,
239
+ "learning_rate": 0.002049720577619374,
240
+ "loss": 0.1611,
241
+ "step": 6600
242
+ },
243
+ {
244
+ "epoch": 2.176,
245
+ "grad_norm": 0.01545505877584219,
246
+ "learning_rate": 0.0017833976618054676,
247
+ "loss": 0.1611,
248
+ "step": 6800
249
+ },
250
+ {
251
+ "epoch": 2.24,
252
+ "grad_norm": 0.018597135320305824,
253
+ "learning_rate": 0.001531811494981501,
254
+ "loss": 0.1582,
255
+ "step": 7000
256
+ },
257
+ {
258
+ "epoch": 2.304,
259
+ "grad_norm": 0.01862194575369358,
260
+ "learning_rate": 0.001296114710205592,
261
+ "loss": 0.1617,
262
+ "step": 7200
263
+ },
264
+ {
265
+ "epoch": 2.368,
266
+ "grad_norm": 0.017578421160578728,
267
+ "learning_rate": 0.0010773871438982197,
268
+ "loss": 0.1546,
269
+ "step": 7400
270
+ },
271
+ {
272
+ "epoch": 2.432,
273
+ "grad_norm": 0.01894843764603138,
274
+ "learning_rate": 0.0008766308886101404,
275
+ "loss": 0.1593,
276
+ "step": 7600
277
+ },
278
+ {
279
+ "epoch": 2.496,
280
+ "grad_norm": 0.01759357750415802,
281
+ "learning_rate": 0.0006947657019710795,
282
+ "loss": 0.1569,
283
+ "step": 7800
284
+ },
285
+ {
286
+ "epoch": 2.56,
287
+ "grad_norm": 0.0183447003364563,
288
+ "learning_rate": 0.0005326247928529187,
289
+ "loss": 0.1597,
290
+ "step": 8000
291
+ },
292
+ {
293
+ "epoch": 2.624,
294
+ "grad_norm": 0.01979999430477619,
295
+ "learning_rate": 0.000390951004052949,
296
+ "loss": 0.1559,
297
+ "step": 8200
298
+ },
299
+ {
300
+ "epoch": 2.6879999999999997,
301
+ "grad_norm": 0.01742800511419773,
302
+ "learning_rate": 0.0002703934089860627,
303
+ "loss": 0.1531,
304
+ "step": 8400
305
+ },
306
+ {
307
+ "epoch": 2.752,
308
+ "grad_norm": 0.018611254170536995,
309
+ "learning_rate": 0.00017150433797803011,
310
+ "loss": 0.1532,
311
+ "step": 8600
312
+ },
313
+ {
314
+ "epoch": 2.816,
315
+ "grad_norm": 0.019568437710404396,
316
+ "learning_rate": 9.473684778379676e-05,
317
+ "loss": 0.1549,
318
+ "step": 8800
319
+ },
320
+ {
321
+ "epoch": 2.88,
322
+ "grad_norm": 0.01714991219341755,
323
+ "learning_rate": 4.04426459241064e-05,
324
+ "loss": 0.1533,
325
+ "step": 9000
326
+ },
327
+ {
328
+ "epoch": 2.944,
329
+ "grad_norm": 0.02003113366663456,
330
+ "learning_rate": 8.87047935002272e-06,
331
+ "loss": 0.1541,
332
+ "step": 9200
333
+ },
334
+ {
335
+ "epoch": 3.0,
336
+ "step": 9375,
337
+ "total_flos": 6.0970588176384e+18,
338
+ "train_loss": 0.2053676788330078,
339
+ "train_runtime": 8368.9804,
340
+ "train_samples_per_second": 35.847,
341
+ "train_steps_per_second": 1.12
342
+ }
343
+ ],
344
+ "logging_steps": 200,
345
+ "max_steps": 9375,
346
+ "num_input_tokens_seen": 0,
347
+ "num_train_epochs": 3,
348
+ "save_steps": 0,
349
+ "stateful_callbacks": {
350
+ "TrainerControl": {
351
+ "args": {
352
+ "should_epoch_stop": false,
353
+ "should_evaluate": false,
354
+ "should_log": false,
355
+ "should_save": true,
356
+ "should_training_stop": true
357
+ },
358
+ "attributes": {}
359
+ }
360
+ },
361
+ "total_flos": 6.0970588176384e+18,
362
+ "train_batch_size": 32,
363
+ "trial_name": null,
364
+ "trial_params": null
365
+ }
nl_tasks/exp100/run_ex06/ft/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": false,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "v_proj",
14
+ "q_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex06/ft/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
nl_tasks/exp100/run_ex06/ft/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nl_tasks/exp100/run_ex06/ft/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
nl_tasks/exp100/run_ex06/ft/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 512,
37
+ "pad_token": "<unk>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
nl_tasks/exp100/run_ex06/ft2/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": true,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "v_proj",
14
+ "q_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex06/ft2/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b6eff2d5fa7579af4e75d06b790b1a0331b1bcb1e9d1a3259da889964fbf273
3
+ size 33602915
nl_tasks/exp100/run_ex06/trainer_state.json ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 4168,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.09596928982725528,
14
+ "grad_norm": 0.6366388201713562,
15
+ "learning_rate": 0.009964316835038782,
16
+ "loss": 5.3796,
17
+ "step": 200
18
+ },
19
+ {
20
+ "epoch": 0.19193857965451055,
21
+ "grad_norm": 0.05433971807360649,
22
+ "learning_rate": 0.00981641298054017,
23
+ "loss": 2.1242,
24
+ "step": 400
25
+ },
26
+ {
27
+ "epoch": 0.28790786948176583,
28
+ "grad_norm": 0.05430266633629799,
29
+ "learning_rate": 0.00955703204967729,
30
+ "loss": 0.2875,
31
+ "step": 600
32
+ },
33
+ {
34
+ "epoch": 0.3838771593090211,
35
+ "grad_norm": 0.042459528893232346,
36
+ "learning_rate": 0.009192177478607726,
37
+ "loss": 0.2663,
38
+ "step": 800
39
+ },
40
+ {
41
+ "epoch": 0.4798464491362764,
42
+ "grad_norm": 0.038593146950006485,
43
+ "learning_rate": 0.008730293917124864,
44
+ "loss": 0.251,
45
+ "step": 1000
46
+ },
47
+ {
48
+ "epoch": 0.5758157389635317,
49
+ "grad_norm": 0.04011659324169159,
50
+ "learning_rate": 0.008182071775138081,
51
+ "loss": 0.2465,
52
+ "step": 1200
53
+ },
54
+ {
55
+ "epoch": 0.6717850287907869,
56
+ "grad_norm": 0.0388704277575016,
57
+ "learning_rate": 0.007560199790476499,
58
+ "loss": 0.241,
59
+ "step": 1400
60
+ },
61
+ {
62
+ "epoch": 0.7677543186180422,
63
+ "grad_norm": 0.03452278673648834,
64
+ "learning_rate": 0.006879071344895999,
65
+ "loss": 0.2317,
66
+ "step": 1600
67
+ },
68
+ {
69
+ "epoch": 0.8637236084452975,
70
+ "grad_norm": 0.035173822194337845,
71
+ "learning_rate": 0.006154451325678603,
72
+ "loss": 0.2279,
73
+ "step": 1800
74
+ },
75
+ {
76
+ "epoch": 0.9596928982725528,
77
+ "grad_norm": 0.03411315754055977,
78
+ "learning_rate": 0.005403111243395296,
79
+ "loss": 0.2222,
80
+ "step": 2000
81
+ },
82
+ {
83
+ "epoch": 1.055662188099808,
84
+ "grad_norm": 0.03330094739794731,
85
+ "learning_rate": 0.004642441051122492,
86
+ "loss": 0.2085,
87
+ "step": 2200
88
+ },
89
+ {
90
+ "epoch": 1.1516314779270633,
91
+ "grad_norm": 0.03454425185918808,
92
+ "learning_rate": 0.003890046649652885,
93
+ "loss": 0.202,
94
+ "step": 2400
95
+ },
96
+ {
97
+ "epoch": 1.2476007677543186,
98
+ "grad_norm": 0.031027931720018387,
99
+ "learning_rate": 0.0031633423945426832,
100
+ "loss": 0.1988,
101
+ "step": 2600
102
+ },
103
+ {
104
+ "epoch": 1.3435700575815739,
105
+ "grad_norm": 0.028650769963860512,
106
+ "learning_rate": 0.002479148036521003,
107
+ "loss": 0.1959,
108
+ "step": 2800
109
+ },
110
+ {
111
+ "epoch": 1.4395393474088292,
112
+ "grad_norm": 0.031158311292529106,
113
+ "learning_rate": 0.0018532994241758595,
114
+ "loss": 0.1933,
115
+ "step": 3000
116
+ },
117
+ {
118
+ "epoch": 1.5355086372360844,
119
+ "grad_norm": 0.031318724155426025,
120
+ "learning_rate": 0.0013002819792999314,
121
+ "loss": 0.1882,
122
+ "step": 3200
123
+ },
124
+ {
125
+ "epoch": 1.6314779270633397,
126
+ "grad_norm": 0.03243976831436157,
127
+ "learning_rate": 0.0008328954282003914,
128
+ "loss": 0.1887,
129
+ "step": 3400
130
+ },
131
+ {
132
+ "epoch": 1.727447216890595,
133
+ "grad_norm": 0.031175516545772552,
134
+ "learning_rate": 0.00046195754884998665,
135
+ "loss": 0.1856,
136
+ "step": 3600
137
+ },
138
+ {
139
+ "epoch": 1.8234165067178503,
140
+ "grad_norm": 0.028726134449243546,
141
+ "learning_rate": 0.00019605379072529185,
142
+ "loss": 0.1848,
143
+ "step": 3800
144
+ },
145
+ {
146
+ "epoch": 1.9193857965451055,
147
+ "grad_norm": 0.034339435398578644,
148
+ "learning_rate": 4.133856244321388e-05,
149
+ "loss": 0.1809,
150
+ "step": 4000
151
+ },
152
+ {
153
+ "epoch": 2.0,
154
+ "step": 4168,
155
+ "total_flos": 4.0647058784256e+18,
156
+ "train_loss": 0.5545519496566275,
157
+ "train_runtime": 5547.6753,
158
+ "train_samples_per_second": 36.051,
159
+ "train_steps_per_second": 0.751
160
+ }
161
+ ],
162
+ "logging_steps": 200,
163
+ "max_steps": 4168,
164
+ "num_input_tokens_seen": 0,
165
+ "num_train_epochs": 2,
166
+ "save_steps": 0,
167
+ "stateful_callbacks": {
168
+ "TrainerControl": {
169
+ "args": {
170
+ "should_epoch_stop": false,
171
+ "should_evaluate": false,
172
+ "should_log": false,
173
+ "should_save": true,
174
+ "should_training_stop": true
175
+ },
176
+ "attributes": {}
177
+ }
178
+ },
179
+ "total_flos": 4.0647058784256e+18,
180
+ "train_batch_size": 48,
181
+ "trial_name": null,
182
+ "trial_params": null
183
+ }
nl_tasks/exp100/run_ex07/ft/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": false,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "v_proj",
14
+ "q_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex07/ft/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
nl_tasks/exp100/run_ex07/ft/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
nl_tasks/exp100/run_ex07/ft/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
nl_tasks/exp100/run_ex07/ft/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 512,
37
+ "pad_token": "<unk>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
nl_tasks/exp100/run_ex07/ft2/adapter_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "T": 1.0,
3
+ "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
4
+ "bias": "none",
5
+ "inference_mode": true,
6
+ "layers_to_transform": null,
7
+ "modules_to_save": null,
8
+ "num_rotations": 1,
9
+ "peft_type": "ROTATION",
10
+ "r": 16,
11
+ "revision": null,
12
+ "target_modules": [
13
+ "v_proj",
14
+ "q_proj"
15
+ ],
16
+ "target_modules_to_skip": null,
17
+ "task_type": "CAUSAL_LM"
18
+ }
nl_tasks/exp100/run_ex07/ft2/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463d7e91ab4fbd7f09a9bbaff1bf0cc91021c6cd2d4cb0c851d89a72c5e61e13
3
+ size 33602915
nl_tasks/exp100/run_ex07/trainer_state.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 6252,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.09596928982725528,
14
+ "grad_norm": 0.08396021276712418,
15
+ "learning_rate": 0.009988090209605933,
16
+ "loss": 6.13,
17
+ "step": 200
18
+ },
19
+ {
20
+ "epoch": 0.19193857965451055,
21
+ "grad_norm": 0.5583938956260681,
22
+ "learning_rate": 0.009927452190923135,
23
+ "loss": 4.905,
24
+ "step": 400
25
+ },
26
+ {
27
+ "epoch": 0.28790786948176583,
28
+ "grad_norm": 0.04631821811199188,
29
+ "learning_rate": 0.009816072106071381,
30
+ "loss": 0.8658,
31
+ "step": 600
32
+ },
33
+ {
34
+ "epoch": 0.3838771593090211,
35
+ "grad_norm": 0.04276253283023834,
36
+ "learning_rate": 0.00965509692825639,
37
+ "loss": 0.283,
38
+ "step": 800
39
+ },
40
+ {
41
+ "epoch": 0.4798464491362764,
42
+ "grad_norm": 0.03793822228908539,
43
+ "learning_rate": 0.009446184352531868,
44
+ "loss": 0.2622,
45
+ "step": 1000
46
+ },
47
+ {
48
+ "epoch": 0.5758157389635317,
49
+ "grad_norm": 0.03757239133119583,
50
+ "learning_rate": 0.009191485725137248,
51
+ "loss": 0.2549,
52
+ "step": 1200
53
+ },
54
+ {
55
+ "epoch": 0.6717850287907869,
56
+ "grad_norm": 0.03672794625163078,
57
+ "learning_rate": 0.00889362388929934,
58
+ "loss": 0.2488,
59
+ "step": 1400
60
+ },
61
+ {
62
+ "epoch": 0.7677543186180422,
63
+ "grad_norm": 0.03426536172628403,
64
+ "learning_rate": 0.008555666175638042,
65
+ "loss": 0.2386,
66
+ "step": 1600
67
+ },
68
+ {
69
+ "epoch": 0.8637236084452975,
70
+ "grad_norm": 0.0342838317155838,
71
+ "learning_rate": 0.008181092815316326,
72
+ "loss": 0.2349,
73
+ "step": 1800
74
+ },
75
+ {
76
+ "epoch": 0.9596928982725528,
77
+ "grad_norm": 0.03222282603383064,
78
+ "learning_rate": 0.007773761101210539,
79
+ "loss": 0.2294,
80
+ "step": 2000
81
+ },
82
+ {
83
+ "epoch": 1.055662188099808,
84
+ "grad_norm": 0.033653076738119125,
85
+ "learning_rate": 0.0073378656661631705,
86
+ "loss": 0.2174,
87
+ "step": 2200
88
+ },
89
+ {
90
+ "epoch": 1.1516314779270633,
91
+ "grad_norm": 0.037820011377334595,
92
+ "learning_rate": 0.006877895287365947,
93
+ "loss": 0.2128,
94
+ "step": 2400
95
+ },
96
+ {
97
+ "epoch": 1.2476007677543186,
98
+ "grad_norm": 0.03313204646110535,
99
+ "learning_rate": 0.006398586661694321,
100
+ "loss": 0.211,
101
+ "step": 2600
102
+ },
103
+ {
104
+ "epoch": 1.3435700575815739,
105
+ "grad_norm": 0.038085468113422394,
106
+ "learning_rate": 0.0059048756280071606,
107
+ "loss": 0.2086,
108
+ "step": 2800
109
+ },
110
+ {
111
+ "epoch": 1.4395393474088292,
112
+ "grad_norm": 0.027709227055311203,
113
+ "learning_rate": 0.0054018463387160995,
114
+ "loss": 0.2073,
115
+ "step": 3000
116
+ },
117
+ {
118
+ "epoch": 1.5355086372360844,
119
+ "grad_norm": 0.03159138932824135,
120
+ "learning_rate": 0.004894678904047102,
121
+ "loss": 0.2028,
122
+ "step": 3200
123
+ },
124
+ {
125
+ "epoch": 1.6314779270633397,
126
+ "grad_norm": 0.032402586191892624,
127
+ "learning_rate": 0.004388596048144727,
128
+ "loss": 0.2047,
129
+ "step": 3400
130
+ },
131
+ {
132
+ "epoch": 1.727447216890595,
133
+ "grad_norm": 0.028337595984339714,
134
+ "learning_rate": 0.003888809326345519,
135
+ "loss": 0.2012,
136
+ "step": 3600
137
+ },
138
+ {
139
+ "epoch": 1.8234165067178503,
140
+ "grad_norm": 0.026909947395324707,
141
+ "learning_rate": 0.0034004654574658734,
142
+ "loss": 0.1998,
143
+ "step": 3800
144
+ },
145
+ {
146
+ "epoch": 1.9193857965451055,
147
+ "grad_norm": 0.031789738684892654,
148
+ "learning_rate": 0.002928593323765374,
149
+ "loss": 0.195,
150
+ "step": 4000
151
+ },
152
+ {
153
+ "epoch": 2.015355086372361,
154
+ "grad_norm": 0.029551630839705467,
155
+ "learning_rate": 0.002478052184371007,
156
+ "loss": 0.1912,
157
+ "step": 4200
158
+ },
159
+ {
160
+ "epoch": 2.111324376199616,
161
+ "grad_norm": 0.03310905396938324,
162
+ "learning_rate": 0.002053481635451576,
163
+ "loss": 0.1712,
164
+ "step": 4400
165
+ },
166
+ {
167
+ "epoch": 2.2072936660268714,
168
+ "grad_norm": 0.02764611691236496,
169
+ "learning_rate": 0.0016592538324439927,
170
+ "loss": 0.1706,
171
+ "step": 4600
172
+ },
173
+ {
174
+ "epoch": 2.3032629558541267,
175
+ "grad_norm": 0.027012605220079422,
176
+ "learning_rate": 0.0012994284663388062,
177
+ "loss": 0.1718,
178
+ "step": 4800
179
+ },
180
+ {
181
+ "epoch": 2.399232245681382,
182
+ "grad_norm": 0.028096886351704597,
183
+ "learning_rate": 0.0009777109576715644,
184
+ "loss": 0.1659,
185
+ "step": 5000
186
+ },
187
+ {
188
+ "epoch": 2.495201535508637,
189
+ "grad_norm": 0.030632272362709045,
190
+ "learning_rate": 0.0006974142987311794,
191
+ "loss": 0.169,
192
+ "step": 5200
193
+ },
194
+ {
195
+ "epoch": 2.5911708253358925,
196
+ "grad_norm": 0.028473207727074623,
197
+ "learning_rate": 0.0004614249369277501,
198
+ "loss": 0.1681,
199
+ "step": 5400
200
+ },
201
+ {
202
+ "epoch": 2.6871401151631478,
203
+ "grad_norm": 0.03663257881999016,
204
+ "learning_rate": 0.0002721730506471498,
205
+ "loss": 0.1651,
206
+ "step": 5600
207
+ },
208
+ {
209
+ "epoch": 2.783109404990403,
210
+ "grad_norm": 0.02837834507226944,
211
+ "learning_rate": 0.00013160752368655492,
212
+ "loss": 0.1629,
213
+ "step": 5800
214
+ },
215
+ {
216
+ "epoch": 2.8790786948176583,
217
+ "grad_norm": 0.03114727884531021,
218
+ "learning_rate": 4.11758759799491e-05,
219
+ "loss": 0.1642,
220
+ "step": 6000
221
+ },
222
+ {
223
+ "epoch": 2.9750479846449136,
224
+ "grad_norm": 0.028946418315172195,
225
+ "learning_rate": 1.809357283573676e-06,
226
+ "loss": 0.1653,
227
+ "step": 6200
228
+ },
229
+ {
230
+ "epoch": 3.0,
231
+ "step": 6252,
232
+ "total_flos": 6.0970588176384e+18,
233
+ "train_loss": 0.5637188162166037,
234
+ "train_runtime": 8208.1422,
235
+ "train_samples_per_second": 36.549,
236
+ "train_steps_per_second": 0.762
237
+ }
238
+ ],
239
+ "logging_steps": 200,
240
+ "max_steps": 6252,
241
+ "num_input_tokens_seen": 0,
242
+ "num_train_epochs": 3,
243
+ "save_steps": 0,
244
+ "stateful_callbacks": {
245
+ "TrainerControl": {
246
+ "args": {
247
+ "should_epoch_stop": false,
248
+ "should_evaluate": false,
249
+ "should_log": false,
250
+ "should_save": true,
251
+ "should_training_stop": true
252
+ },
253
+ "attributes": {}
254
+ }
255
+ },
256
+ "total_flos": 6.0970588176384e+18,
257
+ "train_batch_size": 48,
258
+ "trial_name": null,
259
+ "trial_params": null
260
+ }
nl_tasks/exp100/run_ex08/ft/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
nl_tasks/exp100/run_ex08/ft/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 512,
37
+ "pad_token": "<unk>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
nl_tasks/exp100/run_ex08/trainer_state.json ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 4168,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.09596928982725528,
14
+ "grad_norm": 0.05143404379487038,
15
+ "learning_rate": 0.019928633670077564,
16
+ "loss": 0.7492,
17
+ "step": 200
18
+ },
19
+ {
20
+ "epoch": 0.19193857965451055,
21
+ "grad_norm": 0.049090251326560974,
22
+ "learning_rate": 0.01963282596108034,
23
+ "loss": 0.2822,
24
+ "step": 400
25
+ },
26
+ {
27
+ "epoch": 0.28790786948176583,
28
+ "grad_norm": 0.03802068158984184,
29
+ "learning_rate": 0.01911406409935458,
30
+ "loss": 0.2606,
31
+ "step": 600
32
+ },
33
+ {
34
+ "epoch": 0.3838771593090211,
35
+ "grad_norm": 0.03355047479271889,
36
+ "learning_rate": 0.01838435495721545,
37
+ "loss": 0.2506,
38
+ "step": 800
39
+ },
40
+ {
41
+ "epoch": 0.4798464491362764,
42
+ "grad_norm": 0.03283309563994408,
43
+ "learning_rate": 0.017460587834249728,
44
+ "loss": 0.2402,
45
+ "step": 1000
46
+ },
47
+ {
48
+ "epoch": 0.5758157389635317,
49
+ "grad_norm": 0.028441881760954857,
50
+ "learning_rate": 0.016364143550276163,
51
+ "loss": 0.2378,
52
+ "step": 1200
53
+ },
54
+ {
55
+ "epoch": 0.6717850287907869,
56
+ "grad_norm": 0.025979651138186455,
57
+ "learning_rate": 0.015120399580952997,
58
+ "loss": 0.2336,
59
+ "step": 1400
60
+ },
61
+ {
62
+ "epoch": 0.7677543186180422,
63
+ "grad_norm": 0.023276863619685173,
64
+ "learning_rate": 0.013758142689791999,
65
+ "loss": 0.2249,
66
+ "step": 1600
67
+ },
68
+ {
69
+ "epoch": 0.8637236084452975,
70
+ "grad_norm": 0.02259671501815319,
71
+ "learning_rate": 0.012308902651357206,
72
+ "loss": 0.2221,
73
+ "step": 1800
74
+ },
75
+ {
76
+ "epoch": 0.9596928982725528,
77
+ "grad_norm": 0.01977524533867836,
78
+ "learning_rate": 0.010806222486790591,
79
+ "loss": 0.2172,
80
+ "step": 2000
81
+ },
82
+ {
83
+ "epoch": 1.055662188099808,
84
+ "grad_norm": 0.02073797583580017,
85
+ "learning_rate": 0.009284882102244985,
86
+ "loss": 0.2027,
87
+ "step": 2200
88
+ },
89
+ {
90
+ "epoch": 1.1516314779270633,
91
+ "grad_norm": 0.020284440368413925,
92
+ "learning_rate": 0.00778009329930577,
93
+ "loss": 0.1956,
94
+ "step": 2400
95
+ },
96
+ {
97
+ "epoch": 1.2476007677543186,
98
+ "grad_norm": 0.02195167914032936,
99
+ "learning_rate": 0.0063266847890853664,
100
+ "loss": 0.1924,
101
+ "step": 2600
102
+ },
103
+ {
104
+ "epoch": 1.3435700575815739,
105
+ "grad_norm": 0.01651054248213768,
106
+ "learning_rate": 0.004958296073042006,
107
+ "loss": 0.1901,
108
+ "step": 2800
109
+ },
110
+ {
111
+ "epoch": 1.4395393474088292,
112
+ "grad_norm": 0.021048910915851593,
113
+ "learning_rate": 0.003706598848351719,
114
+ "loss": 0.1877,
115
+ "step": 3000
116
+ },
117
+ {
118
+ "epoch": 1.5355086372360844,
119
+ "grad_norm": 0.01921105571091175,
120
+ "learning_rate": 0.002600563958599863,
121
+ "loss": 0.1824,
122
+ "step": 3200
123
+ },
124
+ {
125
+ "epoch": 1.6314779270633397,
126
+ "grad_norm": 0.02395492233335972,
127
+ "learning_rate": 0.0016657908564007829,
128
+ "loss": 0.1825,
129
+ "step": 3400
130
+ },
131
+ {
132
+ "epoch": 1.727447216890595,
133
+ "grad_norm": 0.01738915778696537,
134
+ "learning_rate": 0.0009239150976999733,
135
+ "loss": 0.1793,
136
+ "step": 3600
137
+ },
138
+ {
139
+ "epoch": 1.8234165067178503,
140
+ "grad_norm": 0.018468189984560013,
141
+ "learning_rate": 0.0003921075814505837,
142
+ "loss": 0.1784,
143
+ "step": 3800
144
+ },
145
+ {
146
+ "epoch": 1.9193857965451055,
147
+ "grad_norm": 0.017226792871952057,
148
+ "learning_rate": 8.267712488642776e-05,
149
+ "loss": 0.1743,
150
+ "step": 4000
151
+ },
152
+ {
153
+ "epoch": 2.0,
154
+ "step": 4168,
155
+ "total_flos": 4.0647058784256e+18,
156
+ "train_loss": 0.2365957404738882,
157
+ "train_runtime": 5474.4546,
158
+ "train_samples_per_second": 36.533,
159
+ "train_steps_per_second": 0.761
160
+ }
161
+ ],
162
+ "logging_steps": 200,
163
+ "max_steps": 4168,
164
+ "num_input_tokens_seen": 0,
165
+ "num_train_epochs": 2,
166
+ "save_steps": 0,
167
+ "stateful_callbacks": {
168
+ "TrainerControl": {
169
+ "args": {
170
+ "should_epoch_stop": false,
171
+ "should_evaluate": false,
172
+ "should_log": false,
173
+ "should_save": true,
174
+ "should_training_stop": true
175
+ },
176
+ "attributes": {}
177
+ }
178
+ },
179
+ "total_flos": 4.0647058784256e+18,
180
+ "train_batch_size": 48,
181
+ "trial_name": null,
182
+ "trial_params": null
183
+ }