Federic committed on
Commit
8e0dc0e
1 Parent(s): b0a84bf

Training in progress, step 25, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  library_name: peft
3
- base_model: defog/sqlcoder-7b-2
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
2
  library_name: peft
3
+ base_model: meta-llama/Llama-2-13b-hf
4
  ---
5
 
6
  # Model Card for Model ID
last-checkpoint/adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "defog/sqlcoder-7b-2",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -9,7 +9,7 @@
9
  "layers_pattern": null,
10
  "layers_to_transform": null,
11
  "loftq_config": {},
12
- "lora_alpha": 32,
13
  "lora_dropout": 0.1,
14
  "megatron_config": null,
15
  "megatron_core": "megatron.core",
@@ -20,9 +20,9 @@
20
  "revision": null,
21
  "target_modules": [
22
  "v_proj",
23
- "q_proj",
24
  "k_proj",
25
- "o_proj"
 
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
9
  "layers_pattern": null,
10
  "layers_to_transform": null,
11
  "loftq_config": {},
12
+ "lora_alpha": 64,
13
  "lora_dropout": 0.1,
14
  "megatron_config": null,
15
  "megatron_core": "megatron.core",
 
20
  "revision": null,
21
  "target_modules": [
22
  "v_proj",
 
23
  "k_proj",
24
+ "o_proj",
25
+ "q_proj"
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f567278abac0a182c45835aeeebcf74ff1ae9af34c0dbacb7ab3fe490aa5b1e
3
- size 536906096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:335ff3ab02635313a29bd60a29e36f5945243b709320d83cfa5ece896a4ce0b6
3
+ size 838904832
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2673747580f82b94b8d87d9b609fdfb003c7ce6c8cf6a1642e31c0f18ac6409d
3
- size 1073950458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:611c64f6ffdb40b6b7f76b9a41a0c02b3b92b97b992a64dacf57259d7b9ca2a2
3
+ size 1677982394
last-checkpoint/special_tokens_map.json CHANGED
@@ -1,14 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "▁<PRE>",
4
- "▁<MID>",
5
- "▁<SUF>",
6
- "▁<EOT>",
7
- "▁<PRE>",
8
- "▁<MID>",
9
- "▁<SUF>",
10
- "▁<EOT>"
11
- ],
12
  "bos_token": {
13
  "content": "<s>",
14
  "lstrip": false,
 
1
  {
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
last-checkpoint/tokenizer.json CHANGED
@@ -34,42 +34,6 @@
34
  "rstrip": false,
35
  "normalized": false,
36
  "special": true
37
- },
38
- {
39
- "id": 32007,
40
- "content": "▁<PRE>",
41
- "single_word": false,
42
- "lstrip": false,
43
- "rstrip": false,
44
- "normalized": false,
45
- "special": true
46
- },
47
- {
48
- "id": 32008,
49
- "content": "▁<SUF>",
50
- "single_word": false,
51
- "lstrip": false,
52
- "rstrip": false,
53
- "normalized": false,
54
- "special": true
55
- },
56
- {
57
- "id": 32009,
58
- "content": "▁<MID>",
59
- "single_word": false,
60
- "lstrip": false,
61
- "rstrip": false,
62
- "normalized": false,
63
- "special": true
64
- },
65
- {
66
- "id": 32010,
67
- "content": "▁<EOT>",
68
- "single_word": false,
69
- "lstrip": false,
70
- "rstrip": false,
71
- "normalized": false,
72
- "special": true
73
  }
74
  ],
75
  "normalizer": {
@@ -32175,23 +32139,7 @@
32175
  "왕": 31996,
32176
  "收": 31997,
32177
  "弘": 31998,
32178
- "给": 31999,
32179
- "▁<SU": 32000,
32180
- "▁<SUF": 32001,
32181
- "▁<PRE": 32002,
32182
- "▁<M": 32003,
32183
- "▁<MID": 32004,
32184
- "▁<E": 32005,
32185
- "▁<EOT": 32006,
32186
- "▁<PRE>": 32007,
32187
- "▁<SUF>": 32008,
32188
- "▁<MID>": 32009,
32189
- "▁<EOT>": 32010,
32190
- "▁<EOT><EOT>": 32011,
32191
- "▁<EOT><EOT><EOT>": 32012,
32192
- "▁<EOT><EOT><EOT><EOT>": 32013,
32193
- "▁<EOT><EOT><EOT><EOT><EOT>": 32014,
32194
- "▁<EOT><EOT><EOT><EOT><EOT><EOT>": 32015
32195
  },
32196
  "merges": [
32197
  "▁ t",
@@ -93442,18 +93390,7 @@
93442
  "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
93443
  "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
93444
  "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
93445
- "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
93446
- "▁< SU",
93447
- "▁<SU F",
93448
- "▁< PRE",
93449
- "▁< M",
93450
- "▁<M ID",
93451
- "▁< E",
93452
- "▁<E OT",
93453
- "▁<PRE >",
93454
- "▁<SUF >",
93455
- "▁<MID >",
93456
- "▁<EOT >"
93457
  ]
93458
  }
93459
  }
 
34
  "rstrip": false,
35
  "normalized": false,
36
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  }
38
  ],
39
  "normalizer": {
 
32139
  "왕": 31996,
32140
  "收": 31997,
32141
  "弘": 31998,
32142
+ "给": 31999
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32143
  },
32144
  "merges": [
32145
  "▁ t",
 
93390
  "▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
93391
  "▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
93392
  "▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
93393
+ "▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁▁"
 
 
 
 
 
 
 
 
 
 
 
93394
  ]
93395
  }
93396
  }
last-checkpoint/tokenizer.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45ccb9c8b6b561889acea59191d66986d314e7cbd6a78abc6e49b139ca91c1e6
3
- size 500058
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
last-checkpoint/tokenizer_config.json CHANGED
@@ -25,63 +25,17 @@
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
28
- },
29
- "32007": {
30
- "content": "▁<PRE>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "32008": {
38
- "content": "▁<SUF>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "32009": {
46
- "content": "▁<MID>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "32010": {
54
- "content": "▁<EOT>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
  }
61
  },
62
- "additional_special_tokens": [
63
- "▁<PRE>",
64
- "▁<MID>",
65
- "▁<SUF>",
66
- "▁<EOT>",
67
- "▁<PRE>",
68
- "▁<MID>",
69
- "▁<SUF>",
70
- "▁<EOT>"
71
- ],
72
  "bos_token": "<s>",
73
  "clean_up_tokenization_spaces": false,
74
  "eos_token": "</s>",
75
- "eot_token": "▁<EOT>",
76
- "fill_token": "<FILL_ME>",
77
- "legacy": null,
78
- "middle_token": "▁<MID>",
79
  "model_max_length": 1000000000000000019884624838656,
80
  "pad_token": "</s>",
81
- "prefix_token": "▁<PRE>",
82
  "sp_model_kwargs": {},
83
- "suffix_token": "▁<SUF>",
84
- "tokenizer_class": "CodeLlamaTokenizer",
85
  "unk_token": "<unk>",
86
  "use_default_system_prompt": false
87
  }
 
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  }
29
  },
 
 
 
 
 
 
 
 
 
 
30
  "bos_token": "<s>",
31
  "clean_up_tokenization_spaces": false,
32
  "eos_token": "</s>",
33
+ "legacy": false,
 
 
 
34
  "model_max_length": 1000000000000000019884624838656,
35
  "pad_token": "</s>",
36
+ "padding_side": "right",
37
  "sp_model_kwargs": {},
38
+ "tokenizer_class": "LlamaTokenizer",
 
39
  "unk_token": "<unk>",
40
  "use_default_system_prompt": false
41
  }
last-checkpoint/trainer_state.json CHANGED
@@ -10,177 +10,177 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "grad_norm": Infinity,
14
  "learning_rate": 0.0002,
15
- "loss": 2.4627,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.0,
20
- "grad_norm": 948.3303833007812,
21
  "learning_rate": 0.0002,
22
- "loss": 6.0035,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.0,
27
- "grad_norm": 0.2631005644798279,
28
  "learning_rate": 0.0002,
29
- "loss": 3.0899,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.0,
34
- "grad_norm": 16874.787109375,
35
  "learning_rate": 0.0002,
36
- "loss": 2.9749,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.0,
41
- "grad_norm": 24137.34375,
42
  "learning_rate": 0.0002,
43
- "loss": 3.0415,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.0,
48
- "grad_norm": 0.28247156739234924,
49
  "learning_rate": 0.0002,
50
- "loss": 2.993,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.0,
55
- "grad_norm": 0.3185359835624695,
56
  "learning_rate": 0.0002,
57
- "loss": 2.9267,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.0,
62
- "grad_norm": 0.3125191032886505,
63
  "learning_rate": 0.0002,
64
- "loss": 2.7621,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.0,
69
- "grad_norm": 0.31877008080482483,
70
  "learning_rate": 0.0002,
71
- "loss": 2.6476,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 0.0,
76
- "grad_norm": 0.290170818567276,
77
  "learning_rate": 0.0002,
78
- "loss": 2.4098,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 0.0,
83
- "grad_norm": 0.3100622594356537,
84
  "learning_rate": 0.0002,
85
- "loss": 2.3601,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 0.0,
90
- "grad_norm": 0.3061903417110443,
91
  "learning_rate": 0.0002,
92
- "loss": 2.2337,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 0.0,
97
- "grad_norm": 0.35913383960723877,
98
  "learning_rate": 0.0002,
99
- "loss": 2.041,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 0.0,
104
- "grad_norm": 0.35995352268218994,
105
  "learning_rate": 0.0002,
106
- "loss": 1.896,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 0.01,
111
- "grad_norm": 0.35563966631889343,
112
  "learning_rate": 0.0002,
113
- "loss": 1.7044,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 0.01,
118
- "grad_norm": 0.3549964129924774,
119
  "learning_rate": 0.0002,
120
- "loss": 1.5553,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 0.01,
125
- "grad_norm": 0.3693196177482605,
126
  "learning_rate": 0.0002,
127
- "loss": 1.4171,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 0.01,
132
- "grad_norm": 0.3318246304988861,
133
  "learning_rate": 0.0002,
134
- "loss": 1.186,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 0.01,
139
- "grad_norm": 0.2706567645072937,
140
  "learning_rate": 0.0002,
141
- "loss": 1.1127,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 0.01,
146
- "grad_norm": 0.24653750658035278,
147
  "learning_rate": 0.0002,
148
- "loss": 1.0762,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 0.01,
153
- "grad_norm": 0.16432078182697296,
154
  "learning_rate": 0.0002,
155
- "loss": 0.9719,
156
  "step": 21
157
  },
158
  {
159
  "epoch": 0.01,
160
- "grad_norm": 0.17397165298461914,
161
  "learning_rate": 0.0002,
162
- "loss": 0.9167,
163
  "step": 22
164
  },
165
  {
166
  "epoch": 0.01,
167
- "grad_norm": 0.23421818017959595,
168
  "learning_rate": 0.0002,
169
- "loss": 0.8786,
170
  "step": 23
171
  },
172
  {
173
  "epoch": 0.01,
174
- "grad_norm": 0.19150808453559875,
175
  "learning_rate": 0.0002,
176
- "loss": 0.89,
177
  "step": 24
178
  },
179
  {
180
  "epoch": 0.01,
181
- "grad_norm": 755059.0,
182
  "learning_rate": 0.0002,
183
- "loss": 0.8755,
184
  "step": 25
185
  }
186
  ],
@@ -189,7 +189,7 @@
189
  "num_input_tokens_seen": 0,
190
  "num_train_epochs": 1,
191
  "save_steps": 25,
192
- "total_flos": 3544369882398720.0,
193
  "train_batch_size": 8,
194
  "trial_name": null,
195
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "grad_norm": 0.21934866905212402,
14
  "learning_rate": 0.0002,
15
+ "loss": 2.1352,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.0,
20
+ "grad_norm": 0.37619397044181824,
21
  "learning_rate": 0.0002,
22
+ "loss": 2.3002,
23
  "step": 2
24
  },
25
  {
26
  "epoch": 0.0,
27
+ "grad_norm": 0.43210744857788086,
28
  "learning_rate": 0.0002,
29
+ "loss": 2.041,
30
  "step": 3
31
  },
32
  {
33
  "epoch": 0.0,
34
+ "grad_norm": 0.4717111885547638,
35
  "learning_rate": 0.0002,
36
+ "loss": 1.839,
37
  "step": 4
38
  },
39
  {
40
  "epoch": 0.0,
41
+ "grad_norm": 0.6455919146537781,
42
  "learning_rate": 0.0002,
43
+ "loss": 1.5939,
44
  "step": 5
45
  },
46
  {
47
  "epoch": 0.0,
48
+ "grad_norm": 1.2801408767700195,
49
  "learning_rate": 0.0002,
50
+ "loss": 1.327,
51
  "step": 6
52
  },
53
  {
54
  "epoch": 0.0,
55
+ "grad_norm": 0.9770981669425964,
56
  "learning_rate": 0.0002,
57
+ "loss": 1.1536,
58
  "step": 7
59
  },
60
  {
61
  "epoch": 0.0,
62
+ "grad_norm": 1.177263617515564,
63
  "learning_rate": 0.0002,
64
+ "loss": 0.9881,
65
  "step": 8
66
  },
67
  {
68
  "epoch": 0.0,
69
+ "grad_norm": 0.6201061606407166,
70
  "learning_rate": 0.0002,
71
+ "loss": 0.8609,
72
  "step": 9
73
  },
74
  {
75
  "epoch": 0.0,
76
+ "grad_norm": 1.45395827293396,
77
  "learning_rate": 0.0002,
78
+ "loss": 0.8477,
79
  "step": 10
80
  },
81
  {
82
  "epoch": 0.0,
83
+ "grad_norm": 1.0724296569824219,
84
  "learning_rate": 0.0002,
85
+ "loss": 0.7573,
86
  "step": 11
87
  },
88
  {
89
  "epoch": 0.0,
90
+ "grad_norm": 0.9028312563896179,
91
  "learning_rate": 0.0002,
92
+ "loss": 0.7258,
93
  "step": 12
94
  },
95
  {
96
  "epoch": 0.0,
97
+ "grad_norm": 0.8523911237716675,
98
  "learning_rate": 0.0002,
99
+ "loss": 0.7513,
100
  "step": 13
101
  },
102
  {
103
  "epoch": 0.0,
104
+ "grad_norm": 0.6326367855072021,
105
  "learning_rate": 0.0002,
106
+ "loss": 0.7186,
107
  "step": 14
108
  },
109
  {
110
  "epoch": 0.01,
111
+ "grad_norm": 0.39301833510398865,
112
  "learning_rate": 0.0002,
113
+ "loss": 0.706,
114
  "step": 15
115
  },
116
  {
117
  "epoch": 0.01,
118
+ "grad_norm": 0.5725602507591248,
119
  "learning_rate": 0.0002,
120
+ "loss": 0.6406,
121
  "step": 16
122
  },
123
  {
124
  "epoch": 0.01,
125
+ "grad_norm": 0.6625002026557922,
126
  "learning_rate": 0.0002,
127
+ "loss": 0.6349,
128
  "step": 17
129
  },
130
  {
131
  "epoch": 0.01,
132
+ "grad_norm": 0.19412539899349213,
133
  "learning_rate": 0.0002,
134
+ "loss": 0.6134,
135
  "step": 18
136
  },
137
  {
138
  "epoch": 0.01,
139
+ "grad_norm": 0.34864893555641174,
140
  "learning_rate": 0.0002,
141
+ "loss": 0.6381,
142
  "step": 19
143
  },
144
  {
145
  "epoch": 0.01,
146
+ "grad_norm": 0.30731046199798584,
147
  "learning_rate": 0.0002,
148
+ "loss": 0.6553,
149
  "step": 20
150
  },
151
  {
152
  "epoch": 0.01,
153
+ "grad_norm": 0.1636987179517746,
154
  "learning_rate": 0.0002,
155
+ "loss": 0.5984,
156
  "step": 21
157
  },
158
  {
159
  "epoch": 0.01,
160
+ "grad_norm": 0.1931622475385666,
161
  "learning_rate": 0.0002,
162
+ "loss": 0.5821,
163
  "step": 22
164
  },
165
  {
166
  "epoch": 0.01,
167
+ "grad_norm": 0.32078325748443604,
168
  "learning_rate": 0.0002,
169
+ "loss": 0.586,
170
  "step": 23
171
  },
172
  {
173
  "epoch": 0.01,
174
+ "grad_norm": 0.17550581693649292,
175
  "learning_rate": 0.0002,
176
+ "loss": 0.5315,
177
  "step": 24
178
  },
179
  {
180
  "epoch": 0.01,
181
+ "grad_norm": 0.22394584119319916,
182
  "learning_rate": 0.0002,
183
+ "loss": 0.5415,
184
  "step": 25
185
  }
186
  ],
 
189
  "num_input_tokens_seen": 0,
190
  "num_train_epochs": 1,
191
  "save_steps": 25,
192
+ "total_flos": 6828259445391360.0,
193
  "train_batch_size": 8,
194
  "trial_name": null,
195
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bd37a7bed4b29db3131947c878454b234a832069e5051a4e513a682b0b6b1f6
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229196146870c6d4028c2c7478b67450b5d84079445d42ab8c78010679048a83
3
  size 4856