Go4miii committed on
Commit
0a2afe7
1 Parent(s): ffe002c

commit from root

Browse files
Baichuan-13B-Chat-lora-Retrieval/README.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+ ### Framework versions
7
+
8
+
9
+ - PEFT 0.5.0
Baichuan-13B-Chat-lora-Retrieval/adapter_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "baichuan-inc/Baichuan-13B-Chat",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 32.0,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "W_pack"
18
+ ],
19
+ "task_type": "CAUSAL_LM"
20
+ }
Baichuan-13B-Chat-lora-Retrieval/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f743cb6f789074ff8abf9efd5bedc2451b00b041f8e357dea2d989b58d8dabd
3
+ size 26243422
Baichuan-13B-Chat-lora-Retrieval/all_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.99,
3
+ "eval_loss": 1.0294359922409058,
4
+ "eval_runtime": 7.9413,
5
+ "eval_samples_per_second": 25.436,
6
+ "eval_steps_per_second": 2.141,
7
+ "train_loss": 1.0595260426618052,
8
+ "train_runtime": 5379.5583,
9
+ "train_samples_per_second": 7.407,
10
+ "train_steps_per_second": 0.077
11
+ }
Baichuan-13B-Chat-lora-Retrieval/eval_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.99,
3
+ "eval_loss": 1.0294359922409058,
4
+ "eval_runtime": 7.9413,
5
+ "eval_samples_per_second": 25.436,
6
+ "eval_steps_per_second": 2.141
7
+ }
Baichuan-13B-Chat-lora-Retrieval/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": true
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": true
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": true
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": true
29
+ }
30
+ }
Baichuan-13B-Chat-lora-Retrieval/tokenization_baichuan.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.
2
+
3
+ import os
4
+ from shutil import copyfile
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ import sentencepiece as spm
8
+ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
9
+ from transformers.utils import logging
10
+
11
+
12
# Module-level logger obtained through the transformers logging helper.
logger = logging.get_logger(__name__)

# Name of the vocabulary file, keyed by the constructor argument it feeds.
VOCAB_FILES_NAMES = dict(vocab_file="tokenizer.model")

# No pretrained vocabulary / tokenizer files are registered in this module.
PRETRAINED_VOCAB_FILES_MAP = dict(vocab_file={}, tokenizer_file={})

# No per-checkpoint maximum input sizes are registered in this module.
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = dict()
21
+
22
+
23
class BaichuanTokenizer(PreTrainedTokenizer):
    """
    Construct a Baichuan tokenizer. Based on byte-level Byte-Pair-Encoding.

    The vocabulary is loaded from a SentencePiece model file through
    `sentencepiece.SentencePieceProcessor`.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        unk_token (`str` or `AddedToken`, *optional*, defaults to `"<unk>"`):
            The unknown token.
        bos_token (`str` or `AddedToken`, *optional*, defaults to `"<s>"`):
            The beginning-of-sequence token.
        eos_token (`str` or `AddedToken`, *optional*, defaults to `"</s>"`):
            The end-of-sequence token.
        pad_token (`str` or `AddedToken`, *optional*):
            The padding token. No padding token is set when `None`.
        sp_model_kwargs (`Dict[str, Any]`, *optional*):
            Keyword arguments forwarded to the `SentencePieceProcessor` constructor.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether `build_inputs_with_special_tokens` prepends the BOS id to each sequence.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether `build_inputs_with_special_tokens` appends the EOS id to each sequence.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Forwarded unchanged to the base class constructor.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        pad_token=None,
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs

        # The SentencePiece model and the attributes read by `vocab_size` /
        # `get_vocab` must exist BEFORE `super().__init__()` runs: newer
        # transformers releases query the vocabulary while registering the
        # special tokens inside the base constructor, and would otherwise fail
        # with "object has no attribute 'sp_model'".
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)

        # Plain strings are wrapped so that no whitespace is stripped around
        # the special tokens; AddedToken instances (and None) pass through.
        bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
        pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token

        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            sp_model_kwargs=self.sp_model_kwargs,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )

    def __getstate__(self):
        """Return the instance state for pickling, with the SentencePiece processor dropped."""
        state = self.__dict__.copy()
        # The processor is replaced by None here and rebuilt in __setstate__.
        state["sp_model"] = None
        return state

    def __setstate__(self, d):
        """Restore the instance state and reload the SentencePiece model from `self.vocab_file`."""
        self.__dict__ = d
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(self.vocab_file)

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        # Tokens added on top of the SentencePiece vocabulary take precedence.
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Returns a tokenized string."""
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        # `all_special_tokens` is a property; evaluate it once instead of once
        # per token.
        special_tokens = set(self.all_special_tokens)
        current_sub_tokens = []
        out_string = ""
        prev_is_special = False
        for i, token in enumerate(tokens):
            # make sure that special tokens are not decoded using sentencepiece model
            if token in special_tokens:
                # A space separates a special token from preceding regular
                # text, but not from a preceding special token or the start.
                if not prev_is_special and i != 0:
                    out_string += " "
                out_string += self.sp_model.decode(current_sub_tokens) + token
                prev_is_special = True
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
                prev_is_special = False
        # Flush the regular tokens that follow the last special token.
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                Prefix prepended (followed by `-`) to the saved file name.

        Returns:
            `Tuple(str)`: Paths to the files saved. `None` is returned, after logging an error, when
            `save_directory` is not an existing directory.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            # The original model file still exists elsewhere: copy it verbatim.
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            # The original file is gone: serialize the in-memory model instead.
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Build model inputs from one or two sequences by adding the BOS / EOS ids.

        Each sequence is wrapped independently: the BOS id is prepended when `self.add_bos_token` is set and
        the EOS id is appended when `self.add_eos_token` is set.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs of the first sequence.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: The input IDs with the special tokens added.
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        # Mask fragments mirroring the layout of `build_inputs_with_special_tokens`.
        bos_token_id = [1] if self.add_bos_token else []
        eos_token_id = [1] if self.add_eos_token else []

        if token_ids_1 is None:
            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
        return (
            bos_token_id
            + ([0] * len(token_ids_0))
            + eos_token_id
            + bos_token_id
            + ([0] * len(token_ids_1))
            + eos_token_id
        )

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task. The
        sequence pair mask has the following format:

        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```

        The BOS / EOS ids added around each sequence (when enabled) are counted as part of that sequence.

        if token_ids_1 is None, only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

        if token_ids_1 is not None:
            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

        return output
232
+
Baichuan-13B-Chat-lora-Retrieval/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d1ab69d25c74644af5c5e4dcd1cc6e96d33783dbd257b6bdea55b643c72813
3
+ size 1136765
Baichuan-13B-Chat-lora-Retrieval/tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "auto_map": {
5
+ "AutoTokenizer": [
6
+ "tokenization_baichuan.BaichuanTokenizer",
7
+ null
8
+ ]
9
+ },
10
+ "bos_token": {
11
+ "__type": "AddedToken",
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": true
17
+ },
18
+ "clean_up_tokenization_spaces": false,
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": true
26
+ },
27
+ "model_max_length": 4096,
28
+ "pad_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<unk>",
31
+ "lstrip": false,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": true
35
+ },
36
+ "padding_side": "right",
37
+ "sp_model_kwargs": {},
38
+ "split_special_tokens": false,
39
+ "tokenizer_class": "BaichuanTokenizer",
40
+ "unk_token": {
41
+ "__type": "AddedToken",
42
+ "content": "<unk>",
43
+ "lstrip": false,
44
+ "normalized": true,
45
+ "rstrip": false,
46
+ "single_word": true
47
+ }
48
+ }
Baichuan-13B-Chat-lora-Retrieval/train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.99,
3
+ "train_loss": 1.0595260426618052,
4
+ "train_runtime": 5379.5583,
5
+ "train_samples_per_second": 7.407,
6
+ "train_steps_per_second": 0.077
7
+ }
Baichuan-13B-Chat-lora-Retrieval/trainer_log.jsonl ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 10, "total_steps": 414, "loss": 1.3488, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9928054992195985e-05, "epoch": 0.05, "percentage": 2.42, "elapsed_time": "0:02:06", "remaining_time": "1:24:56"}
2
+ {"current_steps": 20, "total_steps": 414, "loss": 1.2097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.971263405551576e-05, "epoch": 0.1, "percentage": 4.83, "elapsed_time": "0:04:15", "remaining_time": "1:23:44"}
3
+ {"current_steps": 30, "total_steps": 414, "loss": 1.1719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9354977066836986e-05, "epoch": 0.14, "percentage": 7.25, "elapsed_time": "0:06:23", "remaining_time": "1:21:51"}
4
+ {"current_steps": 40, "total_steps": 414, "loss": 1.145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.885714255694698e-05, "epoch": 0.19, "percentage": 9.66, "elapsed_time": "0:08:30", "remaining_time": "1:19:35"}
5
+ {"current_steps": 50, "total_steps": 414, "loss": 1.1203, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.822199586246168e-05, "epoch": 0.24, "percentage": 12.08, "elapsed_time": "0:10:41", "remaining_time": "1:17:52"}
6
+ {"current_steps": 50, "total_steps": 414, "loss": null, "eval_loss": 1.11279296875, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.24, "percentage": 12.08, "elapsed_time": "0:10:41", "remaining_time": "1:17:52"}
7
+ {"current_steps": 60, "total_steps": 414, "loss": 1.1036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.74531926340924e-05, "epoch": 0.29, "percentage": 14.49, "elapsed_time": "0:13:00", "remaining_time": "1:16:47"}
8
+ {"current_steps": 70, "total_steps": 414, "loss": 1.0904, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6555157796180335e-05, "epoch": 0.34, "percentage": 16.91, "elapsed_time": "0:15:08", "remaining_time": "1:14:24"}
9
+ {"current_steps": 80, "total_steps": 414, "loss": 1.0818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5533060078599226e-05, "epoch": 0.39, "percentage": 19.32, "elapsed_time": "0:17:14", "remaining_time": "1:11:58"}
10
+ {"current_steps": 90, "total_steps": 414, "loss": 1.0829, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.43927822676105e-05, "epoch": 0.43, "percentage": 21.74, "elapsed_time": "0:19:21", "remaining_time": "1:09:41"}
11
+ {"current_steps": 100, "total_steps": 414, "loss": 1.0744, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3140887346894974e-05, "epoch": 0.48, "percentage": 24.15, "elapsed_time": "0:21:34", "remaining_time": "1:07:43"}
12
+ {"current_steps": 100, "total_steps": 414, "loss": null, "eval_loss": 1.065435767173767, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.48, "percentage": 24.15, "elapsed_time": "0:21:34", "remaining_time": "1:07:43"}
13
+ {"current_steps": 110, "total_steps": 414, "loss": 1.0629, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1784580723639923e-05, "epoch": 0.53, "percentage": 26.57, "elapsed_time": "0:23:52", "remaining_time": "1:05:58"}
14
+ {"current_steps": 120, "total_steps": 414, "loss": 1.0659, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.033166875709291e-05, "epoch": 0.58, "percentage": 28.99, "elapsed_time": "0:25:56", "remaining_time": "1:03:33"}
15
+ {"current_steps": 130, "total_steps": 414, "loss": 1.0523, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8790513828275683e-05, "epoch": 0.63, "percentage": 31.4, "elapsed_time": "0:28:05", "remaining_time": "1:01:21"}
16
+ {"current_steps": 140, "total_steps": 414, "loss": 1.0397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.716998620945871e-05, "epoch": 0.67, "percentage": 33.82, "elapsed_time": "0:30:13", "remaining_time": "0:59:09"}
17
+ {"current_steps": 150, "total_steps": 414, "loss": 1.0435, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.547941301041661e-05, "epoch": 0.72, "percentage": 36.23, "elapsed_time": "0:32:22", "remaining_time": "0:56:58"}
18
+ {"current_steps": 150, "total_steps": 414, "loss": null, "eval_loss": 1.048127293586731, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.72, "percentage": 36.23, "elapsed_time": "0:32:22", "remaining_time": "0:56:58"}
19
+ {"current_steps": 160, "total_steps": 414, "loss": 1.0563, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.372852449530922e-05, "epoch": 0.77, "percentage": 38.65, "elapsed_time": "0:34:39", "remaining_time": "0:55:00"}
20
+ {"current_steps": 170, "total_steps": 414, "loss": 1.0376, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1927398079167226e-05, "epoch": 0.82, "percentage": 41.06, "elapsed_time": "0:36:44", "remaining_time": "0:52:44"}
21
+ {"current_steps": 180, "total_steps": 414, "loss": 1.0436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.008640032631585e-05, "epoch": 0.87, "percentage": 43.48, "elapsed_time": "0:38:52", "remaining_time": "0:50:32"}
22
+ {"current_steps": 190, "total_steps": 414, "loss": 1.0301, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.821612728457078e-05, "epoch": 0.92, "percentage": 45.89, "elapsed_time": "0:41:04", "remaining_time": "0:48:25"}
23
+ {"current_steps": 200, "total_steps": 414, "loss": 1.0338, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.632734349861874e-05, "epoch": 0.96, "percentage": 48.31, "elapsed_time": "0:43:13", "remaining_time": "0:46:14"}
24
+ {"current_steps": 200, "total_steps": 414, "loss": null, "eval_loss": 1.0388035774230957, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 0.96, "percentage": 48.31, "elapsed_time": "0:43:13", "remaining_time": "0:46:14"}
25
+ {"current_steps": 210, "total_steps": 414, "loss": 1.033, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4430920053597356e-05, "epoch": 1.01, "percentage": 50.72, "elapsed_time": "0:45:29", "remaining_time": "0:44:11"}
26
+ {"current_steps": 220, "total_steps": 414, "loss": 1.0399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2537772005470782e-05, "epoch": 1.06, "percentage": 53.14, "elapsed_time": "0:47:37", "remaining_time": "0:41:59"}
27
+ {"current_steps": 230, "total_steps": 414, "loss": 1.0277, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0658795558326743e-05, "epoch": 1.11, "percentage": 55.56, "elapsed_time": "0:49:44", "remaining_time": "0:39:47"}
28
+ {"current_steps": 240, "total_steps": 414, "loss": 1.0325, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8804805350177505e-05, "epoch": 1.16, "percentage": 57.97, "elapsed_time": "0:51:54", "remaining_time": "0:37:38"}
29
+ {"current_steps": 250, "total_steps": 414, "loss": 1.0322, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6986472208222576e-05, "epoch": 1.2, "percentage": 60.39, "elapsed_time": "0:54:02", "remaining_time": "0:35:26"}
30
+ {"current_steps": 250, "total_steps": 414, "loss": null, "eval_loss": 1.0336002111434937, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.2, "percentage": 60.39, "elapsed_time": "0:54:02", "remaining_time": "0:35:26"}
31
+ {"current_steps": 260, "total_steps": 414, "loss": 1.0248, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5214261731829022e-05, "epoch": 1.25, "percentage": 62.8, "elapsed_time": "0:56:17", "remaining_time": "0:33:20"}
32
+ {"current_steps": 270, "total_steps": 414, "loss": 1.0235, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3498374056721197e-05, "epoch": 1.3, "percentage": 65.22, "elapsed_time": "0:58:28", "remaining_time": "0:31:11"}
33
+ {"current_steps": 280, "total_steps": 414, "loss": 1.0446, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1848685147073222e-05, "epoch": 1.35, "percentage": 67.63, "elapsed_time": "1:00:38", "remaining_time": "0:29:01"}
34
+ {"current_steps": 290, "total_steps": 414, "loss": 1.0271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0274689953403407e-05, "epoch": 1.4, "percentage": 70.05, "elapsed_time": "1:02:46", "remaining_time": "0:26:50"}
35
+ {"current_steps": 300, "total_steps": 414, "loss": 1.0351, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.785447763431101e-06, "epoch": 1.44, "percentage": 72.46, "elapsed_time": "1:04:51", "remaining_time": "0:24:38"}
36
+ {"current_steps": 300, "total_steps": 414, "loss": null, "eval_loss": 1.031198263168335, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.44, "percentage": 72.46, "elapsed_time": "1:04:51", "remaining_time": "0:24:38"}
37
+ {"current_steps": 310, "total_steps": 414, "loss": 1.0341, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.389530060434696e-06, "epoch": 1.49, "percentage": 74.88, "elapsed_time": "1:07:07", "remaining_time": "0:22:31"}
38
+ {"current_steps": 320, "total_steps": 414, "loss": 1.0267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.094971189217042e-06, "epoch": 1.54, "percentage": 77.29, "elapsed_time": "1:09:15", "remaining_time": "0:20:20"}
39
+ {"current_steps": 330, "total_steps": 414, "loss": 1.0122, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9092221136255444e-06, "epoch": 1.59, "percentage": 79.71, "elapsed_time": "1:11:24", "remaining_time": "0:18:10"}
40
+ {"current_steps": 340, "total_steps": 414, "loss": 1.0197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.839107531779978e-06, "epoch": 1.64, "percentage": 82.13, "elapsed_time": "1:13:31", "remaining_time": "0:16:00"}
41
+ {"current_steps": 350, "total_steps": 414, "loss": 1.0193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.890786595835693e-06, "epoch": 1.69, "percentage": 84.54, "elapsed_time": "1:15:41", "remaining_time": "0:13:50"}
42
+ {"current_steps": 350, "total_steps": 414, "loss": null, "eval_loss": 1.0297337770462036, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.69, "percentage": 84.54, "elapsed_time": "1:15:41", "remaining_time": "0:13:50"}
43
+ {"current_steps": 360, "total_steps": 414, "loss": 1.0231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0697174623636794e-06, "epoch": 1.73, "percentage": 86.96, "elapsed_time": "1:17:59", "remaining_time": "0:11:41"}
44
+ {"current_steps": 370, "total_steps": 414, "loss": 1.0233, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3806258773811476e-06, "epoch": 1.78, "percentage": 89.37, "elapsed_time": "1:20:09", "remaining_time": "0:09:31"}
45
+ {"current_steps": 380, "total_steps": 414, "loss": 1.0224, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.274779768448482e-07, "epoch": 1.83, "percentage": 91.79, "elapsed_time": "1:22:18", "remaining_time": "0:07:21"}
46
+ {"current_steps": 390, "total_steps": 414, "loss": 1.0181, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.134574591564494e-07, "epoch": 1.88, "percentage": 94.2, "elapsed_time": "1:24:25", "remaining_time": "0:05:11"}
47
+ {"current_steps": 400, "total_steps": 414, "loss": 1.0189, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4094726106603505e-07, "epoch": 1.93, "percentage": 96.62, "elapsed_time": "1:26:32", "remaining_time": "0:03:01"}
48
+ {"current_steps": 400, "total_steps": 414, "loss": null, "eval_loss": 1.0294359922409058, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.93, "percentage": 96.62, "elapsed_time": "1:26:32", "remaining_time": "0:03:01"}
49
+ {"current_steps": 410, "total_steps": 414, "loss": 1.0244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1515842439871472e-08, "epoch": 1.97, "percentage": 99.03, "elapsed_time": "1:28:47", "remaining_time": "0:00:51"}
50
+ {"current_steps": 414, "total_steps": 414, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.99, "percentage": 100.0, "elapsed_time": "1:29:38", "remaining_time": "0:00:00"}
51
+ {"current_steps": 17, "total_steps": 17, "loss": null, "eval_loss": 1.0294359922409058, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 1.99, "percentage": 100.0, "elapsed_time": "1:29:46", "remaining_time": "0:00:00"}
Baichuan-13B-Chat-lora-Retrieval/trainer_state.json ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0294359922409058,
3
+ "best_model_checkpoint": "output/Baichuan-13B-Chat_lora_wqs_jiansuo/checkpoint-400",
4
+ "epoch": 1.9939795304033714,
5
+ "global_step": 414,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 4.9928054992195985e-05,
13
+ "loss": 1.3488,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 4.971263405551576e-05,
19
+ "loss": 1.2097,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.14,
24
+ "learning_rate": 4.9354977066836986e-05,
25
+ "loss": 1.1719,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.19,
30
+ "learning_rate": 4.885714255694698e-05,
31
+ "loss": 1.145,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.24,
36
+ "learning_rate": 4.822199586246168e-05,
37
+ "loss": 1.1203,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.24,
42
+ "eval_loss": 1.11279296875,
43
+ "eval_runtime": 10.7965,
44
+ "eval_samples_per_second": 18.71,
45
+ "eval_steps_per_second": 1.575,
46
+ "step": 50
47
+ },
48
+ {
49
+ "epoch": 0.29,
50
+ "learning_rate": 4.74531926340924e-05,
51
+ "loss": 1.1036,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.34,
56
+ "learning_rate": 4.6555157796180335e-05,
57
+ "loss": 1.0904,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.39,
62
+ "learning_rate": 4.5533060078599226e-05,
63
+ "loss": 1.0818,
64
+ "step": 80
65
+ },
66
+ {
67
+ "epoch": 0.43,
68
+ "learning_rate": 4.43927822676105e-05,
69
+ "loss": 1.0829,
70
+ "step": 90
71
+ },
72
+ {
73
+ "epoch": 0.48,
74
+ "learning_rate": 4.3140887346894974e-05,
75
+ "loss": 1.0744,
76
+ "step": 100
77
+ },
78
+ {
79
+ "epoch": 0.48,
80
+ "eval_loss": 1.065435767173767,
81
+ "eval_runtime": 7.9424,
82
+ "eval_samples_per_second": 25.433,
83
+ "eval_steps_per_second": 2.14,
84
+ "step": 100
85
+ },
86
+ {
87
+ "epoch": 0.53,
88
+ "learning_rate": 4.1784580723639923e-05,
89
+ "loss": 1.0629,
90
+ "step": 110
91
+ },
92
+ {
93
+ "epoch": 0.58,
94
+ "learning_rate": 4.033166875709291e-05,
95
+ "loss": 1.0659,
96
+ "step": 120
97
+ },
98
+ {
99
+ "epoch": 0.63,
100
+ "learning_rate": 3.8790513828275683e-05,
101
+ "loss": 1.0523,
102
+ "step": 130
103
+ },
104
+ {
105
+ "epoch": 0.67,
106
+ "learning_rate": 3.716998620945871e-05,
107
+ "loss": 1.0397,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.72,
112
+ "learning_rate": 3.547941301041661e-05,
113
+ "loss": 1.0435,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.72,
118
+ "eval_loss": 1.048127293586731,
119
+ "eval_runtime": 8.0034,
120
+ "eval_samples_per_second": 25.239,
121
+ "eval_steps_per_second": 2.124,
122
+ "step": 150
123
+ },
124
+ {
125
+ "epoch": 0.77,
126
+ "learning_rate": 3.372852449530922e-05,
127
+ "loss": 1.0563,
128
+ "step": 160
129
+ },
130
+ {
131
+ "epoch": 0.82,
132
+ "learning_rate": 3.1927398079167226e-05,
133
+ "loss": 1.0376,
134
+ "step": 170
135
+ },
136
+ {
137
+ "epoch": 0.87,
138
+ "learning_rate": 3.008640032631585e-05,
139
+ "loss": 1.0436,
140
+ "step": 180
141
+ },
142
+ {
143
+ "epoch": 0.92,
144
+ "learning_rate": 2.821612728457078e-05,
145
+ "loss": 1.0301,
146
+ "step": 190
147
+ },
148
+ {
149
+ "epoch": 0.96,
150
+ "learning_rate": 2.632734349861874e-05,
151
+ "loss": 1.0338,
152
+ "step": 200
153
+ },
154
+ {
155
+ "epoch": 0.96,
156
+ "eval_loss": 1.0388035774230957,
157
+ "eval_runtime": 7.9339,
158
+ "eval_samples_per_second": 25.46,
159
+ "eval_steps_per_second": 2.143,
160
+ "step": 200
161
+ },
162
+ {
163
+ "epoch": 1.01,
164
+ "learning_rate": 2.4430920053597356e-05,
165
+ "loss": 1.033,
166
+ "step": 210
167
+ },
168
+ {
169
+ "epoch": 1.06,
170
+ "learning_rate": 2.2537772005470782e-05,
171
+ "loss": 1.0399,
172
+ "step": 220
173
+ },
174
+ {
175
+ "epoch": 1.11,
176
+ "learning_rate": 2.0658795558326743e-05,
177
+ "loss": 1.0277,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 1.16,
182
+ "learning_rate": 1.8804805350177505e-05,
183
+ "loss": 1.0325,
184
+ "step": 240
185
+ },
186
+ {
187
+ "epoch": 1.2,
188
+ "learning_rate": 1.6986472208222576e-05,
189
+ "loss": 1.0322,
190
+ "step": 250
191
+ },
192
+ {
193
+ "epoch": 1.2,
194
+ "eval_loss": 1.0336002111434937,
195
+ "eval_runtime": 7.9742,
196
+ "eval_samples_per_second": 25.332,
197
+ "eval_steps_per_second": 2.132,
198
+ "step": 250
199
+ },
200
+ {
201
+ "epoch": 1.25,
202
+ "learning_rate": 1.5214261731829022e-05,
203
+ "loss": 1.0248,
204
+ "step": 260
205
+ },
206
+ {
207
+ "epoch": 1.3,
208
+ "learning_rate": 1.3498374056721197e-05,
209
+ "loss": 1.0235,
210
+ "step": 270
211
+ },
212
+ {
213
+ "epoch": 1.35,
214
+ "learning_rate": 1.1848685147073222e-05,
215
+ "loss": 1.0446,
216
+ "step": 280
217
+ },
218
+ {
219
+ "epoch": 1.4,
220
+ "learning_rate": 1.0274689953403407e-05,
221
+ "loss": 1.0271,
222
+ "step": 290
223
+ },
224
+ {
225
+ "epoch": 1.44,
226
+ "learning_rate": 8.785447763431101e-06,
227
+ "loss": 1.0351,
228
+ "step": 300
229
+ },
230
+ {
231
+ "epoch": 1.44,
232
+ "eval_loss": 1.031198263168335,
233
+ "eval_runtime": 7.9378,
234
+ "eval_samples_per_second": 25.448,
235
+ "eval_steps_per_second": 2.142,
236
+ "step": 300
237
+ },
238
+ {
239
+ "epoch": 1.49,
240
+ "learning_rate": 7.389530060434696e-06,
241
+ "loss": 1.0341,
242
+ "step": 310
243
+ },
244
+ {
245
+ "epoch": 1.54,
246
+ "learning_rate": 6.094971189217042e-06,
247
+ "loss": 1.0267,
248
+ "step": 320
249
+ },
250
+ {
251
+ "epoch": 1.59,
252
+ "learning_rate": 4.9092221136255444e-06,
253
+ "loss": 1.0122,
254
+ "step": 330
255
+ },
256
+ {
257
+ "epoch": 1.64,
258
+ "learning_rate": 3.839107531779978e-06,
259
+ "loss": 1.0197,
260
+ "step": 340
261
+ },
262
+ {
263
+ "epoch": 1.69,
264
+ "learning_rate": 2.890786595835693e-06,
265
+ "loss": 1.0193,
266
+ "step": 350
267
+ },
268
+ {
269
+ "epoch": 1.69,
270
+ "eval_loss": 1.0297337770462036,
271
+ "eval_runtime": 7.9869,
272
+ "eval_samples_per_second": 25.292,
273
+ "eval_steps_per_second": 2.128,
274
+ "step": 350
275
+ },
276
+ {
277
+ "epoch": 1.73,
278
+ "learning_rate": 2.0697174623636794e-06,
279
+ "loss": 1.0231,
280
+ "step": 360
281
+ },
282
+ {
283
+ "epoch": 1.78,
284
+ "learning_rate": 1.3806258773811476e-06,
285
+ "loss": 1.0233,
286
+ "step": 370
287
+ },
288
+ {
289
+ "epoch": 1.83,
290
+ "learning_rate": 8.274779768448482e-07,
291
+ "loss": 1.0224,
292
+ "step": 380
293
+ },
294
+ {
295
+ "epoch": 1.88,
296
+ "learning_rate": 4.134574591564494e-07,
297
+ "loss": 1.0181,
298
+ "step": 390
299
+ },
300
+ {
301
+ "epoch": 1.93,
302
+ "learning_rate": 1.4094726106603505e-07,
303
+ "loss": 1.0189,
304
+ "step": 400
305
+ },
306
+ {
307
+ "epoch": 1.93,
308
+ "eval_loss": 1.0294359922409058,
309
+ "eval_runtime": 7.9102,
310
+ "eval_samples_per_second": 25.537,
311
+ "eval_steps_per_second": 2.149,
312
+ "step": 400
313
+ },
314
+ {
315
+ "epoch": 1.97,
316
+ "learning_rate": 1.1515842439871472e-08,
317
+ "loss": 1.0244,
318
+ "step": 410
319
+ },
320
+ {
321
+ "epoch": 1.99,
322
+ "step": 414,
323
+ "total_flos": 3.076881712207102e+18,
324
+ "train_loss": 1.0595260426618052,
325
+ "train_runtime": 5379.5583,
326
+ "train_samples_per_second": 7.407,
327
+ "train_steps_per_second": 0.077
328
+ }
329
+ ],
330
+ "max_steps": 414,
331
+ "num_train_epochs": 2,
332
+ "total_flos": 3.076881712207102e+18,
333
+ "trial_name": null,
334
+ "trial_params": null
335
+ }
Baichuan-13B-Chat-lora-Retrieval/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0517bc6b507559268a02b20e558d9c2fc2bdf7c35709cc817272afdf90eb8b7d
3
+ size 4600
Baichuan-13B-Chat-lora-Retrieval/training_eval_loss.png ADDED
Baichuan-13B-Chat-lora-Retrieval/training_loss.png ADDED