LZHgrla committed on
Commit
8809bb6
1 Parent(s): a466733

upload adapter

Browse files
Files changed (4) hide show
  1. README.md +43 -0
  2. adapter_config.json +26 -0
  3. adapter_model.bin +3 -0
  4. xtuner_config.py +217 -0
README.md ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ pipeline_tag: conversational
4
+ base_model: internlm/internlm-20b
5
+ ---
6
+
7
+ <div align="center">
8
+ <img src="https://github.com/InternLM/lmdeploy/assets/36994684/0cf8d00f-e86b-40ba-9b54-dc8f1bc6c8d8" width="600"/>
9
+
10
+
11
+ [![Generic badge](https://img.shields.io/badge/GitHub-%20XTuner-black.svg)](https://github.com/InternLM/xtuner)
12
+
13
+
14
+ </div>
15
+
16
+ ## Model
17
+
18
+ internlm-20b-qlora-arxiv-gentitle is fine-tuned from [InternLM-20B](https://huggingface.co/internlm/internlm-20b) with [Arxiv](https://github.com/WangRongsheng/ChatGenTitle) (20200101-20230722, [csAI, csCL, csCV]) dataset by [XTuner](https://github.com/InternLM/xtuner).
19
+
20
+
21
+ ## Quickstart
22
+
23
+ ### Usage with XTuner CLI
24
+
25
+ #### Installation
26
+
27
+ ```shell
28
+ pip install xtuner
29
+ ```
30
+
31
+ #### Chat
32
+
33
+ ```shell
34
+ xtuner chat internlm/internlm-20b --adapter xtuner/internlm-20b-qlora-arxiv-gentitle --prompt-template internlm_chat --system-prompt arxiv_gentile
35
+ ```
36
+
37
+ #### Fine-tune
38
+
39
+ Use the following command to quickly reproduce the fine-tuning results.
40
+
41
+ ```shell
42
+ xtuner train internlm_20b_qlora_arxiv_gentitle_e5
43
+ ```
adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "internlm/internlm-20b",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.1,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 64,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "v_proj",
18
+ "o_proj",
19
+ "q_proj",
20
+ "gate_proj",
21
+ "k_proj",
22
+ "down_proj",
23
+ "up_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3cb626964b19b25f0aee8169ac8e3cadcbeecd75834985b8535216f45fc95d4
3
+ size 751345965
xtuner_config.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import torch
3
+ from bitsandbytes.optim import PagedAdamW32bit
4
+ from datasets import load_dataset
5
+ from mmengine.dataset import DefaultSampler
6
+ from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
7
+ LoggerHook, ParamSchedulerHook)
8
+ from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR
9
+ from peft import LoraConfig
10
+ from transformers import (AutoModelForCausalLM, AutoTokenizer,
11
+ BitsAndBytesConfig)
12
+
13
+ from xtuner.dataset import process_hf_dataset
14
+ from xtuner.dataset.collate_fns import default_collate_fn
15
+ from xtuner.dataset.map_fns import arxiv_map_fn, template_map_fn_factory
16
+ from xtuner.engine import DatasetInfoHook, EvaluateChatHook
17
+ from xtuner.model import SupervisedFinetune
18
+ from xtuner.utils import PROMPT_TEMPLATE, SYSTEM_TEMPLATE
19
+
20
+ #######################################################################
21
+ # PART 1 Settings #
22
+ #######################################################################
23
+ # Model
24
+ pretrained_model_name_or_path = 'internlm/internlm-20b'
25
+
26
+ # Data
27
+ # 1. Download data from https://kaggle.com/datasets/Cornell-University/arxiv
28
+ # 2. Process data by `xtuner preprocess arxiv ${DOWNLOADED_DATA} ./data/arxiv_data.json [optional arguments]` # noqa: E501
29
+ data_path = './data/arxiv_data.json'
30
+ prompt_template = PROMPT_TEMPLATE.internlm_chat
31
+ max_length = 2048
32
+ pack_to_max_length = True
33
+
34
+ # Scheduler & Optimizer
35
+ batch_size = 1 # per_device
36
+ accumulative_counts = 16
37
+ dataloader_num_workers = 0
38
+ max_epochs = 3
39
+ optim_type = PagedAdamW32bit
40
+ lr = 2e-4
41
+ betas = (0.9, 0.999)
42
+ weight_decay = 0
43
+ max_norm = 1 # grad clip
44
+
45
+ # Evaluate the generation performance during the training
46
+ evaluation_freq = 500
47
+ SYSTEM = SYSTEM_TEMPLATE.arxiv_gentile
48
+ evaluation_inputs = [
49
+ ('We present InternLM, a multilingual foundational language '
50
+ 'model with 104B parameters. InternLM is pre-trained on a large '
51
+ 'corpora with 1.6T tokens with a multi-phase progressive '
52
+ 'process, and then fine-tuned to align with human preferences. '
53
+ 'We also developed a training system called Uniscale-LLM for '
54
+ 'efficient large language model training. The evaluation on a '
55
+ 'number of benchmarks shows that InternLM achieves '
56
+ 'state-of-the-art performance in multiple aspects, including '
57
+ 'knowledge understanding, reading comprehension, mathematics, '
58
+ 'and coding. With such well-rounded capabilities, InternLM '
59
+ 'achieves outstanding performances on comprehensive exams, '
60
+ 'including MMLU, AGIEval, C-Eval and GAOKAO-Bench, without '
61
+ 'resorting to external tools. On these benchmarks, InternLM '
62
+ 'not only significantly outperforms open-source models, but '
63
+ 'also obtains superior performance compared to ChatGPT. Also, '
64
+ 'InternLM demonstrates excellent capability of understanding '
65
+ 'Chinese language and Chinese culture, which makes it a '
66
+ 'suitable foundation model to support Chinese-oriented language '
67
+ 'applications. This manuscript gives a detailed study of '
68
+ 'our results, with benchmarks and examples across a diverse '
69
+ 'set of knowledge domains and tasks.'),
70
+ ('In this work, we develop and release Llama 2, a collection of '
71
+ 'pretrained and fine-tuned large language models (LLMs) ranging '
72
+ 'in scale from 7 billion to 70 billion parameters.\nOur '
73
+ 'fine-tuned LLMs, called LLAMA 2-CHAT, are optimized for '
74
+ 'dialogue use cases. Our models outperform open-source chat '
75
+ 'models on most benchmarks we tested, and based on our human '
76
+ 'evaluations for helpfulness and safety, may be a suitable '
77
+ 'substitute for closedsource models. We provide a detailed '
78
+ 'description of our approach to fine-tuning and safety '
79
+ 'improvements of LLAMA 2-CHAT in order to enable the community '
80
+ 'to build on our work and contribute to the responsible '
81
+ 'development of LLMs.')
82
+ ]
83
+
84
+ #######################################################################
85
+ # PART 2 Model & Tokenizer #
86
+ #######################################################################
87
+ tokenizer = dict(
88
+ type=AutoTokenizer.from_pretrained,
89
+ pretrained_model_name_or_path=pretrained_model_name_or_path,
90
+ trust_remote_code=True,
91
+ padding_side='right')
92
+
93
+ model = dict(
94
+ type=SupervisedFinetune,
95
+ llm=dict(
96
+ type=AutoModelForCausalLM.from_pretrained,
97
+ pretrained_model_name_or_path=pretrained_model_name_or_path,
98
+ trust_remote_code=True,
99
+ torch_dtype=torch.float16,
100
+ quantization_config=dict(
101
+ type=BitsAndBytesConfig,
102
+ load_in_4bit=True,
103
+ load_in_8bit=False,
104
+ llm_int8_threshold=6.0,
105
+ llm_int8_has_fp16_weight=False,
106
+ bnb_4bit_compute_dtype=torch.float16,
107
+ bnb_4bit_use_double_quant=True,
108
+ bnb_4bit_quant_type='nf4')),
109
+ lora=dict(
110
+ type=LoraConfig,
111
+ r=64,
112
+ lora_alpha=16,
113
+ lora_dropout=0.1,
114
+ bias='none',
115
+ task_type='CAUSAL_LM'))
116
+
117
+ #######################################################################
118
+ # PART 3 Dataset & Dataloader #
119
+ #######################################################################
120
+ train_dataset = dict(
121
+ type=process_hf_dataset,
122
+ dataset=dict(
123
+ type=load_dataset, path='json', data_files=dict(train=data_path)),
124
+ tokenizer=tokenizer,
125
+ max_length=max_length,
126
+ dataset_map_fn=arxiv_map_fn,
127
+ template_map_fn=dict(
128
+ type=template_map_fn_factory, template=prompt_template),
129
+ remove_unused_columns=True,
130
+ shuffle_before_pack=True,
131
+ pack_to_max_length=pack_to_max_length)
132
+
133
+ train_dataloader = dict(
134
+ batch_size=batch_size,
135
+ num_workers=dataloader_num_workers,
136
+ dataset=train_dataset,
137
+ sampler=dict(type=DefaultSampler, shuffle=True),
138
+ collate_fn=dict(type=default_collate_fn))
139
+
140
+ #######################################################################
141
+ # PART 4 Scheduler & Optimizer #
142
+ #######################################################################
143
+ # optimizer
144
+ optim_wrapper = dict(
145
+ type=AmpOptimWrapper,
146
+ optimizer=dict(
147
+ type=optim_type, lr=lr, betas=betas, weight_decay=weight_decay),
148
+ clip_grad=dict(max_norm=max_norm, error_if_nonfinite=False),
149
+ accumulative_counts=accumulative_counts,
150
+ loss_scale='dynamic',
151
+ dtype='float16')
152
+
153
+ # learning policy
154
+ # More information: https://github.com/open-mmlab/mmengine/blob/main/docs/en/tutorials/param_scheduler.md # noqa: E501
155
+ param_scheduler = dict(
156
+ type=CosineAnnealingLR,
157
+ eta_min=lr * 0.1,
158
+ by_epoch=True,
159
+ T_max=max_epochs,
160
+ convert_to_iter_based=True)
161
+
162
+ # train, val, test setting
163
+ train_cfg = dict(by_epoch=True, max_epochs=max_epochs, val_interval=1)
164
+
165
+ #######################################################################
166
+ # PART 5 Runtime #
167
+ #######################################################################
168
+ # Log the dialogue periodically during the training process, optional
169
+ custom_hooks = [
170
+ dict(type=DatasetInfoHook, tokenizer=tokenizer),
171
+ dict(
172
+ type=EvaluateChatHook,
173
+ tokenizer=tokenizer,
174
+ every_n_iters=evaluation_freq,
175
+ evaluation_inputs=evaluation_inputs,
176
+ system=SYSTEM,
177
+ prompt_template=prompt_template)
178
+ ]
179
+
180
+ # configure default hooks
181
+ default_hooks = dict(
182
+ # record the time of every iteration.
183
+ timer=dict(type=IterTimerHook),
184
+ # print log every 10 iterations.
185
+ logger=dict(type=LoggerHook, interval=10),
186
+ # enable the parameter scheduler.
187
+ param_scheduler=dict(type=ParamSchedulerHook),
188
+ # save checkpoint per epoch.
189
+ checkpoint=dict(type=CheckpointHook, interval=1),
190
+ # set sampler seed in distributed environment.
191
+ sampler_seed=dict(type=DistSamplerSeedHook),
192
+ )
193
+
194
+ # configure environment
195
+ env_cfg = dict(
196
+ # whether to enable cudnn benchmark
197
+ cudnn_benchmark=False,
198
+ # set multi process parameters
199
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
200
+ # set distributed parameters
201
+ dist_cfg=dict(backend='nccl'),
202
+ )
203
+
204
+ # set visualizer
205
+ visualizer = None
206
+
207
+ # set log level
208
+ log_level = 'INFO'
209
+
210
+ # load from which checkpoint
211
+ load_from = None
212
+
213
+ # whether to resume training from the loaded checkpoint
214
+ resume = False
215
+
216
+ # Defaults to use random seed and disable `deterministic`
217
+ randomness = dict(seed=None, deterministic=False)