LZHgrla committed on
Commit
831d69a
1 Parent(s): deb3c32
Files changed (3) hide show
  1. adapter_config.json +27 -0
  2. adapter_model.bin +3 -0
  3. xtuner_config.py +164 -0
adapter_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "internlm/internlm2-chat-7b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "lora_alpha": 16,
12
+ "lora_dropout": 0.1,
13
+ "modules_to_save": null,
14
+ "peft_type": "LORA",
15
+ "r": 64,
16
+ "rank_pattern": {},
17
+ "revision": null,
18
+ "target_modules": [
19
+ "w3",
20
+ "wo",
21
+ "w1",
22
+ "output",
23
+ "wqkv",
24
+ "w2"
25
+ ],
26
+ "task_type": "CAUSAL_LM"
27
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3cc443d46eeffddd667182757cd09514c9022c679aea8eda7f61b1f6f854bca
3
+ size 314476114
xtuner_config.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SYSTEM = ''
2
+ accumulative_counts = 16
3
+ batch_size = 1
4
+ betas = (
5
+ 0.9,
6
+ 0.999,
7
+ )
8
+ custom_hooks = [
9
+ dict(
10
+ tokenizer=dict(
11
+ padding_side='right',
12
+ pretrained_model_name_or_path='internlm/internlm2-chat-7b',
13
+ trust_remote_code=True,
14
+ type='transformers.AutoTokenizer.from_pretrained'),
15
+ type='xtuner.engine.DatasetInfoHook'),
16
+ dict(
17
+ evaluation_inputs=[
18
+ '请给我介绍五个上海的景点',
19
+ 'Please tell me five scenic spots in Shanghai',
20
+ ],
21
+ every_n_iters=500,
22
+ prompt_template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
23
+ system='',
24
+ tokenizer=dict(
25
+ padding_side='right',
26
+ pretrained_model_name_or_path='internlm/internlm2-chat-7b',
27
+ trust_remote_code=True,
28
+ type='transformers.AutoTokenizer.from_pretrained'),
29
+ type='xtuner.engine.EvaluateChatHook'),
30
+ ]
31
+ data_path = 'timdettmers/openassistant-guanaco'
32
+ dataloader_num_workers = 0
33
+ default_hooks = dict(
34
+ checkpoint=dict(interval=1, type='mmengine.hooks.CheckpointHook'),
35
+ logger=dict(interval=10, type='mmengine.hooks.LoggerHook'),
36
+ param_scheduler=dict(type='mmengine.hooks.ParamSchedulerHook'),
37
+ sampler_seed=dict(type='mmengine.hooks.DistSamplerSeedHook'),
38
+ timer=dict(type='mmengine.hooks.IterTimerHook'))
39
+ env_cfg = dict(
40
+ cudnn_benchmark=False,
41
+ dist_cfg=dict(backend='nccl'),
42
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
43
+ evaluation_freq = 500
44
+ evaluation_inputs = [
45
+ '请给我介绍五个上海的景点',
46
+ 'Please tell me five scenic spots in Shanghai',
47
+ ]
48
+ launcher = 'none'
49
+ load_from = None
50
+ log_level = 'INFO'
51
+ lr = 0.0002
52
+ max_epochs = 3
53
+ max_length = 2048
54
+ max_norm = 1
55
+ model = dict(
56
+ llm=dict(
57
+ pretrained_model_name_or_path='internlm/internlm2-chat-7b',
58
+ quantization_config=dict(
59
+ bnb_4bit_compute_dtype='torch.float16',
60
+ bnb_4bit_quant_type='nf4',
61
+ bnb_4bit_use_double_quant=True,
62
+ llm_int8_has_fp16_weight=False,
63
+ llm_int8_threshold=6.0,
64
+ load_in_4bit=True,
65
+ load_in_8bit=False,
66
+ type='transformers.BitsAndBytesConfig'),
67
+ torch_dtype='torch.float16',
68
+ trust_remote_code=True,
69
+ type='transformers.AutoModelForCausalLM.from_pretrained'),
70
+ lora=dict(
71
+ bias='none',
72
+ lora_alpha=16,
73
+ lora_dropout=0.1,
74
+ r=64,
75
+ task_type='CAUSAL_LM',
76
+ type='peft.LoraConfig'),
77
+ type='xtuner.model.SupervisedFinetune')
78
+ optim_type = 'torch.optim.AdamW'
79
+ optim_wrapper = dict(
80
+ accumulative_counts=16,
81
+ clip_grad=dict(error_if_nonfinite=False, max_norm=1),
82
+ dtype='float16',
83
+ loss_scale='dynamic',
84
+ optimizer=dict(
85
+ betas=(
86
+ 0.9,
87
+ 0.999,
88
+ ),
89
+ lr=0.0002,
90
+ type='torch.optim.AdamW',
91
+ weight_decay=0),
92
+ type='mmengine.optim.AmpOptimWrapper')
93
+ pack_to_max_length = True
94
+ param_scheduler = [
95
+ dict(
96
+ begin=0,
97
+ by_epoch=True,
98
+ convert_to_iter_based=True,
99
+ end=0.09,
100
+ start_factor=1e-05,
101
+ type='mmengine.optim.LinearLR'),
102
+ dict(
103
+ T_max=3,
104
+ begin=0.09,
105
+ by_epoch=True,
106
+ convert_to_iter_based=True,
107
+ eta_min=0.0,
108
+ type='mmengine.optim.CosineAnnealingLR'),
109
+ ]
110
+ pretrained_model_name_or_path = 'internlm/internlm2-chat-7b'
111
+ prompt_template = 'xtuner.utils.PROMPT_TEMPLATE.internlm2_chat'
112
+ randomness = dict(deterministic=False, seed=None)
113
+ resume = False
114
+ tokenizer = dict(
115
+ padding_side='right',
116
+ pretrained_model_name_or_path='internlm/internlm2-chat-7b',
117
+ trust_remote_code=True,
118
+ type='transformers.AutoTokenizer.from_pretrained')
119
+ train_cfg = dict(by_epoch=True, max_epochs=3, val_interval=1)
120
+ train_dataloader = dict(
121
+ batch_size=1,
122
+ collate_fn=dict(type='xtuner.dataset.collate_fns.default_collate_fn'),
123
+ dataset=dict(
124
+ dataset=dict(
125
+ path='timdettmers/openassistant-guanaco',
126
+ type='datasets.load_dataset'),
127
+ dataset_map_fn='xtuner.dataset.map_fns.oasst1_map_fn',
128
+ max_length=2048,
129
+ pack_to_max_length=True,
130
+ remove_unused_columns=True,
131
+ shuffle_before_pack=True,
132
+ template_map_fn=dict(
133
+ template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
134
+ type='xtuner.dataset.map_fns.template_map_fn_factory'),
135
+ tokenizer=dict(
136
+ padding_side='right',
137
+ pretrained_model_name_or_path='internlm/internlm2-chat-7b',
138
+ trust_remote_code=True,
139
+ type='transformers.AutoTokenizer.from_pretrained'),
140
+ type='xtuner.dataset.process_hf_dataset'),
141
+ num_workers=0,
142
+ sampler=dict(shuffle=True, type='mmengine.dataset.DefaultSampler'))
143
+ train_dataset = dict(
144
+ dataset=dict(
145
+ path='timdettmers/openassistant-guanaco',
146
+ type='datasets.load_dataset'),
147
+ dataset_map_fn='xtuner.dataset.map_fns.oasst1_map_fn',
148
+ max_length=2048,
149
+ pack_to_max_length=True,
150
+ remove_unused_columns=True,
151
+ shuffle_before_pack=True,
152
+ template_map_fn=dict(
153
+ template='xtuner.utils.PROMPT_TEMPLATE.internlm2_chat',
154
+ type='xtuner.dataset.map_fns.template_map_fn_factory'),
155
+ tokenizer=dict(
156
+ padding_side='right',
157
+ pretrained_model_name_or_path='internlm/internlm2-chat-7b',
158
+ trust_remote_code=True,
159
+ type='transformers.AutoTokenizer.from_pretrained'),
160
+ type='xtuner.dataset.process_hf_dataset')
161
+ visualizer = None
162
+ warmup_ratio = 0.03
163
+ weight_decay = 0
164
+ work_dir = './work_dirs/internlm2_chat_7b_qlora_oasst1_e3'