JackeyLai committed
Commit 92ac392 · verified · parent 328c314

Upload 5 files
adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "/mnt/w2/lzq/nex_LLM/weights/Qwen2.5-3B-Instruct",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "gate_proj",
+     "up_proj",
+     "k_proj",
+     "o_proj",
+     "v_proj",
+     "q_proj",
+     "down_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
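
This adapter_config.json describes a rank-8 LoRA adapter (alpha 16, no dropout) over all seven attention and MLP projections of Qwen2.5-3B-Instruct. A minimal loading sketch with PEFT, assuming the adapter files sit in a hypothetical local directory `adapter_dir` and the base-model path above is reachable:

```python
# Sketch: attach this LoRA adapter to the base model with PEFT.
# "adapter_dir" is a hypothetical path holding adapter_config.json
# and the adapter weights; substitute the repo id or your own path.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = "/mnt/w2/lzq/nex_LLM/weights/Qwen2.5-3B-Instruct"  # from adapter_config.json
model = AutoModelForCausalLM.from_pretrained(base, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(base)

# PeftModel reads r=8, lora_alpha=16 and the target_modules list from the config.
model = PeftModel.from_pretrained(model, "adapter_dir")
model = model.merge_and_unload()  # optional: fold the LoRA deltas into the base weights
```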
added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
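
added_tokens.json is the stock Qwen2.5 special-token table (ChatML markers, tool-call tags, FIM and vision placeholders); no new vocabulary was added, which matches `train.resize_vocab: false` below. A quick sanity check, assuming the tokenizer is loaded from the same base-model directory:

```python
# Sanity check: the special-token ids above should match what the
# tokenizer reports. The path is reused from adapter_config.json;
# point it at this repo or your own copy as appropriate.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("/mnt/w2/lzq/nex_LLM/weights/Qwen2.5-3B-Instruct")
assert tok.convert_tokens_to_ids("<|im_start|>") == 151644
assert tok.convert_tokens_to_ids("<|im_end|>") == 151645   # eos for the instruct template
assert tok.convert_tokens_to_ids("<|endoftext|>") == 151643
```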
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 44.44444444444444,
+   "num_input_tokens_seen": 349136,
+   "total_flos": 5843993170542592.0,
+   "train_loss": 0.6807288352648417,
+   "train_runtime": 762.8508,
+   "train_samples_per_second": 7.079,
+   "train_steps_per_second": 0.197
+ }
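
The run logged ~44.4 epochs against the 100 configured in llamaboard_config.yaml below, so it appears to have been stopped early; the file does not say why. The remaining summary numbers are internally consistent, which a little arithmetic on the values above confirms:

```python
# Derived quantities from all_results.json (pure arithmetic, no assumptions).
runtime_s = 762.8508
steps_per_s = 0.197
samples_per_s = 7.079
tokens_seen = 349_136

optimizer_steps = round(runtime_s * steps_per_s)   # ~150 optimizer steps
samples_seen = round(runtime_s * samples_per_s)    # ~5400 samples processed
tokens_per_s = tokens_seen / runtime_s             # ~458 tokens/s throughput
print(optimizer_steps, samples_seen, f"{tokens_per_s:.0f} tok/s")
```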
infer.yaml ADDED
@@ -0,0 +1,4 @@
+ model_name_or_path: /mnt/w2/lzq/nex_LLM/weights/Qwen2.5-3B-Instruct
+ template: qwen
+ infer_backend: vllm
+ vllm_enforce_eager: true
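
infer.yaml is a LLaMA-Factory inference config: it serves the base model through vLLM with CUDA graph capture disabled. Note it does not reference the adapter; chatting with the fine-tuned weights would additionally need an `adapter_name_or_path` entry. A rough sketch of the same setup using vLLM directly (an approximation, not LLaMA-Factory's own code path; `template: qwen` is stood in for by the tokenizer's built-in ChatML chat template):

```python
# Sketch of what infer.yaml configures, expressed against the vLLM API.
from vllm import LLM, SamplingParams

llm = LLM(
    model="/mnt/w2/lzq/nex_LLM/weights/Qwen2.5-3B-Instruct",
    enforce_eager=True,  # mirrors vllm_enforce_eager: skip CUDA graphs
)
params = SamplingParams(temperature=0.7, max_tokens=256)
out = llm.chat([{"role": "user", "content": "Who are you?"}], params)
print(out[0].outputs[0].text)
```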
llamaboard_config.yaml ADDED
@@ -0,0 +1,67 @@
+ top.booster: auto
+ top.checkpoint_path:
+ - train_2024-10-31-11-14-25
+ top.finetuning_type: lora
+ top.model_name: Qwen2.5-3B-Instruct
+ top.quantization_bit: none
+ top.quantization_method: bitsandbytes
+ top.rope_scaling: none
+ top.template: qwen
+ train.additional_target: ''
+ train.badam_mode: layer
+ train.badam_switch_interval: 50
+ train.badam_switch_mode: ascending
+ train.badam_update_ratio: 0.05
+ train.batch_size: 2
+ train.compute_type: fp16
+ train.create_new_adapter: false
+ train.cutoff_len: 1024
+ train.dataset:
+ - self_cognition
+ train.dataset_dir: data
+ train.ds_offload: false
+ train.ds_stage: none
+ train.freeze_extra_modules: ''
+ train.freeze_trainable_layers: 2
+ train.freeze_trainable_modules: all
+ train.galore_rank: 16
+ train.galore_scale: 0.25
+ train.galore_target: all
+ train.galore_update_interval: 200
+ train.gradient_accumulation_steps: 8
+ train.learning_rate: 5e-5
+ train.logging_steps: 5
+ train.lora_alpha: 16
+ train.lora_dropout: 0
+ train.lora_rank: 8
+ train.lora_target: all
+ train.loraplus_lr_ratio: 0
+ train.lr_scheduler_type: cosine
+ train.mask_history: false
+ train.max_grad_norm: '1.0'
+ train.max_samples: '100000'
+ train.neat_packing: false
+ train.neftune_alpha: 0
+ train.num_train_epochs: '100'
+ train.optim: adamw_torch
+ train.packing: false
+ train.ppo_score_norm: false
+ train.ppo_whiten_rewards: false
+ train.pref_beta: 0.1
+ train.pref_ftx: 0
+ train.pref_loss: sigmoid
+ train.report_to: false
+ train.resize_vocab: false
+ train.reward_model: null
+ train.save_steps: 100
+ train.shift_attn: false
+ train.train_on_prompt: false
+ train.training_stage: Supervised Fine-Tuning
+ train.use_badam: false
+ train.use_dora: false
+ train.use_galore: false
+ train.use_llama_pro: false
+ train.use_pissa: false
+ train.use_rslora: false
+ train.val_size: 0
+ train.warmup_steps: 0
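
llamaboard_config.yaml is the LLaMA Board (web UI) snapshot of the run: LoRA SFT on the self_cognition dataset in fp16, rank 8, alpha 16, effective batch size 2 × 8 = 16. Its lora_* values should agree with the exported adapter_config.json; a small cross-check, assuming both files sit in the working directory and PyYAML is installed:

```python
# Cross-check the UI snapshot against the exported adapter config.
import json
import yaml

ui = yaml.safe_load(open("llamaboard_config.yaml"))        # keys are dotted strings
peft_cfg = json.load(open("adapter_config.json"))

assert ui["train.lora_rank"] == peft_cfg["r"] == 8
assert ui["train.lora_alpha"] == peft_cfg["lora_alpha"] == 16
assert ui["train.lora_dropout"] == peft_cfg["lora_dropout"] == 0

# "lora_target: all" in the UI expands to every linear projection,
# i.e. exactly the seven-module list in the adapter config.
assert set(peft_cfg["target_modules"]) == {
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
}
print("UI config and adapter config agree")
```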