zhangzhao219 committed
Commit a542899 (1 parent: 10bfd24)

Upload 96 files

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete set.
Files changed (50)
  1. checkpoints/v08-20240205-114459/README.md +10 -0
  2. checkpoints/v08-20240205-114459/configuration.json +11 -0
  3. checkpoints/v08-20240205-114459/default/adapter_config.json +36 -0
  4. checkpoints/v08-20240205-114459/default/adapter_model.safetensors +3 -0
  5. checkpoints/v08-20240205-114459/generation_config.json +7 -0
  6. checkpoints/v08-20240205-114459/sft_args.json +119 -0
  7. checkpoints/v08-20240205-114459/special_tokens_map.json +30 -0
  8. checkpoints/v08-20240205-114459/tokenizer.json +0 -0
  9. checkpoints/v08-20240205-114459/tokenizer.model +3 -0
  10. checkpoints/v08-20240205-114459/tokenizer_config.json +43 -0
  11. checkpoints/v08-20240205-114459/trainer_state.json +156 -0
  12. checkpoints/v08-20240205-114459/training_args.bin +3 -0
  13. checkpoints/v10-20240205-114325/README.md +10 -0
  14. checkpoints/v10-20240205-114325/configuration.json +11 -0
  15. checkpoints/v10-20240205-114325/default/adapter_config.json +36 -0
  16. checkpoints/v10-20240205-114325/default/adapter_model.safetensors +3 -0
  17. checkpoints/v10-20240205-114325/generation_config.json +7 -0
  18. checkpoints/v10-20240205-114325/sft_args.json +119 -0
  19. checkpoints/v10-20240205-114325/special_tokens_map.json +30 -0
  20. checkpoints/v10-20240205-114325/tokenizer.json +0 -0
  21. checkpoints/v10-20240205-114325/tokenizer.model +3 -0
  22. checkpoints/v10-20240205-114325/tokenizer_config.json +43 -0
  23. checkpoints/v10-20240205-114325/trainer_state.json +156 -0
  24. checkpoints/v10-20240205-114325/training_args.bin +3 -0
  25. checkpoints/v13-20240202-072530/README.md +10 -0
  26. checkpoints/v13-20240202-072530/configuration.json +11 -0
  27. checkpoints/v13-20240202-072530/default/adapter_config.json +35 -0
  28. checkpoints/v13-20240202-072530/default/adapter_model.safetensors +3 -0
  29. checkpoints/v13-20240202-072530/generation_config.json +7 -0
  30. checkpoints/v13-20240202-072530/sft_args.json +129 -0
  31. checkpoints/v13-20240202-072530/special_tokens_map.json +30 -0
  32. checkpoints/v13-20240202-072530/tokenizer.json +0 -0
  33. checkpoints/v13-20240202-072530/tokenizer.model +3 -0
  34. checkpoints/v13-20240202-072530/tokenizer_config.json +43 -0
  35. checkpoints/v13-20240202-072530/trainer_state.json +293 -0
  36. checkpoints/v13-20240202-072530/training_args.bin +3 -0
  37. checkpoints/v13-20240206-111010/README.md +10 -0
  38. checkpoints/v13-20240206-111010/configuration.json +11 -0
  39. checkpoints/v13-20240206-111010/default/adapter_config.json +36 -0
  40. checkpoints/v13-20240206-111010/default/adapter_model.safetensors +3 -0
  41. checkpoints/v13-20240206-111010/generation_config.json +7 -0
  42. checkpoints/v13-20240206-111010/sft_args.json +119 -0
  43. checkpoints/v13-20240206-111010/special_tokens_map.json +30 -0
  44. checkpoints/v13-20240206-111010/tokenizer.json +0 -0
  45. checkpoints/v13-20240206-111010/tokenizer.model +3 -0
  46. checkpoints/v13-20240206-111010/tokenizer_config.json +43 -0
  47. checkpoints/v13-20240206-111010/trainer_state.json +156 -0
  48. checkpoints/v13-20240206-111010/training_args.bin +3 -0
  49. checkpoints/v16-20240206-224659/README.md +10 -0
  50. checkpoints/v16-20240206-224659/configuration.json +11 -0
checkpoints/v08-20240205-114459/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.2
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v08-20240205-114459/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
checkpoints/v08-20240205-114459/default/adapter_config.json ADDED
@@ -0,0 +1,36 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "enable_lora": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "lora_dtype": "fp32",
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": [],
+ "peft_type": null,
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "swift_type": "LORA",
+ "target_modules": [
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "up_proj",
+ "v_proj",
+ "gate_proj",
+ "k_proj"
+ ],
+ "task_type": null,
+ "use_merged_linear": false,
+ "use_qa_lora": false
+ }
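This adapter is saved in SWIFT's own tuner format (`"swift_type": "LORA"`, backend `swift`) rather than plain PEFT, so it is attached through the SWIFT API. A minimal sketch, assuming the ms-swift package's `Swift.from_pretrained` entry point and a local copy of this checkpoint directory (the path is illustrative):

```python
# Sketch only: attach the SWIFT-format LoRA adapter above to the base model.
# Assumes ms-swift is installed; the checkpoint path is an example.
import torch
from transformers import AutoModelForCausalLM
from swift import Swift

base = AutoModelForCausalLM.from_pretrained(
    "upstage/SOLAR-10.7B-Instruct-v1.0", torch_dtype=torch.float16
)
# The adapter weights sit under the "default/" subdirectory of the checkpoint.
model = Swift.from_pretrained(base, "checkpoints/v08-20240205-114459")
model.eval()
```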
checkpoints/v08-20240205-114459/default/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d7357403b5379ca631fc54f34056c390fb37172056a7d1fc9e68d50205ad0c8
+ size 125912272
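Note that these three lines are a Git LFS pointer, not the tensors themselves: the ~126 MB safetensors blob is stored out of band and addressed by its SHA-256. A quick way to check a downloaded blob against the pointer (the local path is an assumption):

```python
# Verify a downloaded LFS object against the oid recorded in the pointer.
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    # Stream the file so large checkpoints need not fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk), b""):
            h.update(block)
    return h.hexdigest()

expected = "7d7357403b5379ca631fc54f34056c390fb37172056a7d1fc9e68d50205ad0c8"
path = "checkpoints/v08-20240205-114459/default/adapter_model.safetensors"  # assumed local path
assert sha256_of(path) == expected, "hash mismatch - incomplete LFS download?"
```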
checkpoints/v08-20240205-114459/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "eos_token_id": 2,
+ "max_new_tokens": 512,
+ "pad_token_id": 2,
+ "repetition_penalty": 0.99,
+ "transformers_version": "4.37.2"
+ }
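These decoding defaults are what `transformers` picks up automatically at generation time; a sketch, assuming the checkpoint directory is available locally:

```python
# Load the decoding defaults stored in generation_config.json.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("checkpoints/v08-20240205-114459")
print(gen_cfg.max_new_tokens)      # 512
print(gen_cfg.repetition_penalty)  # 0.99
```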
checkpoints/v08-20240205-114459/sft_args.json ADDED
@@ -0,0 +1,119 @@
+ {
+ "model_type": "solar-10-7b-instruct-v1",
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "model_cache_dir": "/mnt/cachenew/yangzekang/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0",
+ "sft_type": "lora",
+ "freeze_parameters": 0.0,
+ "additional_trainable_parameters": [],
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "output_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v8-20240205-114459",
+ "add_output_dir_suffix": true,
+ "ddp_backend": "nccl",
+ "seed": 42,
+ "resume_from_checkpoint": null,
+ "dtype": "fp16",
+ "dataset": [
+ "_custom_dataset"
+ ],
+ "dataset_seed": 42,
+ "dataset_test_ratio": 0.01,
+ "train_dataset_sample": -1,
+ "val_dataset_sample": null,
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.",
+ "max_length": 3072,
+ "truncation_strategy": "delete",
+ "check_dataset_strategy": "warning",
+ "custom_train_dataset_path": [
+ "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/data/wsdm/model/pseudo/1.01/release_train_data.json"
+ ],
+ "custom_val_dataset_path": [],
+ "self_cognition_sample": 0,
+ "model_name": [
+ null,
+ null
+ ],
+ "model_author": [
+ null,
+ null
+ ],
+ "quantization_bit": 0,
+ "bnb_4bit_comp_dtype": "fp16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "lora_target_modules": [
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "up_proj",
+ "v_proj",
+ "gate_proj",
+ "k_proj"
+ ],
+ "lora_rank": 8,
+ "lora_alpha": 16,
+ "lora_dropout_p": 0.05,
+ "lora_bias_trainable": "none",
+ "lora_modules_to_save": [],
+ "lora_dtype": "fp32",
+ "neftune_alpha": 0.0,
+ "gradient_checkpointing": true,
+ "deepspeed_config_path": null,
+ "batch_size": 1,
+ "eval_batch_size": 1,
+ "num_train_epochs": 4,
+ "max_steps": -1,
+ "optim": "adamw_torch",
+ "adam_beta1": 0.9,
+ "adam_beta2": 0.999,
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "gradient_accumulation_steps": 8,
+ "max_grad_norm": 0.5,
+ "predict_with_generate": false,
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.03,
+ "eval_steps": 1700,
+ "save_steps": 1700,
+ "save_only_model": true,
+ "save_total_limit": null,
+ "logging_steps": 100,
+ "dataloader_num_workers": 1,
+ "push_to_hub": false,
+ "hub_model_id": "solar-10-7b-instruct-v1-lora",
+ "hub_private_repo": true,
+ "push_hub_strategy": "push_best",
+ "hub_token": null,
+ "test_oom_error": false,
+ "disable_tqdm": false,
+ "lazy_tokenize": false,
+ "preprocess_num_proc": 1,
+ "use_flash_attn": null,
+ "ignore_args_error": false,
+ "check_model_is_latest": false,
+ "logging_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v8-20240205-114459/runs",
+ "report_to": [
+ "all"
+ ],
+ "acc_strategy": "token",
+ "save_on_each_node": true,
+ "save_strategy": "steps",
+ "save_safetensors": true,
+ "max_new_tokens": 2048,
+ "do_sample": true,
+ "temperature": 0.3,
+ "top_k": 20,
+ "top_p": 0.7,
+ "repetition_penalty": 1.05,
+ "num_beams": 1,
+ "only_save_model": true,
+ "torch_dtype": "torch.float16",
+ "fp16": true,
+ "bf16": false,
+ "bnb_4bit_compute_dtype": "torch.float16",
+ "load_in_4bit": false,
+ "load_in_8bit": false,
+ "train_sampler_random": true,
+ "deepspeed": null
+ }
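sft_args.json records per-device settings but not the world size, so the effective schedule has to be reconstructed. A sketch of the arithmetic (the GPU count is an assumption; the step counts come from trainer_state.json below):

```python
# Reconstruct the effective optimization schedule from sft_args.json.
import json

with open("checkpoints/v08-20240205-114459/sft_args.json") as f:  # assumed local path
    args = json.load(f)

world_size = 2  # assumption: the number of GPUs is not recorded in the file
samples_per_step = args["batch_size"] * args["gradient_accumulation_steps"] * world_size
print("samples per optimizer step:", samples_per_step)

# trainer_state.json reports max_steps = 3552 over num_train_epochs = 4, so
# warmup_ratio = 0.03 gives ~107 warmup steps - consistent with the step-1
# learning rate of ~9.35e-07 ~= 1e-4 / 107 logged there.
print("approx. warmup steps:", round(args["warmup_ratio"] * 3552))
```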
checkpoints/v08-20240205-114459/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoints/v08-20240205-114459/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/v08-20240205-114459/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
checkpoints/v08-20240205-114459/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
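The `chat_template` above encodes SOLAR's `### System:` / `### User:` / `### Assistant:` prompt format. It can be rendered with the standard `transformers` API; a sketch, with the checkpoint directory assumed local:

```python
# Render the SOLAR prompt format encoded by the chat_template above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoints/v08-20240205-114459")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# ### System:
# You are a helpful assistant.
#
# ### User:
# Hello!
#
# ### Assistant:
```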
checkpoints/v08-20240205-114459/trainer_state.json ADDED
@@ -0,0 +1,156 @@
+ {
+ "best_metric": 0.69346523,
+ "best_model_checkpoint": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v8-20240205-114459/checkpoint-1700",
+ "epoch": 1.9144144144144144,
+ "eval_steps": 1700,
+ "global_step": 1700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "acc": 0.67516661,
+ "epoch": 0.0,
+ "learning_rate": 9.345794392523364e-07,
+ "loss": 1.46797299,
+ "step": 1
+ },
+ {
+ "acc": 0.74959772,
+ "epoch": 0.11,
+ "learning_rate": 9.158878504672898e-05,
+ "loss": 0.95736756,
+ "step": 100
+ },
+ {
+ "acc": 0.78039505,
+ "epoch": 0.23,
+ "learning_rate": 9.735849056603774e-05,
+ "loss": 0.77719513,
+ "step": 200
+ },
+ {
+ "acc": 0.78730965,
+ "epoch": 0.34,
+ "learning_rate": 9.4455732946299e-05,
+ "loss": 0.74986748,
+ "step": 300
+ },
+ {
+ "acc": 0.79136169,
+ "epoch": 0.45,
+ "learning_rate": 9.155297532656024e-05,
+ "loss": 0.73882896,
+ "step": 400
+ },
+ {
+ "acc": 0.79132759,
+ "epoch": 0.56,
+ "learning_rate": 8.865021770682148e-05,
+ "loss": 0.7381105,
+ "step": 500
+ },
+ {
+ "acc": 0.79090462,
+ "epoch": 0.68,
+ "learning_rate": 8.574746008708273e-05,
+ "loss": 0.73391434,
+ "step": 600
+ },
+ {
+ "acc": 0.79388229,
+ "epoch": 0.79,
+ "learning_rate": 8.284470246734399e-05,
+ "loss": 0.72463707,
+ "step": 700
+ },
+ {
+ "acc": 0.79177132,
+ "epoch": 0.9,
+ "learning_rate": 7.994194484760524e-05,
+ "loss": 0.72666443,
+ "step": 800
+ },
+ {
+ "acc": 0.79278763,
+ "epoch": 1.01,
+ "learning_rate": 7.703918722786648e-05,
+ "loss": 0.72437874,
+ "step": 900
+ },
+ {
+ "acc": 0.81367294,
+ "epoch": 1.13,
+ "learning_rate": 7.413642960812773e-05,
+ "loss": 0.63475815,
+ "step": 1000
+ },
+ {
+ "acc": 0.81142548,
+ "epoch": 1.24,
+ "learning_rate": 7.123367198838897e-05,
+ "loss": 0.63902611,
+ "step": 1100
+ },
+ {
+ "acc": 0.81469452,
+ "epoch": 1.35,
+ "learning_rate": 6.833091436865022e-05,
+ "loss": 0.63060787,
+ "step": 1200
+ },
+ {
+ "acc": 0.81454559,
+ "epoch": 1.46,
+ "learning_rate": 6.542815674891147e-05,
+ "loss": 0.63029087,
+ "step": 1300
+ },
+ {
+ "acc": 0.81508331,
+ "epoch": 1.58,
+ "learning_rate": 6.252539912917271e-05,
+ "loss": 0.62843025,
+ "step": 1400
+ },
+ {
+ "acc": 0.81447418,
+ "epoch": 1.69,
+ "learning_rate": 5.9622641509433966e-05,
+ "loss": 0.62912048,
+ "step": 1500
+ },
+ {
+ "acc": 0.81561844,
+ "epoch": 1.8,
+ "learning_rate": 5.671988388969521e-05,
+ "loss": 0.63019512,
+ "step": 1600
+ },
+ {
+ "acc": 0.8182132,
+ "epoch": 1.91,
+ "learning_rate": 5.381712626995646e-05,
+ "loss": 0.61591331,
+ "step": 1700
+ },
+ {
+ "epoch": 1.91,
+ "eval_acc": 0.8075339535878515,
+ "eval_loss": 0.6934652328491211,
+ "eval_runtime": 19.4269,
+ "eval_samples_per_second": 7.412,
+ "eval_steps_per_second": 3.706,
+ "step": 1700
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 3552,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 1700,
+ "total_flos": 2.3576867592376156e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
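trainer_state.json is plain JSON, so the curve above (train loss falling from 1.47 to ~0.62, eval loss 0.693 at step 1700) can be pulled out directly; a sketch, assuming a local copy:

```python
# Extract the training curve from trainer_state.json's log_history.
import json

with open("checkpoints/v08-20240205-114459/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # training log points; eval points carry eval_loss instead
        print(f"step {entry['step']:>5}  loss {entry['loss']:.4f}  acc {entry['acc']:.4f}")
print("best eval_loss:", state["best_metric"])
```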
checkpoints/v08-20240205-114459/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:40e6e97628354f5c8d33acf937c5402484caf81e09d587f30f86c4f9d252cf64
+ size 6584
checkpoints/v10-20240205-114325/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.2
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v10-20240205-114325/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
checkpoints/v10-20240205-114325/default/adapter_config.json ADDED
@@ -0,0 +1,36 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "enable_lora": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "lora_dtype": "fp32",
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": [],
+ "peft_type": null,
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "swift_type": "LORA",
+ "target_modules": [
+ "up_proj",
+ "q_proj",
+ "v_proj",
+ "o_proj",
+ "k_proj",
+ "gate_proj",
+ "down_proj"
+ ],
+ "task_type": null,
+ "use_merged_linear": false,
+ "use_qa_lora": false
+ }
checkpoints/v10-20240205-114325/default/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29d656c815960e6df076b50a811fc80e221919ab9a47b175dfeee5fa5c08acca
+ size 125912272
checkpoints/v10-20240205-114325/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "eos_token_id": 2,
+ "max_new_tokens": 512,
+ "pad_token_id": 2,
+ "repetition_penalty": 0.99,
+ "transformers_version": "4.37.2"
+ }
checkpoints/v10-20240205-114325/sft_args.json ADDED
@@ -0,0 +1,119 @@
+ {
+ "model_type": "solar-10-7b-instruct-v1",
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "model_cache_dir": "/mnt/cachenew/yangzekang/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0",
+ "sft_type": "lora",
+ "freeze_parameters": 0.0,
+ "additional_trainable_parameters": [],
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "output_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v10-20240205-114325",
+ "add_output_dir_suffix": true,
+ "ddp_backend": "nccl",
+ "seed": 42,
+ "resume_from_checkpoint": null,
+ "dtype": "fp16",
+ "dataset": [
+ "_custom_dataset"
+ ],
+ "dataset_seed": 42,
+ "dataset_test_ratio": 0.01,
+ "train_dataset_sample": -1,
+ "val_dataset_sample": null,
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.",
+ "max_length": 3072,
+ "truncation_strategy": "delete",
+ "check_dataset_strategy": "warning",
+ "custom_train_dataset_path": [
+ "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/data/wsdm/model/pseudo/1.01/release_train_data.json"
+ ],
+ "custom_val_dataset_path": [],
+ "self_cognition_sample": 0,
+ "model_name": [
+ null,
+ null
+ ],
+ "model_author": [
+ null,
+ null
+ ],
+ "quantization_bit": 0,
+ "bnb_4bit_comp_dtype": "fp16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "lora_target_modules": [
+ "up_proj",
+ "q_proj",
+ "v_proj",
+ "o_proj",
+ "k_proj",
+ "gate_proj",
+ "down_proj"
+ ],
+ "lora_rank": 8,
+ "lora_alpha": 16,
+ "lora_dropout_p": 0.05,
+ "lora_bias_trainable": "none",
+ "lora_modules_to_save": [],
+ "lora_dtype": "fp32",
+ "neftune_alpha": 0.0,
+ "gradient_checkpointing": true,
+ "deepspeed_config_path": null,
+ "batch_size": 1,
+ "eval_batch_size": 1,
+ "num_train_epochs": 4,
+ "max_steps": -1,
+ "optim": "adamw_torch",
+ "adam_beta1": 0.9,
+ "adam_beta2": 0.999,
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "gradient_accumulation_steps": 8,
+ "max_grad_norm": 0.5,
+ "predict_with_generate": false,
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.03,
+ "eval_steps": 1700,
+ "save_steps": 1700,
+ "save_only_model": true,
+ "save_total_limit": null,
+ "logging_steps": 100,
+ "dataloader_num_workers": 1,
+ "push_to_hub": false,
+ "hub_model_id": "solar-10-7b-instruct-v1-lora",
+ "hub_private_repo": true,
+ "push_hub_strategy": "push_best",
+ "hub_token": null,
+ "test_oom_error": false,
+ "disable_tqdm": false,
+ "lazy_tokenize": false,
+ "preprocess_num_proc": 1,
+ "use_flash_attn": null,
+ "ignore_args_error": false,
+ "check_model_is_latest": false,
+ "logging_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v10-20240205-114325/runs",
+ "report_to": [
+ "all"
+ ],
+ "acc_strategy": "token",
+ "save_on_each_node": true,
+ "save_strategy": "steps",
+ "save_safetensors": true,
+ "max_new_tokens": 2048,
+ "do_sample": true,
+ "temperature": 0.3,
+ "top_k": 20,
+ "top_p": 0.7,
+ "repetition_penalty": 1.05,
+ "num_beams": 1,
+ "only_save_model": true,
+ "torch_dtype": "torch.float16",
+ "fp16": true,
+ "bf16": false,
+ "bnb_4bit_compute_dtype": "torch.float16",
+ "load_in_4bit": false,
+ "load_in_8bit": false,
+ "train_sampler_random": true,
+ "deepspeed": null
+ }
checkpoints/v10-20240205-114325/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoints/v10-20240205-114325/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/v10-20240205-114325/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
checkpoints/v10-20240205-114325/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
checkpoints/v10-20240205-114325/trainer_state.json ADDED
@@ -0,0 +1,156 @@
+ {
+ "best_metric": 0.68919861,
+ "best_model_checkpoint": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v10-20240205-114325/checkpoint-1700",
+ "epoch": 1.9144144144144144,
+ "eval_steps": 1700,
+ "global_step": 1700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "acc": 0.67516661,
+ "epoch": 0.0,
+ "learning_rate": 9.345794392523364e-07,
+ "loss": 1.46797299,
+ "step": 1
+ },
+ {
+ "acc": 0.75035373,
+ "epoch": 0.11,
+ "learning_rate": 9.158878504672898e-05,
+ "loss": 0.95308616,
+ "step": 100
+ },
+ {
+ "acc": 0.78029877,
+ "epoch": 0.23,
+ "learning_rate": 9.735849056603774e-05,
+ "loss": 0.77814468,
+ "step": 200
+ },
+ {
+ "acc": 0.78718307,
+ "epoch": 0.34,
+ "learning_rate": 9.4455732946299e-05,
+ "loss": 0.75001068,
+ "step": 300
+ },
+ {
+ "acc": 0.79127777,
+ "epoch": 0.45,
+ "learning_rate": 9.155297532656024e-05,
+ "loss": 0.73783844,
+ "step": 400
+ },
+ {
+ "acc": 0.79111923,
+ "epoch": 0.56,
+ "learning_rate": 8.865021770682148e-05,
+ "loss": 0.73792175,
+ "step": 500
+ },
+ {
+ "acc": 0.79054779,
+ "epoch": 0.68,
+ "learning_rate": 8.577648766328012e-05,
+ "loss": 0.73389267,
+ "step": 600
+ },
+ {
+ "acc": 0.79359085,
+ "epoch": 0.79,
+ "learning_rate": 8.287373004354137e-05,
+ "loss": 0.72452591,
+ "step": 700
+ },
+ {
+ "acc": 0.79235954,
+ "epoch": 0.9,
+ "learning_rate": 7.997097242380261e-05,
+ "loss": 0.72603645,
+ "step": 800
+ },
+ {
+ "acc": 0.79280067,
+ "epoch": 1.01,
+ "learning_rate": 7.706821480406386e-05,
+ "loss": 0.72392105,
+ "step": 900
+ },
+ {
+ "acc": 0.81310547,
+ "epoch": 1.13,
+ "learning_rate": 7.416545718432511e-05,
+ "loss": 0.63417343,
+ "step": 1000
+ },
+ {
+ "acc": 0.81126122,
+ "epoch": 1.24,
+ "learning_rate": 7.126269956458636e-05,
+ "loss": 0.63897865,
+ "step": 1100
+ },
+ {
+ "acc": 0.81425156,
+ "epoch": 1.35,
+ "learning_rate": 6.83599419448476e-05,
+ "loss": 0.63155251,
+ "step": 1200
+ },
+ {
+ "acc": 0.81428162,
+ "epoch": 1.46,
+ "learning_rate": 6.545718432510885e-05,
+ "loss": 0.63026398,
+ "step": 1300
+ },
+ {
+ "acc": 0.81506256,
+ "epoch": 1.58,
+ "learning_rate": 6.25544267053701e-05,
+ "loss": 0.62718761,
+ "step": 1400
+ },
+ {
+ "acc": 0.81431351,
+ "epoch": 1.69,
+ "learning_rate": 5.965166908563136e-05,
+ "loss": 0.62935677,
+ "step": 1500
+ },
+ {
+ "acc": 0.81549332,
+ "epoch": 1.8,
+ "learning_rate": 5.6748911465892595e-05,
+ "loss": 0.62953285,
+ "step": 1600
+ },
+ {
+ "acc": 0.81804367,
+ "epoch": 1.91,
+ "learning_rate": 5.384615384615385e-05,
+ "loss": 0.61590744,
+ "step": 1700
+ },
+ {
+ "epoch": 1.91,
+ "eval_acc": 0.8087905036894449,
+ "eval_loss": 0.6891986131668091,
+ "eval_runtime": 19.5638,
+ "eval_samples_per_second": 7.361,
+ "eval_steps_per_second": 3.68,
+ "step": 1700
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 3552,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 1700,
+ "total_flos": 2.3576867592376156e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoints/v10-20240205-114325/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:512e7e11bc024f99d29cf030056db6d63e94df8c427e85aad6b7706e11907c29
+ size 6584
checkpoints/v13-20240202-072530/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.0
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v13-20240202-072530/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
checkpoints/v13-20240202-072530/default/adapter_config.json ADDED
@@ -0,0 +1,35 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "enable_lora": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": null,
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "swift_type": "LORA",
+ "target_modules": [
+ "down_proj",
+ "k_proj",
+ "o_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj",
+ "q_proj"
+ ],
+ "task_type": null,
+ "use_merged_linear": false,
+ "use_qa_lora": false
+ }
checkpoints/v13-20240202-072530/default/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d8f6611b6316ded5bbacfc99c8015f21eb07858e2419695ba2a61cbc21c3f63
+ size 62997320
checkpoints/v13-20240202-072530/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "eos_token_id": 2,
+ "max_new_tokens": 512,
+ "pad_token_id": 2,
+ "repetition_penalty": 0.99,
+ "transformers_version": "4.37.2"
+ }
checkpoints/v13-20240202-072530/sft_args.json ADDED
@@ -0,0 +1,129 @@
+ {
+ "model_type": "solar-10-7b-instruct-v1",
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "model_cache_dir": "/home/aiscuser/Swift-Scripts/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0",
+ "sft_type": "lora",
+ "freeze_parameters": 0.0,
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "output_dir": "/home/aiscuser/Swift-Scripts/output/solar-10-7b-instruct-v1/v13-20240202-072530",
+ "add_output_dir_suffix": true,
+ "custom_output_dir_suffix": null,
+ "ddp_backend": "nccl",
+ "seed": 42,
+ "resume_from_checkpoint": null,
+ "dtype": "fp16",
+ "dataset": [
+ "_custom_dataset"
+ ],
+ "dataset_seed": 42,
+ "dataset_test_ratio": 0.01,
+ "train_dataset_sample": -1,
+ "val_dataset_sample": null,
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.",
+ "max_length": 3072,
+ "truncation_strategy": "delete",
+ "check_dataset_strategy": "warning",
+ "custom_train_dataset_path": [
+ "/home/aiscuser/Swift-Scripts/data/wsdm/model/Pseudo/best_eval_1.01/release_train_data.json"
+ ],
+ "custom_val_dataset_path": [],
+ "self_cognition_sample": 0,
+ "model_name": null,
+ "model_author": null,
+ "quantization_bit": 0,
+ "bnb_4bit_comp_dtype": "fp16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "lora_target_modules": [
+ "down_proj",
+ "k_proj",
+ "o_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj",
+ "q_proj"
+ ],
+ "lora_rank": 8,
+ "lora_alpha": 16,
+ "lora_dropout_p": 0.05,
+ "neftune_alpha": 0.0,
+ "gradient_checkpointing": true,
+ "deepspeed_config_path": "/home/aiscuser/Swift-Scripts/config/zero2.json",
+ "batch_size": 1,
+ "eval_batch_size": 1,
+ "num_train_epochs": 4,
+ "max_steps": -1,
+ "optim": "adamw_torch",
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "gradient_accumulation_steps": 8,
+ "max_grad_norm": 0.5,
+ "predict_with_generate": false,
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.03,
+ "eval_steps": 100,
+ "save_steps": 100,
+ "only_save_model": true,
+ "save_total_limit": null,
+ "logging_steps": 100,
+ "dataloader_num_workers": 1,
+ "push_to_hub": false,
+ "hub_model_id": "solar-10-7b-instruct-v1-lora",
+ "hub_private_repo": true,
+ "push_hub_strategy": "push_best",
+ "hub_token": null,
+ "test_oom_error": false,
+ "disable_tqdm": false,
+ "lazy_tokenize": false,
+ "preprocess_num_proc": 1,
+ "use_flash_attn": null,
+ "ignore_args_error": false,
+ "logging_dir": "/home/aiscuser/Swift-Scripts/output/solar-10-7b-instruct-v1/v13-20240202-072530/runs",
+ "report_to": [
+ "all"
+ ],
+ "check_model_is_latest": false,
+ "acc_strategy": "token",
+ "save_on_each_node": true,
+ "save_strategy": "steps",
+ "save_safetensors": true,
+ "max_new_tokens": 2048,
+ "do_sample": true,
+ "temperature": 0.3,
+ "top_k": 20,
+ "top_p": 0.7,
+ "repetition_penalty": 1.05,
+ "torch_dtype": "torch.float16",
+ "fp16": true,
+ "bf16": false,
+ "bnb_4bit_compute_dtype": "torch.float16",
+ "load_in_4bit": false,
+ "load_in_8bit": false,
+ "train_sampler_random": true,
+ "deepspeed": {
+ "train_batch_size": "auto",
+ "train_micro_batch_size_per_gpu": "auto",
+ "gradient_accumulation_steps": "auto",
+ "gradient_clipping": "auto",
+ "zero_allow_untested_optimizer": true,
+ "fp16": {
+ "enabled": "auto",
+ "loss_scale": 0,
+ "initial_scale_power": 16,
+ "loss_scale_window": 1000,
+ "hysteresis": 2,
+ "min_loss_scale": 1
+ },
+ "zero_optimization": {
+ "stage": 2,
+ "allgather_partitions": true,
+ "allgather_bucket_size": 500000000.0,
+ "reduce_scatter": true,
+ "reduce_bucket_size": 500000000.0,
+ "overlap_comm": false,
+ "contiguous_gradients": true
+ }
+ }
+ }
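Unlike the other runs, this one trains under DeepSpeed ZeRO stage 2, and the `"auto"` placeholders in the inline config are resolved from the Trainer arguments at launch. A sketch of how they resolve (the GPU count is an assumption, as it is not recorded in the file):

```python
# How the "auto" fields in the inline DeepSpeed config above resolve.
micro_batch = 1   # from "batch_size" -> train_micro_batch_size_per_gpu
grad_accum = 8    # from "gradient_accumulation_steps"
world_size = 4    # assumption: not recorded in sft_args.json

print("train_batch_size:", micro_batch * grad_accum * world_size)
print("gradient_clipping:", 0.5)   # from "max_grad_norm"
print("fp16.enabled:", True)       # from "fp16": true
```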
checkpoints/v13-20240202-072530/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoints/v13-20240202-072530/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/v13-20240202-072530/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
checkpoints/v13-20240202-072530/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
checkpoints/v13-20240202-072530/trainer_state.json ADDED
@@ -0,0 +1,293 @@
+ {
+ "best_metric": 0.69132841,
+ "best_model_checkpoint": "/home/aiscuser/Swift-Scripts/output/solar-10-7b-instruct-v1/v13-20240202-072530/checkpoint-1600",
+ "epoch": 1.9144144144144144,
+ "eval_steps": 100,
+ "global_step": 1700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "acc": 0.74863632,
+ "epoch": 0.11,
+ "learning_rate": 9.158878504672898e-05,
+ "loss": 0.96384094,
+ "step": 100
+ },
+ {
+ "epoch": 0.11,
+ "eval_acc": 0.7936049620361458,
+ "eval_loss": 0.7536066770553589,
+ "eval_runtime": 42.7757,
+ "eval_samples_per_second": 3.366,
+ "eval_steps_per_second": 1.683,
+ "step": 100
+ },
+ {
+ "acc": 0.78036346,
+ "epoch": 0.23,
+ "learning_rate": 9.738751814223513e-05,
+ "loss": 0.77767517,
+ "step": 200
+ },
+ {
+ "epoch": 0.23,
+ "eval_acc": 0.7983638113570741,
+ "eval_loss": 0.7331455945968628,
+ "eval_runtime": 42.7344,
+ "eval_samples_per_second": 3.37,
+ "eval_steps_per_second": 1.685,
+ "step": 200
+ },
+ {
+ "acc": 0.78766861,
+ "epoch": 0.34,
+ "learning_rate": 9.448476052249638e-05,
+ "loss": 0.74959351,
+ "step": 300
+ },
+ {
+ "epoch": 0.34,
+ "eval_acc": 0.8007432360175383,
+ "eval_loss": 0.721094012260437,
+ "eval_runtime": 42.8657,
+ "eval_samples_per_second": 3.359,
+ "eval_steps_per_second": 1.68,
+ "step": 300
+ },
+ {
+ "acc": 0.79140198,
+ "epoch": 0.45,
+ "learning_rate": 9.158200290275763e-05,
+ "loss": 0.73811386,
+ "step": 400
+ },
+ {
+ "epoch": 0.45,
+ "eval_acc": 0.8014650839482408,
+ "eval_loss": 0.7137336730957031,
+ "eval_runtime": 42.7833,
+ "eval_samples_per_second": 3.366,
+ "eval_steps_per_second": 1.683,
+ "step": 400
+ },
+ {
+ "acc": 0.79137726,
+ "epoch": 0.56,
+ "learning_rate": 8.867924528301888e-05,
+ "loss": 0.73802383,
+ "step": 500
+ },
+ {
+ "epoch": 0.56,
+ "eval_acc": 0.8026948989412896,
+ "eval_loss": 0.7035187482833862,
+ "eval_runtime": 42.8037,
+ "eval_samples_per_second": 3.364,
+ "eval_steps_per_second": 1.682,
+ "step": 500
+ },
+ {
+ "acc": 0.79086266,
+ "epoch": 0.68,
+ "learning_rate": 8.577648766328012e-05,
+ "loss": 0.73371964,
+ "step": 600
+ },
+ {
+ "epoch": 0.68,
+ "eval_acc": 0.8036573628488932,
+ "eval_loss": 0.7004870772361755,
+ "eval_runtime": 42.738,
+ "eval_samples_per_second": 3.369,
+ "eval_steps_per_second": 1.685,
+ "step": 600
+ },
+ {
+ "acc": 0.79403992,
+ "epoch": 0.79,
+ "learning_rate": 8.290275761973875e-05,
+ "loss": 0.72441986,
+ "step": 700
+ },
+ {
+ "epoch": 0.79,
+ "eval_acc": 0.8039781841514276,
+ "eval_loss": 0.6986453533172607,
+ "eval_runtime": 42.7916,
+ "eval_samples_per_second": 3.365,
+ "eval_steps_per_second": 1.683,
+ "step": 700
+ },
+ {
+ "acc": 0.79215607,
+ "epoch": 0.9,
+ "learning_rate": 8e-05,
+ "loss": 0.72639374,
+ "step": 800
+ },
+ {
+ "epoch": 0.9,
+ "eval_acc": 0.8061437279435355,
+ "eval_loss": 0.6950626373291016,
+ "eval_runtime": 42.7835,
+ "eval_samples_per_second": 3.366,
+ "eval_steps_per_second": 1.683,
+ "step": 800
+ },
+ {
+ "acc": 0.79285507,
+ "epoch": 1.01,
+ "learning_rate": 7.709724238026124e-05,
+ "loss": 0.72425797,
+ "step": 900
+ },
+ {
+ "epoch": 1.01,
+ "eval_acc": 0.8062774034862582,
+ "eval_loss": 0.7004315257072449,
+ "eval_runtime": 42.8077,
+ "eval_samples_per_second": 3.364,
+ "eval_steps_per_second": 1.682,
+ "step": 900
+ },
+ {
+ "acc": 0.81319962,
+ "epoch": 1.13,
+ "learning_rate": 7.41944847605225e-05,
+ "loss": 0.63495399,
+ "step": 1000
+ },
+ {
+ "epoch": 1.13,
+ "eval_acc": 0.804539621430863,
+ "eval_loss": 0.6987484693527222,
+ "eval_runtime": 42.8689,
+ "eval_samples_per_second": 3.359,
+ "eval_steps_per_second": 1.68,
+ "step": 1000
+ },
+ {
+ "acc": 0.81136383,
+ "epoch": 1.24,
+ "learning_rate": 7.129172714078375e-05,
+ "loss": 0.63906494,
+ "step": 1100
+ },
+ {
+ "epoch": 1.24,
+ "eval_acc": 0.8052614693615656,
+ "eval_loss": 0.6985421776771545,
+ "eval_runtime": 42.8262,
+ "eval_samples_per_second": 3.362,
+ "eval_steps_per_second": 1.681,
+ "step": 1100
+ },
+ {
+ "acc": 0.81388062,
+ "epoch": 1.35,
+ "learning_rate": 6.8388969521045e-05,
+ "loss": 0.63106087,
+ "step": 1200
+ },
+ {
+ "epoch": 1.35,
+ "eval_acc": 0.8074002780451288,
+ "eval_loss": 0.6972200274467468,
+ "eval_runtime": 42.7351,
+ "eval_samples_per_second": 3.37,
+ "eval_steps_per_second": 1.685,
+ "step": 1200
+ },
+ {
+ "acc": 0.81398033,
+ "epoch": 1.46,
+ "learning_rate": 6.548621190130625e-05,
+ "loss": 0.63091103,
+ "step": 1300
+ },
+ {
+ "epoch": 1.46,
+ "eval_acc": 0.807854774890386,
+ "eval_loss": 0.6948702931404114,
+ "eval_runtime": 42.8184,
+ "eval_samples_per_second": 3.363,
+ "eval_steps_per_second": 1.682,
+ "step": 1300
+ },
+ {
+ "acc": 0.81455872,
+ "epoch": 1.58,
+ "learning_rate": 6.258345428156749e-05,
+ "loss": 0.62791916,
+ "step": 1400
+ },
+ {
+ "epoch": 1.58,
+ "eval_acc": 0.8081755961929206,
+ "eval_loss": 0.6945727467536926,
+ "eval_runtime": 42.7416,
+ "eval_samples_per_second": 3.369,
+ "eval_steps_per_second": 1.685,
+ "step": 1400
+ },
+ {
+ "acc": 0.81421364,
+ "epoch": 1.69,
+ "learning_rate": 5.968069666182874e-05,
+ "loss": 0.62950912,
+ "step": 1500
+ },
+ {
+ "epoch": 1.69,
+ "eval_acc": 0.8069992514169607,
+ "eval_loss": 0.692737340927124,
+ "eval_runtime": 42.8216,
+ "eval_samples_per_second": 3.363,
+ "eval_steps_per_second": 1.681,
+ "step": 1500
+ },
+ {
+ "acc": 0.81544525,
+ "epoch": 1.8,
+ "learning_rate": 5.6777939042089986e-05,
+ "loss": 0.63058929,
+ "step": 1600
+ },
+ {
+ "epoch": 1.8,
+ "eval_acc": 0.8083894770612768,
+ "eval_loss": 0.6913284063339233,
+ "eval_runtime": 42.8047,
+ "eval_samples_per_second": 3.364,
+ "eval_steps_per_second": 1.682,
+ "step": 1600
+ },
+ {
+ "acc": 0.81801094,
+ "epoch": 1.91,
+ "learning_rate": 5.387518142235124e-05,
+ "loss": 0.61622761,
+ "step": 1700
+ },
+ {
+ "epoch": 1.91,
+ "eval_acc": 0.8079349802160197,
+ "eval_loss": 0.6917322874069214,
+ "eval_runtime": 42.7927,
+ "eval_samples_per_second": 3.365,
+ "eval_steps_per_second": 1.683,
+ "step": 1700
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 3552,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 100,
+ "total_flos": 2.357686760579793e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
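With `eval_steps` at 100 this run logs an eval point after every logging step, so the best checkpoint can be recovered programmatically; a sketch, assuming a local copy of the file:

```python
# Pick the best checkpoint from the dense eval log of this run.
import json

with open("checkpoints/v13-20240202-072530/trainer_state.json") as f:
    state = json.load(f)

evals = [e for e in state["log_history"] if "eval_loss" in e]
best = min(evals, key=lambda e: e["eval_loss"])
print(best["step"], best["eval_loss"])  # 1600, ~0.6913 - matching best_metric above
```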
checkpoints/v13-20240202-072530/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0e739e218b320d75a6767aaeeec34986b2dc66c1f5b4f77b76cadee550865a3
+ size 8120
checkpoints/v13-20240206-111010/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.2
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v13-20240206-111010/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
checkpoints/v13-20240206-111010/default/adapter_config.json ADDED
@@ -0,0 +1,36 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "enable_lora": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "lora_dtype": "fp32",
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": [],
+ "peft_type": null,
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "swift_type": "LORA",
+ "target_modules": [
+ "up_proj",
+ "gate_proj",
+ "q_proj",
+ "down_proj",
+ "v_proj",
+ "o_proj",
+ "k_proj"
+ ],
+ "task_type": null,
+ "use_merged_linear": false,
+ "use_qa_lora": false
+ }
checkpoints/v13-20240206-111010/default/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17b5c79fce9c77d700d912290deded4a49c40a1f14269f9d99d900eb411a70cd
+ size 125912272
checkpoints/v13-20240206-111010/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "eos_token_id": 2,
+ "max_new_tokens": 512,
+ "pad_token_id": 2,
+ "repetition_penalty": 0.99,
+ "transformers_version": "4.37.2"
+ }
checkpoints/v13-20240206-111010/sft_args.json ADDED
@@ -0,0 +1,119 @@
+ {
+ "model_type": "solar-10-7b-instruct-v1",
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "model_cache_dir": "/mnt/cachenew/yangzekang/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0",
+ "sft_type": "lora",
+ "freeze_parameters": 0.0,
+ "additional_trainable_parameters": [],
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "output_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v13-20240206-111010",
+ "add_output_dir_suffix": true,
+ "ddp_backend": "nccl",
+ "seed": 42,
+ "resume_from_checkpoint": null,
+ "dtype": "fp16",
+ "dataset": [
+ "_custom_dataset"
+ ],
+ "dataset_seed": 42,
+ "dataset_test_ratio": 0.01,
+ "train_dataset_sample": -1,
+ "val_dataset_sample": null,
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.",
+ "max_length": 3072,
+ "truncation_strategy": "delete",
+ "check_dataset_strategy": "warning",
+ "custom_train_dataset_path": [
+ "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/data/wsdm/model/pseudo/phase_1/best_eval_1.00/release_train_data.json"
+ ],
+ "custom_val_dataset_path": [],
+ "self_cognition_sample": 0,
+ "model_name": [
+ null,
+ null
+ ],
+ "model_author": [
+ null,
+ null
+ ],
+ "quantization_bit": 0,
+ "bnb_4bit_comp_dtype": "fp16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "lora_target_modules": [
+ "up_proj",
+ "gate_proj",
+ "q_proj",
+ "down_proj",
+ "v_proj",
+ "o_proj",
+ "k_proj"
+ ],
+ "lora_rank": 8,
+ "lora_alpha": 16,
+ "lora_dropout_p": 0.05,
+ "lora_bias_trainable": "none",
+ "lora_modules_to_save": [],
+ "lora_dtype": "fp32",
+ "neftune_alpha": 0.0,
+ "gradient_checkpointing": true,
+ "deepspeed_config_path": null,
+ "batch_size": 1,
+ "eval_batch_size": 1,
+ "num_train_epochs": 4,
+ "max_steps": -1,
+ "optim": "adamw_torch",
+ "adam_beta1": 0.9,
+ "adam_beta2": 0.999,
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "gradient_accumulation_steps": 8,
+ "max_grad_norm": 0.5,
+ "predict_with_generate": false,
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.03,
+ "eval_steps": 1700,
+ "save_steps": 1700,
+ "save_only_model": true,
+ "save_total_limit": null,
+ "logging_steps": 100,
+ "dataloader_num_workers": 1,
+ "push_to_hub": false,
+ "hub_model_id": "solar-10-7b-instruct-v1-lora",
+ "hub_private_repo": true,
+ "push_hub_strategy": "push_best",
+ "hub_token": null,
+ "test_oom_error": false,
+ "disable_tqdm": false,
+ "lazy_tokenize": false,
+ "preprocess_num_proc": 1,
+ "use_flash_attn": null,
+ "ignore_args_error": false,
+ "check_model_is_latest": false,
+ "logging_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v13-20240206-111010/runs",
+ "report_to": [
+ "all"
+ ],
+ "acc_strategy": "token",
+ "save_on_each_node": true,
+ "save_strategy": "steps",
+ "save_safetensors": true,
+ "max_new_tokens": 2048,
+ "do_sample": true,
+ "temperature": 0.3,
+ "top_k": 20,
+ "top_p": 0.7,
+ "repetition_penalty": 1.05,
+ "num_beams": 1,
+ "only_save_model": true,
+ "torch_dtype": "torch.float16",
+ "fp16": true,
+ "bf16": false,
+ "bnb_4bit_compute_dtype": "torch.float16",
+ "load_in_4bit": false,
+ "load_in_8bit": false,
+ "train_sampler_random": true,
+ "deepspeed": null
+ }
checkpoints/v13-20240206-111010/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
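Note that the pad token is mapped to the same string as the EOS token (`</s>`), a common choice for Llama-family tokenizers that ship without a dedicated pad token. A minimal check, assuming `transformers` is installed and the checkpoint directory is available locally:

```python
from transformers import AutoTokenizer

# Hypothetical local path; point this at your checkout of the checkpoint.
tok = AutoTokenizer.from_pretrained("checkpoints/v13-20240206-111010")

# Both print "</s>", confirming pad_token == eos_token.
print(tok.eos_token, tok.pad_token)
```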
checkpoints/v13-20240206-111010/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/v13-20240206-111010/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
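The three lines above are a Git LFS pointer, not the SentencePiece model itself; the actual blob is fetched with `git lfs pull`. A minimal sketch (assuming the blob has already been pulled locally) that verifies the download against the pointer's oid and size:

```python
import hashlib
from pathlib import Path

# Hypothetical local path to the pulled LFS blob.
path = Path("checkpoints/v13-20240206-111010/tokenizer.model")

data = path.read_bytes()
# The pointer records the blob's byte size and SHA-256; both must match.
print("size ok:", len(data) == 493443)
print("oid ok:", hashlib.sha256(data).hexdigest()
      == "dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055")
```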
checkpoints/v13-20240206-111010/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
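The chat_template above renders conversations in SOLAR's `### System:` / `### User:` / `### Assistant:` format. A minimal sketch, assuming `transformers` is installed and the checkpoint directory is available locally, showing how the template expands a conversation:

```python
from transformers import AutoTokenizer

# Hypothetical local path; point this at your checkout of the checkpoint.
tok = AutoTokenizer.from_pretrained("checkpoints/v13-20240206-111010")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is LoRA?"},
]

# add_generation_prompt=True appends the trailing "### Assistant:\n" cue.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```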
checkpoints/v13-20240206-111010/trainer_state.json ADDED
@@ -0,0 +1,156 @@
+ {
+ "best_metric": 0.68276149,
+ "best_model_checkpoint": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v13-20240206-111010/checkpoint-1700",
+ "epoch": 1.9144144144144144,
+ "eval_steps": 1700,
+ "global_step": 1700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "acc": 0.67516661,
+ "epoch": 0.0,
+ "learning_rate": 9.345794392523364e-07,
+ "loss": 1.46797299,
+ "step": 1
+ },
+ {
+ "acc": 0.75082575,
+ "epoch": 0.11,
+ "learning_rate": 9.252336448598131e-05,
+ "loss": 0.95137394,
+ "step": 100
+ },
+ {
+ "acc": 0.78081596,
+ "epoch": 0.23,
+ "learning_rate": 9.732946298984034e-05,
+ "loss": 0.77715546,
+ "step": 200
+ },
+ {
+ "acc": 0.7879702,
+ "epoch": 0.34,
+ "learning_rate": 9.44267053701016e-05,
+ "loss": 0.7487645,
+ "step": 300
+ },
+ {
+ "acc": 0.79198853,
+ "epoch": 0.45,
+ "learning_rate": 9.152394775036285e-05,
+ "loss": 0.73725586,
+ "step": 400
+ },
+ {
+ "acc": 0.79181892,
+ "epoch": 0.56,
+ "learning_rate": 8.86211901306241e-05,
+ "loss": 0.73659386,
+ "step": 500
+ },
+ {
+ "acc": 0.791054,
+ "epoch": 0.68,
+ "learning_rate": 8.571843251088535e-05,
+ "loss": 0.73336884,
+ "step": 600
+ },
+ {
+ "acc": 0.79418472,
+ "epoch": 0.79,
+ "learning_rate": 8.284470246734399e-05,
+ "loss": 0.72362808,
+ "step": 700
+ },
+ {
+ "acc": 0.79279587,
+ "epoch": 0.9,
+ "learning_rate": 7.994194484760524e-05,
+ "loss": 0.72401505,
+ "step": 800
+ },
+ {
+ "acc": 0.79312164,
+ "epoch": 1.01,
+ "learning_rate": 7.703918722786648e-05,
+ "loss": 0.7227565,
+ "step": 900
+ },
+ {
+ "acc": 0.8135437,
+ "epoch": 1.13,
+ "learning_rate": 7.413642960812773e-05,
+ "loss": 0.63391575,
+ "step": 1000
+ },
+ {
+ "acc": 0.81166725,
+ "epoch": 1.24,
+ "learning_rate": 7.123367198838897e-05,
+ "loss": 0.63856529,
+ "step": 1100
+ },
+ {
+ "acc": 0.81463379,
+ "epoch": 1.35,
+ "learning_rate": 6.833091436865022e-05,
+ "loss": 0.63027618,
+ "step": 1200
+ },
+ {
+ "acc": 0.81466286,
+ "epoch": 1.46,
+ "learning_rate": 6.542815674891147e-05,
+ "loss": 0.62910736,
+ "step": 1300
+ },
+ {
+ "acc": 0.81553825,
+ "epoch": 1.58,
+ "learning_rate": 6.252539912917271e-05,
+ "loss": 0.62564072,
+ "step": 1400
+ },
+ {
+ "acc": 0.81496376,
+ "epoch": 1.69,
+ "learning_rate": 5.9622641509433966e-05,
+ "loss": 0.62815582,
+ "step": 1500
+ },
+ {
+ "acc": 0.81624107,
+ "epoch": 1.8,
+ "learning_rate": 5.671988388969521e-05,
+ "loss": 0.6277507,
+ "step": 1600
+ },
+ {
+ "acc": 0.81836472,
+ "epoch": 1.91,
+ "learning_rate": 5.381712626995646e-05,
+ "loss": 0.61489536,
+ "step": 1700
+ },
+ {
+ "epoch": 1.91,
+ "eval_acc": 0.8124290204157093,
+ "eval_loss": 0.6827614903450012,
+ "eval_runtime": 19.6247,
+ "eval_samples_per_second": 7.338,
+ "eval_steps_per_second": 3.669,
+ "step": 1700
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 3552,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 1700,
+ "total_flos": 2.357705064388231e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
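The log_history above records training loss and token accuracy every 100 steps plus one evaluation at step 1700. A minimal sketch (assuming `matplotlib` is installed and the checkpoint is checked out locally) that plots the training-loss curve from this file:

```python
import json

import matplotlib.pyplot as plt

# Hypothetical local path; point this at your checkout of the checkpoint.
with open("checkpoints/v13-20240206-111010/trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

# Keep only training entries; the evaluation entry carries "eval_loss" instead.
train_logs = [e for e in state["log_history"] if "loss" in e]

plt.plot([e["step"] for e in train_logs], [e["loss"] for e in train_logs], marker="o")
plt.xlabel("global step")
plt.ylabel("training loss")
plt.title("SOLAR-10.7B LoRA SFT (v13-20240206-111010)")
plt.savefig("loss_curve.png")
```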
checkpoints/v13-20240206-111010/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e4aca4f9bd7970195cea06fd363091f42810a0b49c8bf34e182632eb6bf1e90e
+ size 6584
checkpoints/v16-20240206-224659/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.2
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v16-20240206-224659/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
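The configuration above records that this checkpoint is a LoRA adapter (tuner_backend "swift") over upstage/SOLAR-10.7B-Instruct-v1.0. A minimal loading sketch, assuming the ms-swift package is installed and using its Swift.from_pretrained helper (the exact call may vary across SWIFT versions, so treat this as an outline rather than the authors' own loading code):

```python
import torch
from swift import Swift  # ms-swift, the tuner backend recorded above
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE = "upstage/SOLAR-10.7B-Instruct-v1.0"
# Hypothetical local path to one of the adapter checkpoints in this repo.
ADAPTER = "checkpoints/v16-20240206-224659"

model = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.float16, device_map="auto"
)
tok = AutoTokenizer.from_pretrained(ADAPTER)

# Attach the LoRA weights from the checkpoint directory to the base model.
model = Swift.from_pretrained(model, ADAPTER)
model.eval()
```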