zhangzhao219 committed
Commit a542899 (1 parent: 10bfd24)

Upload 96 files

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete set.
Files changed (50)
  1. checkpoints/v08-20240205-114459/README.md +10 -0
  2. checkpoints/v08-20240205-114459/configuration.json +11 -0
  3. checkpoints/v08-20240205-114459/default/adapter_config.json +36 -0
  4. checkpoints/v08-20240205-114459/default/adapter_model.safetensors +3 -0
  5. checkpoints/v08-20240205-114459/generation_config.json +7 -0
  6. checkpoints/v08-20240205-114459/sft_args.json +119 -0
  7. checkpoints/v08-20240205-114459/special_tokens_map.json +30 -0
  8. checkpoints/v08-20240205-114459/tokenizer.json +0 -0
  9. checkpoints/v08-20240205-114459/tokenizer.model +3 -0
  10. checkpoints/v08-20240205-114459/tokenizer_config.json +43 -0
  11. checkpoints/v08-20240205-114459/trainer_state.json +156 -0
  12. checkpoints/v08-20240205-114459/training_args.bin +3 -0
  13. checkpoints/v10-20240205-114325/README.md +10 -0
  14. checkpoints/v10-20240205-114325/configuration.json +11 -0
  15. checkpoints/v10-20240205-114325/default/adapter_config.json +36 -0
  16. checkpoints/v10-20240205-114325/default/adapter_model.safetensors +3 -0
  17. checkpoints/v10-20240205-114325/generation_config.json +7 -0
  18. checkpoints/v10-20240205-114325/sft_args.json +119 -0
  19. checkpoints/v10-20240205-114325/special_tokens_map.json +30 -0
  20. checkpoints/v10-20240205-114325/tokenizer.json +0 -0
  21. checkpoints/v10-20240205-114325/tokenizer.model +3 -0
  22. checkpoints/v10-20240205-114325/tokenizer_config.json +43 -0
  23. checkpoints/v10-20240205-114325/trainer_state.json +156 -0
  24. checkpoints/v10-20240205-114325/training_args.bin +3 -0
  25. checkpoints/v13-20240202-072530/README.md +10 -0
  26. checkpoints/v13-20240202-072530/configuration.json +11 -0
  27. checkpoints/v13-20240202-072530/default/adapter_config.json +35 -0
  28. checkpoints/v13-20240202-072530/default/adapter_model.safetensors +3 -0
  29. checkpoints/v13-20240202-072530/generation_config.json +7 -0
  30. checkpoints/v13-20240202-072530/sft_args.json +129 -0
  31. checkpoints/v13-20240202-072530/special_tokens_map.json +30 -0
  32. checkpoints/v13-20240202-072530/tokenizer.json +0 -0
  33. checkpoints/v13-20240202-072530/tokenizer.model +3 -0
  34. checkpoints/v13-20240202-072530/tokenizer_config.json +43 -0
  35. checkpoints/v13-20240202-072530/trainer_state.json +293 -0
  36. checkpoints/v13-20240202-072530/training_args.bin +3 -0
  37. checkpoints/v13-20240206-111010/README.md +10 -0
  38. checkpoints/v13-20240206-111010/configuration.json +11 -0
  39. checkpoints/v13-20240206-111010/default/adapter_config.json +36 -0
  40. checkpoints/v13-20240206-111010/default/adapter_model.safetensors +3 -0
  41. checkpoints/v13-20240206-111010/generation_config.json +7 -0
  42. checkpoints/v13-20240206-111010/sft_args.json +119 -0
  43. checkpoints/v13-20240206-111010/special_tokens_map.json +30 -0
  44. checkpoints/v13-20240206-111010/tokenizer.json +0 -0
  45. checkpoints/v13-20240206-111010/tokenizer.model +3 -0
  46. checkpoints/v13-20240206-111010/tokenizer_config.json +43 -0
  47. checkpoints/v13-20240206-111010/trainer_state.json +156 -0
  48. checkpoints/v13-20240206-111010/training_args.bin +3 -0
  49. checkpoints/v16-20240206-224659/README.md +10 -0
  50. checkpoints/v16-20240206-224659/configuration.json +11 -0
checkpoints/v08-20240205-114459/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.2
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v08-20240205-114459/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
checkpoints/v08-20240205-114459/default/adapter_config.json ADDED
@@ -0,0 +1,36 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "enable_lora": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "lora_dtype": "fp32",
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": [],
+ "peft_type": null,
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "swift_type": "LORA",
+ "target_modules": [
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "up_proj",
+ "v_proj",
+ "gate_proj",
+ "k_proj"
+ ],
+ "task_type": null,
+ "use_merged_linear": false,
+ "use_qa_lora": false
+ }
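This adapter is saved in SWIFT's own tuner format (`"swift_type": "LORA"`, backend `swift`) rather than plain PEFT, so it is attached through the SWIFT API. A minimal sketch, assuming the ms-swift package's `Swift.from_pretrained` entry point and a local copy of this checkpoint directory (the path is illustrative):

```python
# Sketch only: attach the SWIFT-format LoRA adapter above to the base model.
# Assumes ms-swift is installed; the checkpoint path is an example.
import torch
from transformers import AutoModelForCausalLM
from swift import Swift

base = AutoModelForCausalLM.from_pretrained(
    "upstage/SOLAR-10.7B-Instruct-v1.0", torch_dtype=torch.float16
)
# The adapter weights sit under the "default/" subdirectory of the checkpoint.
model = Swift.from_pretrained(base, "checkpoints/v08-20240205-114459")
model.eval()
```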
checkpoints/v08-20240205-114459/default/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d7357403b5379ca631fc54f34056c390fb37172056a7d1fc9e68d50205ad0c8
+ size 125912272
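Note that these three lines are a Git LFS pointer, not the tensors themselves: the ~126 MB safetensors blob is stored out of band and addressed by its SHA-256. A quick way to check a downloaded blob against the pointer (the local path is an assumption):

```python
# Verify a downloaded LFS object against the oid recorded in the pointer.
import hashlib

def sha256_of(path: str, chunk: int = 1 << 20) -> str:
    # Stream the file so large checkpoints need not fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk), b""):
            h.update(block)
    return h.hexdigest()

expected = "7d7357403b5379ca631fc54f34056c390fb37172056a7d1fc9e68d50205ad0c8"
path = "checkpoints/v08-20240205-114459/default/adapter_model.safetensors"  # assumed local path
assert sha256_of(path) == expected, "hash mismatch - incomplete LFS download?"
```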
checkpoints/v08-20240205-114459/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "eos_token_id": 2,
+ "max_new_tokens": 512,
+ "pad_token_id": 2,
+ "repetition_penalty": 0.99,
+ "transformers_version": "4.37.2"
+ }
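These decoding defaults are what `transformers` picks up automatically at generation time; a sketch, assuming the checkpoint directory is available locally:

```python
# Load the decoding defaults stored in generation_config.json.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("checkpoints/v08-20240205-114459")
print(gen_cfg.max_new_tokens)      # 512
print(gen_cfg.repetition_penalty)  # 0.99
```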
checkpoints/v08-20240205-114459/sft_args.json ADDED
@@ -0,0 +1,119 @@
+ {
+ "model_type": "solar-10-7b-instruct-v1",
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "model_cache_dir": "/mnt/cachenew/yangzekang/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0",
+ "sft_type": "lora",
+ "freeze_parameters": 0.0,
+ "additional_trainable_parameters": [],
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "output_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v8-20240205-114459",
+ "add_output_dir_suffix": true,
+ "ddp_backend": "nccl",
+ "seed": 42,
+ "resume_from_checkpoint": null,
+ "dtype": "fp16",
+ "dataset": [
+ "_custom_dataset"
+ ],
+ "dataset_seed": 42,
+ "dataset_test_ratio": 0.01,
+ "train_dataset_sample": -1,
+ "val_dataset_sample": null,
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.",
+ "max_length": 3072,
+ "truncation_strategy": "delete",
+ "check_dataset_strategy": "warning",
+ "custom_train_dataset_path": [
+ "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/data/wsdm/model/pseudo/1.01/release_train_data.json"
+ ],
+ "custom_val_dataset_path": [],
+ "self_cognition_sample": 0,
+ "model_name": [
+ null,
+ null
+ ],
+ "model_author": [
+ null,
+ null
+ ],
+ "quantization_bit": 0,
+ "bnb_4bit_comp_dtype": "fp16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "lora_target_modules": [
+ "o_proj",
+ "down_proj",
+ "q_proj",
+ "up_proj",
+ "v_proj",
+ "gate_proj",
+ "k_proj"
+ ],
+ "lora_rank": 8,
+ "lora_alpha": 16,
+ "lora_dropout_p": 0.05,
+ "lora_bias_trainable": "none",
+ "lora_modules_to_save": [],
+ "lora_dtype": "fp32",
+ "neftune_alpha": 0.0,
+ "gradient_checkpointing": true,
+ "deepspeed_config_path": null,
+ "batch_size": 1,
+ "eval_batch_size": 1,
+ "num_train_epochs": 4,
+ "max_steps": -1,
+ "optim": "adamw_torch",
+ "adam_beta1": 0.9,
+ "adam_beta2": 0.999,
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "gradient_accumulation_steps": 8,
+ "max_grad_norm": 0.5,
+ "predict_with_generate": false,
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.03,
+ "eval_steps": 1700,
+ "save_steps": 1700,
+ "save_only_model": true,
+ "save_total_limit": null,
+ "logging_steps": 100,
+ "dataloader_num_workers": 1,
+ "push_to_hub": false,
+ "hub_model_id": "solar-10-7b-instruct-v1-lora",
+ "hub_private_repo": true,
+ "push_hub_strategy": "push_best",
+ "hub_token": null,
+ "test_oom_error": false,
+ "disable_tqdm": false,
+ "lazy_tokenize": false,
+ "preprocess_num_proc": 1,
+ "use_flash_attn": null,
+ "ignore_args_error": false,
+ "check_model_is_latest": false,
+ "logging_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v8-20240205-114459/runs",
+ "report_to": [
+ "all"
+ ],
+ "acc_strategy": "token",
+ "save_on_each_node": true,
+ "save_strategy": "steps",
+ "save_safetensors": true,
+ "max_new_tokens": 2048,
+ "do_sample": true,
+ "temperature": 0.3,
+ "top_k": 20,
+ "top_p": 0.7,
+ "repetition_penalty": 1.05,
+ "num_beams": 1,
+ "only_save_model": true,
+ "torch_dtype": "torch.float16",
+ "fp16": true,
+ "bf16": false,
+ "bnb_4bit_compute_dtype": "torch.float16",
+ "load_in_4bit": false,
+ "load_in_8bit": false,
+ "train_sampler_random": true,
+ "deepspeed": null
+ }
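sft_args.json records per-device settings but not the world size, so the effective schedule has to be reconstructed. A sketch of the arithmetic (the GPU count is an assumption; the step counts come from trainer_state.json below):

```python
# Reconstruct the effective optimization schedule from sft_args.json.
import json

with open("checkpoints/v08-20240205-114459/sft_args.json") as f:  # assumed local path
    args = json.load(f)

world_size = 2  # assumption: the number of GPUs is not recorded in the file
samples_per_step = args["batch_size"] * args["gradient_accumulation_steps"] * world_size
print("samples per optimizer step:", samples_per_step)

# trainer_state.json reports max_steps = 3552 over num_train_epochs = 4, so
# warmup_ratio = 0.03 gives ~107 warmup steps - consistent with the step-1
# learning rate of ~9.35e-07 ~= 1e-4 / 107 logged there.
print("approx. warmup steps:", round(args["warmup_ratio"] * 3552))
```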
checkpoints/v08-20240205-114459/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoints/v08-20240205-114459/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/v08-20240205-114459/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
checkpoints/v08-20240205-114459/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
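The `chat_template` above encodes SOLAR's `### System:` / `### User:` / `### Assistant:` prompt format. It can be rendered with the standard `transformers` API; a sketch, with the checkpoint directory assumed local:

```python
# Render the SOLAR prompt format encoded by the chat_template above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("checkpoints/v08-20240205-114459")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
print(tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# ### System:
# You are a helpful assistant.
#
# ### User:
# Hello!
#
# ### Assistant:
```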
checkpoints/v08-20240205-114459/trainer_state.json ADDED
@@ -0,0 +1,156 @@
+ {
+ "best_metric": 0.69346523,
+ "best_model_checkpoint": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v8-20240205-114459/checkpoint-1700",
+ "epoch": 1.9144144144144144,
+ "eval_steps": 1700,
+ "global_step": 1700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "acc": 0.67516661,
+ "epoch": 0.0,
+ "learning_rate": 9.345794392523364e-07,
+ "loss": 1.46797299,
+ "step": 1
+ },
+ {
+ "acc": 0.74959772,
+ "epoch": 0.11,
+ "learning_rate": 9.158878504672898e-05,
+ "loss": 0.95736756,
+ "step": 100
+ },
+ {
+ "acc": 0.78039505,
+ "epoch": 0.23,
+ "learning_rate": 9.735849056603774e-05,
+ "loss": 0.77719513,
+ "step": 200
+ },
+ {
+ "acc": 0.78730965,
+ "epoch": 0.34,
+ "learning_rate": 9.4455732946299e-05,
+ "loss": 0.74986748,
+ "step": 300
+ },
+ {
+ "acc": 0.79136169,
+ "epoch": 0.45,
+ "learning_rate": 9.155297532656024e-05,
+ "loss": 0.73882896,
+ "step": 400
+ },
+ {
+ "acc": 0.79132759,
+ "epoch": 0.56,
+ "learning_rate": 8.865021770682148e-05,
+ "loss": 0.7381105,
+ "step": 500
+ },
+ {
+ "acc": 0.79090462,
+ "epoch": 0.68,
+ "learning_rate": 8.574746008708273e-05,
+ "loss": 0.73391434,
+ "step": 600
+ },
+ {
+ "acc": 0.79388229,
+ "epoch": 0.79,
+ "learning_rate": 8.284470246734399e-05,
+ "loss": 0.72463707,
+ "step": 700
+ },
+ {
+ "acc": 0.79177132,
+ "epoch": 0.9,
+ "learning_rate": 7.994194484760524e-05,
+ "loss": 0.72666443,
+ "step": 800
+ },
+ {
+ "acc": 0.79278763,
+ "epoch": 1.01,
+ "learning_rate": 7.703918722786648e-05,
+ "loss": 0.72437874,
+ "step": 900
+ },
+ {
+ "acc": 0.81367294,
+ "epoch": 1.13,
+ "learning_rate": 7.413642960812773e-05,
+ "loss": 0.63475815,
+ "step": 1000
+ },
+ {
+ "acc": 0.81142548,
+ "epoch": 1.24,
+ "learning_rate": 7.123367198838897e-05,
+ "loss": 0.63902611,
+ "step": 1100
+ },
+ {
+ "acc": 0.81469452,
+ "epoch": 1.35,
+ "learning_rate": 6.833091436865022e-05,
+ "loss": 0.63060787,
+ "step": 1200
+ },
+ {
+ "acc": 0.81454559,
+ "epoch": 1.46,
+ "learning_rate": 6.542815674891147e-05,
+ "loss": 0.63029087,
+ "step": 1300
+ },
+ {
+ "acc": 0.81508331,
+ "epoch": 1.58,
+ "learning_rate": 6.252539912917271e-05,
+ "loss": 0.62843025,
+ "step": 1400
+ },
+ {
+ "acc": 0.81447418,
+ "epoch": 1.69,
+ "learning_rate": 5.9622641509433966e-05,
+ "loss": 0.62912048,
+ "step": 1500
+ },
+ {
+ "acc": 0.81561844,
+ "epoch": 1.8,
+ "learning_rate": 5.671988388969521e-05,
+ "loss": 0.63019512,
+ "step": 1600
+ },
+ {
+ "acc": 0.8182132,
+ "epoch": 1.91,
+ "learning_rate": 5.381712626995646e-05,
+ "loss": 0.61591331,
+ "step": 1700
+ },
+ {
+ "epoch": 1.91,
+ "eval_acc": 0.8075339535878515,
+ "eval_loss": 0.6934652328491211,
+ "eval_runtime": 19.4269,
+ "eval_samples_per_second": 7.412,
+ "eval_steps_per_second": 3.706,
+ "step": 1700
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 3552,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 1700,
+ "total_flos": 2.3576867592376156e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
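trainer_state.json is plain JSON, so the curve above (train loss falling from 1.47 to ~0.62, eval loss 0.693 at step 1700) can be pulled out directly; a sketch, assuming a local copy:

```python
# Extract the training curve from trainer_state.json's log_history.
import json

with open("checkpoints/v08-20240205-114459/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "loss" in entry:  # training log points; eval points carry eval_loss instead
        print(f"step {entry['step']:>5}  loss {entry['loss']:.4f}  acc {entry['acc']:.4f}")
print("best eval_loss:", state["best_metric"])
```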
checkpoints/v08-20240205-114459/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:40e6e97628354f5c8d33acf937c5402484caf81e09d587f30f86c4f9d252cf64
+ size 6584
checkpoints/v10-20240205-114325/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.2
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v10-20240205-114325/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
checkpoints/v10-20240205-114325/default/adapter_config.json ADDED
@@ -0,0 +1,36 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "enable_lora": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "lora_dtype": "fp32",
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": [],
+ "peft_type": null,
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "swift_type": "LORA",
+ "target_modules": [
+ "up_proj",
+ "q_proj",
+ "v_proj",
+ "o_proj",
+ "k_proj",
+ "gate_proj",
+ "down_proj"
+ ],
+ "task_type": null,
+ "use_merged_linear": false,
+ "use_qa_lora": false
+ }
checkpoints/v10-20240205-114325/default/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:29d656c815960e6df076b50a811fc80e221919ab9a47b175dfeee5fa5c08acca
+ size 125912272
checkpoints/v10-20240205-114325/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "eos_token_id": 2,
+ "max_new_tokens": 512,
+ "pad_token_id": 2,
+ "repetition_penalty": 0.99,
+ "transformers_version": "4.37.2"
+ }
checkpoints/v10-20240205-114325/sft_args.json ADDED
@@ -0,0 +1,119 @@
+ {
+ "model_type": "solar-10-7b-instruct-v1",
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "model_cache_dir": "/mnt/cachenew/yangzekang/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0",
+ "sft_type": "lora",
+ "freeze_parameters": 0.0,
+ "additional_trainable_parameters": [],
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "output_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v10-20240205-114325",
+ "add_output_dir_suffix": true,
+ "ddp_backend": "nccl",
+ "seed": 42,
+ "resume_from_checkpoint": null,
+ "dtype": "fp16",
+ "dataset": [
+ "_custom_dataset"
+ ],
+ "dataset_seed": 42,
+ "dataset_test_ratio": 0.01,
+ "train_dataset_sample": -1,
+ "val_dataset_sample": null,
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.",
+ "max_length": 3072,
+ "truncation_strategy": "delete",
+ "check_dataset_strategy": "warning",
+ "custom_train_dataset_path": [
+ "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/data/wsdm/model/pseudo/1.01/release_train_data.json"
+ ],
+ "custom_val_dataset_path": [],
+ "self_cognition_sample": 0,
+ "model_name": [
+ null,
+ null
+ ],
+ "model_author": [
+ null,
+ null
+ ],
+ "quantization_bit": 0,
+ "bnb_4bit_comp_dtype": "fp16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "lora_target_modules": [
+ "up_proj",
+ "q_proj",
+ "v_proj",
+ "o_proj",
+ "k_proj",
+ "gate_proj",
+ "down_proj"
+ ],
+ "lora_rank": 8,
+ "lora_alpha": 16,
+ "lora_dropout_p": 0.05,
+ "lora_bias_trainable": "none",
+ "lora_modules_to_save": [],
+ "lora_dtype": "fp32",
+ "neftune_alpha": 0.0,
+ "gradient_checkpointing": true,
+ "deepspeed_config_path": null,
+ "batch_size": 1,
+ "eval_batch_size": 1,
+ "num_train_epochs": 4,
+ "max_steps": -1,
+ "optim": "adamw_torch",
+ "adam_beta1": 0.9,
+ "adam_beta2": 0.999,
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "gradient_accumulation_steps": 8,
+ "max_grad_norm": 0.5,
+ "predict_with_generate": false,
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.03,
+ "eval_steps": 1700,
+ "save_steps": 1700,
+ "save_only_model": true,
+ "save_total_limit": null,
+ "logging_steps": 100,
+ "dataloader_num_workers": 1,
+ "push_to_hub": false,
+ "hub_model_id": "solar-10-7b-instruct-v1-lora",
+ "hub_private_repo": true,
+ "push_hub_strategy": "push_best",
+ "hub_token": null,
+ "test_oom_error": false,
+ "disable_tqdm": false,
+ "lazy_tokenize": false,
+ "preprocess_num_proc": 1,
+ "use_flash_attn": null,
+ "ignore_args_error": false,
+ "check_model_is_latest": false,
+ "logging_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v10-20240205-114325/runs",
+ "report_to": [
+ "all"
+ ],
+ "acc_strategy": "token",
+ "save_on_each_node": true,
+ "save_strategy": "steps",
+ "save_safetensors": true,
+ "max_new_tokens": 2048,
+ "do_sample": true,
+ "temperature": 0.3,
+ "top_k": 20,
+ "top_p": 0.7,
+ "repetition_penalty": 1.05,
+ "num_beams": 1,
+ "only_save_model": true,
+ "torch_dtype": "torch.float16",
+ "fp16": true,
+ "bf16": false,
+ "bnb_4bit_compute_dtype": "torch.float16",
+ "load_in_4bit": false,
+ "load_in_8bit": false,
+ "train_sampler_random": true,
+ "deepspeed": null
+ }
checkpoints/v10-20240205-114325/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoints/v10-20240205-114325/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/v10-20240205-114325/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
checkpoints/v10-20240205-114325/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
checkpoints/v10-20240205-114325/trainer_state.json ADDED
@@ -0,0 +1,156 @@
+ {
+ "best_metric": 0.68919861,
+ "best_model_checkpoint": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v10-20240205-114325/checkpoint-1700",
+ "epoch": 1.9144144144144144,
+ "eval_steps": 1700,
+ "global_step": 1700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "acc": 0.67516661,
+ "epoch": 0.0,
+ "learning_rate": 9.345794392523364e-07,
+ "loss": 1.46797299,
+ "step": 1
+ },
+ {
+ "acc": 0.75035373,
+ "epoch": 0.11,
+ "learning_rate": 9.158878504672898e-05,
+ "loss": 0.95308616,
+ "step": 100
+ },
+ {
+ "acc": 0.78029877,
+ "epoch": 0.23,
+ "learning_rate": 9.735849056603774e-05,
+ "loss": 0.77814468,
+ "step": 200
+ },
+ {
+ "acc": 0.78718307,
+ "epoch": 0.34,
+ "learning_rate": 9.4455732946299e-05,
+ "loss": 0.75001068,
+ "step": 300
+ },
+ {
+ "acc": 0.79127777,
+ "epoch": 0.45,
+ "learning_rate": 9.155297532656024e-05,
+ "loss": 0.73783844,
+ "step": 400
+ },
+ {
+ "acc": 0.79111923,
+ "epoch": 0.56,
+ "learning_rate": 8.865021770682148e-05,
+ "loss": 0.73792175,
+ "step": 500
+ },
+ {
+ "acc": 0.79054779,
+ "epoch": 0.68,
+ "learning_rate": 8.577648766328012e-05,
+ "loss": 0.73389267,
+ "step": 600
+ },
+ {
+ "acc": 0.79359085,
+ "epoch": 0.79,
+ "learning_rate": 8.287373004354137e-05,
+ "loss": 0.72452591,
+ "step": 700
+ },
+ {
+ "acc": 0.79235954,
+ "epoch": 0.9,
+ "learning_rate": 7.997097242380261e-05,
+ "loss": 0.72603645,
+ "step": 800
+ },
+ {
+ "acc": 0.79280067,
+ "epoch": 1.01,
+ "learning_rate": 7.706821480406386e-05,
+ "loss": 0.72392105,
+ "step": 900
+ },
+ {
+ "acc": 0.81310547,
+ "epoch": 1.13,
+ "learning_rate": 7.416545718432511e-05,
+ "loss": 0.63417343,
+ "step": 1000
+ },
+ {
+ "acc": 0.81126122,
+ "epoch": 1.24,
+ "learning_rate": 7.126269956458636e-05,
+ "loss": 0.63897865,
+ "step": 1100
+ },
+ {
+ "acc": 0.81425156,
+ "epoch": 1.35,
+ "learning_rate": 6.83599419448476e-05,
+ "loss": 0.63155251,
+ "step": 1200
+ },
+ {
+ "acc": 0.81428162,
+ "epoch": 1.46,
+ "learning_rate": 6.545718432510885e-05,
+ "loss": 0.63026398,
+ "step": 1300
+ },
+ {
+ "acc": 0.81506256,
+ "epoch": 1.58,
+ "learning_rate": 6.25544267053701e-05,
+ "loss": 0.62718761,
+ "step": 1400
+ },
+ {
+ "acc": 0.81431351,
+ "epoch": 1.69,
+ "learning_rate": 5.965166908563136e-05,
+ "loss": 0.62935677,
+ "step": 1500
+ },
+ {
+ "acc": 0.81549332,
+ "epoch": 1.8,
+ "learning_rate": 5.6748911465892595e-05,
+ "loss": 0.62953285,
+ "step": 1600
+ },
+ {
+ "acc": 0.81804367,
+ "epoch": 1.91,
+ "learning_rate": 5.384615384615385e-05,
+ "loss": 0.61590744,
+ "step": 1700
+ },
+ {
+ "epoch": 1.91,
+ "eval_acc": 0.8087905036894449,
+ "eval_loss": 0.6891986131668091,
+ "eval_runtime": 19.5638,
+ "eval_samples_per_second": 7.361,
+ "eval_steps_per_second": 3.68,
+ "step": 1700
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 3552,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 1700,
+ "total_flos": 2.3576867592376156e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
checkpoints/v10-20240205-114325/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:512e7e11bc024f99d29cf030056db6d63e94df8c427e85aad6b7706e11907c29
+ size 6584
checkpoints/v13-20240202-072530/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.0
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v13-20240202-072530/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
checkpoints/v13-20240202-072530/default/adapter_config.json ADDED
@@ -0,0 +1,35 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "enable_lora": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": null,
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "swift_type": "LORA",
+ "target_modules": [
+ "down_proj",
+ "k_proj",
+ "o_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj",
+ "q_proj"
+ ],
+ "task_type": null,
+ "use_merged_linear": false,
+ "use_qa_lora": false
+ }
checkpoints/v13-20240202-072530/default/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d8f6611b6316ded5bbacfc99c8015f21eb07858e2419695ba2a61cbc21c3f63
+ size 62997320
checkpoints/v13-20240202-072530/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "eos_token_id": 2,
+ "max_new_tokens": 512,
+ "pad_token_id": 2,
+ "repetition_penalty": 0.99,
+ "transformers_version": "4.37.2"
+ }
checkpoints/v13-20240202-072530/sft_args.json ADDED
@@ -0,0 +1,129 @@
+ {
+ "model_type": "solar-10-7b-instruct-v1",
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "model_cache_dir": "/home/aiscuser/Swift-Scripts/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0",
+ "sft_type": "lora",
+ "freeze_parameters": 0.0,
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "output_dir": "/home/aiscuser/Swift-Scripts/output/solar-10-7b-instruct-v1/v13-20240202-072530",
+ "add_output_dir_suffix": true,
+ "custom_output_dir_suffix": null,
+ "ddp_backend": "nccl",
+ "seed": 42,
+ "resume_from_checkpoint": null,
+ "dtype": "fp16",
+ "dataset": [
+ "_custom_dataset"
+ ],
+ "dataset_seed": 42,
+ "dataset_test_ratio": 0.01,
+ "train_dataset_sample": -1,
+ "val_dataset_sample": null,
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.",
+ "max_length": 3072,
+ "truncation_strategy": "delete",
+ "check_dataset_strategy": "warning",
+ "custom_train_dataset_path": [
+ "/home/aiscuser/Swift-Scripts/data/wsdm/model/Pseudo/best_eval_1.01/release_train_data.json"
+ ],
+ "custom_val_dataset_path": [],
+ "self_cognition_sample": 0,
+ "model_name": null,
+ "model_author": null,
+ "quantization_bit": 0,
+ "bnb_4bit_comp_dtype": "fp16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "lora_target_modules": [
+ "down_proj",
+ "k_proj",
+ "o_proj",
+ "up_proj",
+ "gate_proj",
+ "v_proj",
+ "q_proj"
+ ],
+ "lora_rank": 8,
+ "lora_alpha": 16,
+ "lora_dropout_p": 0.05,
+ "neftune_alpha": 0.0,
+ "gradient_checkpointing": true,
+ "deepspeed_config_path": "/home/aiscuser/Swift-Scripts/config/zero2.json",
+ "batch_size": 1,
+ "eval_batch_size": 1,
+ "num_train_epochs": 4,
+ "max_steps": -1,
+ "optim": "adamw_torch",
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "gradient_accumulation_steps": 8,
+ "max_grad_norm": 0.5,
+ "predict_with_generate": false,
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.03,
+ "eval_steps": 100,
+ "save_steps": 100,
+ "only_save_model": true,
+ "save_total_limit": null,
+ "logging_steps": 100,
+ "dataloader_num_workers": 1,
+ "push_to_hub": false,
+ "hub_model_id": "solar-10-7b-instruct-v1-lora",
+ "hub_private_repo": true,
+ "push_hub_strategy": "push_best",
+ "hub_token": null,
+ "test_oom_error": false,
+ "disable_tqdm": false,
+ "lazy_tokenize": false,
+ "preprocess_num_proc": 1,
+ "use_flash_attn": null,
+ "ignore_args_error": false,
+ "logging_dir": "/home/aiscuser/Swift-Scripts/output/solar-10-7b-instruct-v1/v13-20240202-072530/runs",
+ "report_to": [
+ "all"
+ ],
+ "check_model_is_latest": false,
+ "acc_strategy": "token",
+ "save_on_each_node": true,
+ "save_strategy": "steps",
+ "save_safetensors": true,
+ "max_new_tokens": 2048,
+ "do_sample": true,
+ "temperature": 0.3,
+ "top_k": 20,
+ "top_p": 0.7,
+ "repetition_penalty": 1.05,
+ "torch_dtype": "torch.float16",
+ "fp16": true,
+ "bf16": false,
+ "bnb_4bit_compute_dtype": "torch.float16",
+ "load_in_4bit": false,
+ "load_in_8bit": false,
+ "train_sampler_random": true,
+ "deepspeed": {
+ "train_batch_size": "auto",
+ "train_micro_batch_size_per_gpu": "auto",
+ "gradient_accumulation_steps": "auto",
+ "gradient_clipping": "auto",
+ "zero_allow_untested_optimizer": true,
+ "fp16": {
+ "enabled": "auto",
+ "loss_scale": 0,
+ "initial_scale_power": 16,
+ "loss_scale_window": 1000,
+ "hysteresis": 2,
+ "min_loss_scale": 1
+ },
+ "zero_optimization": {
+ "stage": 2,
+ "allgather_partitions": true,
+ "allgather_bucket_size": 500000000.0,
+ "reduce_scatter": true,
+ "reduce_bucket_size": 500000000.0,
+ "overlap_comm": false,
+ "contiguous_gradients": true
+ }
+ }
+ }
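Unlike the other runs, this one trains under DeepSpeed ZeRO stage 2, and the `"auto"` placeholders in the inline config are resolved from the Trainer arguments at launch. A sketch of how they resolve (the GPU count is an assumption, as it is not recorded in the file):

```python
# How the "auto" fields in the inline DeepSpeed config above resolve.
micro_batch = 1   # from "batch_size" -> train_micro_batch_size_per_gpu
grad_accum = 8    # from "gradient_accumulation_steps"
world_size = 4    # assumption: not recorded in sft_args.json

print("train_batch_size:", micro_batch * grad_accum * world_size)
print("gradient_clipping:", 0.5)   # from "max_grad_norm"
print("fp16.enabled:", True)       # from "fp16": true
```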
checkpoints/v13-20240202-072530/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoints/v13-20240202-072530/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/v13-20240202-072530/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
checkpoints/v13-20240202-072530/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
checkpoints/v13-20240202-072530/trainer_state.json ADDED
@@ -0,0 +1,293 @@
+ {
+ "best_metric": 0.69132841,
+ "best_model_checkpoint": "/home/aiscuser/Swift-Scripts/output/solar-10-7b-instruct-v1/v13-20240202-072530/checkpoint-1600",
+ "epoch": 1.9144144144144144,
+ "eval_steps": 100,
+ "global_step": 1700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "acc": 0.74863632,
+ "epoch": 0.11,
+ "learning_rate": 9.158878504672898e-05,
+ "loss": 0.96384094,
+ "step": 100
+ },
+ {
+ "epoch": 0.11,
+ "eval_acc": 0.7936049620361458,
+ "eval_loss": 0.7536066770553589,
+ "eval_runtime": 42.7757,
+ "eval_samples_per_second": 3.366,
+ "eval_steps_per_second": 1.683,
+ "step": 100
+ },
+ {
+ "acc": 0.78036346,
+ "epoch": 0.23,
+ "learning_rate": 9.738751814223513e-05,
+ "loss": 0.77767517,
+ "step": 200
+ },
+ {
+ "epoch": 0.23,
+ "eval_acc": 0.7983638113570741,
+ "eval_loss": 0.7331455945968628,
+ "eval_runtime": 42.7344,
+ "eval_samples_per_second": 3.37,
+ "eval_steps_per_second": 1.685,
+ "step": 200
+ },
+ {
+ "acc": 0.78766861,
+ "epoch": 0.34,
+ "learning_rate": 9.448476052249638e-05,
+ "loss": 0.74959351,
+ "step": 300
+ },
+ {
+ "epoch": 0.34,
+ "eval_acc": 0.8007432360175383,
+ "eval_loss": 0.721094012260437,
+ "eval_runtime": 42.8657,
+ "eval_samples_per_second": 3.359,
+ "eval_steps_per_second": 1.68,
+ "step": 300
+ },
+ {
+ "acc": 0.79140198,
+ "epoch": 0.45,
+ "learning_rate": 9.158200290275763e-05,
+ "loss": 0.73811386,
+ "step": 400
+ },
+ {
+ "epoch": 0.45,
+ "eval_acc": 0.8014650839482408,
+ "eval_loss": 0.7137336730957031,
+ "eval_runtime": 42.7833,
+ "eval_samples_per_second": 3.366,
+ "eval_steps_per_second": 1.683,
+ "step": 400
+ },
+ {
+ "acc": 0.79137726,
+ "epoch": 0.56,
+ "learning_rate": 8.867924528301888e-05,
+ "loss": 0.73802383,
+ "step": 500
+ },
+ {
+ "epoch": 0.56,
+ "eval_acc": 0.8026948989412896,
+ "eval_loss": 0.7035187482833862,
+ "eval_runtime": 42.8037,
+ "eval_samples_per_second": 3.364,
+ "eval_steps_per_second": 1.682,
+ "step": 500
+ },
+ {
+ "acc": 0.79086266,
+ "epoch": 0.68,
+ "learning_rate": 8.577648766328012e-05,
+ "loss": 0.73371964,
+ "step": 600
+ },
+ {
+ "epoch": 0.68,
+ "eval_acc": 0.8036573628488932,
+ "eval_loss": 0.7004870772361755,
+ "eval_runtime": 42.738,
+ "eval_samples_per_second": 3.369,
+ "eval_steps_per_second": 1.685,
+ "step": 600
+ },
+ {
+ "acc": 0.79403992,
+ "epoch": 0.79,
+ "learning_rate": 8.290275761973875e-05,
+ "loss": 0.72441986,
+ "step": 700
+ },
+ {
+ "epoch": 0.79,
+ "eval_acc": 0.8039781841514276,
+ "eval_loss": 0.6986453533172607,
+ "eval_runtime": 42.7916,
+ "eval_samples_per_second": 3.365,
+ "eval_steps_per_second": 1.683,
+ "step": 700
+ },
+ {
+ "acc": 0.79215607,
+ "epoch": 0.9,
+ "learning_rate": 8e-05,
+ "loss": 0.72639374,
+ "step": 800
+ },
+ {
+ "epoch": 0.9,
+ "eval_acc": 0.8061437279435355,
+ "eval_loss": 0.6950626373291016,
+ "eval_runtime": 42.7835,
+ "eval_samples_per_second": 3.366,
+ "eval_steps_per_second": 1.683,
+ "step": 800
+ },
+ {
+ "acc": 0.79285507,
+ "epoch": 1.01,
+ "learning_rate": 7.709724238026124e-05,
+ "loss": 0.72425797,
+ "step": 900
+ },
+ {
+ "epoch": 1.01,
+ "eval_acc": 0.8062774034862582,
+ "eval_loss": 0.7004315257072449,
+ "eval_runtime": 42.8077,
+ "eval_samples_per_second": 3.364,
+ "eval_steps_per_second": 1.682,
+ "step": 900
+ },
+ {
+ "acc": 0.81319962,
+ "epoch": 1.13,
+ "learning_rate": 7.41944847605225e-05,
+ "loss": 0.63495399,
+ "step": 1000
+ },
+ {
+ "epoch": 1.13,
+ "eval_acc": 0.804539621430863,
+ "eval_loss": 0.6987484693527222,
+ "eval_runtime": 42.8689,
+ "eval_samples_per_second": 3.359,
+ "eval_steps_per_second": 1.68,
+ "step": 1000
+ },
+ {
+ "acc": 0.81136383,
+ "epoch": 1.24,
+ "learning_rate": 7.129172714078375e-05,
+ "loss": 0.63906494,
+ "step": 1100
+ },
+ {
+ "epoch": 1.24,
+ "eval_acc": 0.8052614693615656,
+ "eval_loss": 0.6985421776771545,
+ "eval_runtime": 42.8262,
+ "eval_samples_per_second": 3.362,
+ "eval_steps_per_second": 1.681,
+ "step": 1100
+ },
+ {
+ "acc": 0.81388062,
+ "epoch": 1.35,
+ "learning_rate": 6.8388969521045e-05,
+ "loss": 0.63106087,
+ "step": 1200
+ },
+ {
+ "epoch": 1.35,
+ "eval_acc": 0.8074002780451288,
+ "eval_loss": 0.6972200274467468,
+ "eval_runtime": 42.7351,
+ "eval_samples_per_second": 3.37,
+ "eval_steps_per_second": 1.685,
+ "step": 1200
+ },
+ {
+ "acc": 0.81398033,
+ "epoch": 1.46,
+ "learning_rate": 6.548621190130625e-05,
+ "loss": 0.63091103,
+ "step": 1300
+ },
+ {
+ "epoch": 1.46,
+ "eval_acc": 0.807854774890386,
+ "eval_loss": 0.6948702931404114,
+ "eval_runtime": 42.8184,
+ "eval_samples_per_second": 3.363,
+ "eval_steps_per_second": 1.682,
+ "step": 1300
+ },
+ {
+ "acc": 0.81455872,
+ "epoch": 1.58,
+ "learning_rate": 6.258345428156749e-05,
+ "loss": 0.62791916,
+ "step": 1400
+ },
+ {
+ "epoch": 1.58,
+ "eval_acc": 0.8081755961929206,
+ "eval_loss": 0.6945727467536926,
+ "eval_runtime": 42.7416,
+ "eval_samples_per_second": 3.369,
+ "eval_steps_per_second": 1.685,
+ "step": 1400
+ },
+ {
+ "acc": 0.81421364,
+ "epoch": 1.69,
+ "learning_rate": 5.968069666182874e-05,
+ "loss": 0.62950912,
+ "step": 1500
+ },
+ {
+ "epoch": 1.69,
+ "eval_acc": 0.8069992514169607,
+ "eval_loss": 0.692737340927124,
+ "eval_runtime": 42.8216,
+ "eval_samples_per_second": 3.363,
+ "eval_steps_per_second": 1.681,
+ "step": 1500
+ },
+ {
+ "acc": 0.81544525,
+ "epoch": 1.8,
+ "learning_rate": 5.6777939042089986e-05,
+ "loss": 0.63058929,
+ "step": 1600
+ },
+ {
+ "epoch": 1.8,
+ "eval_acc": 0.8083894770612768,
+ "eval_loss": 0.6913284063339233,
+ "eval_runtime": 42.8047,
+ "eval_samples_per_second": 3.364,
+ "eval_steps_per_second": 1.682,
+ "step": 1600
+ },
+ {
+ "acc": 0.81801094,
+ "epoch": 1.91,
+ "learning_rate": 5.387518142235124e-05,
+ "loss": 0.61622761,
+ "step": 1700
+ },
+ {
+ "epoch": 1.91,
+ "eval_acc": 0.8079349802160197,
+ "eval_loss": 0.6917322874069214,
+ "eval_runtime": 42.7927,
+ "eval_samples_per_second": 3.365,
+ "eval_steps_per_second": 1.683,
+ "step": 1700
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 3552,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 100,
+ "total_flos": 2.357686760579793e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
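With `eval_steps` at 100 this run logs an eval point after every logging step, so the best checkpoint can be recovered programmatically; a sketch, assuming a local copy of the file:

```python
# Pick the best checkpoint from the dense eval log of this run.
import json

with open("checkpoints/v13-20240202-072530/trainer_state.json") as f:
    state = json.load(f)

evals = [e for e in state["log_history"] if "eval_loss" in e]
best = min(evals, key=lambda e: e["eval_loss"])
print(best["step"], best["eval_loss"])  # 1600, ~0.6913 - matching best_metric above
```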
checkpoints/v13-20240202-072530/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0e739e218b320d75a6767aaeeec34986b2dc66c1f5b4f77b76cadee550865a3
+ size 8120
checkpoints/v13-20240206-111010/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.2
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v13-20240206-111010/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
checkpoints/v13-20240206-111010/default/adapter_config.json ADDED
@@ -0,0 +1,36 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": null,
+ "bias": "none",
+ "enable_lora": null,
+ "fan_in_fan_out": false,
+ "inference_mode": false,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.05,
+ "lora_dtype": "fp32",
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": [],
+ "peft_type": null,
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "swift_type": "LORA",
+ "target_modules": [
+ "up_proj",
+ "gate_proj",
+ "q_proj",
+ "down_proj",
+ "v_proj",
+ "o_proj",
+ "k_proj"
+ ],
+ "task_type": null,
+ "use_merged_linear": false,
+ "use_qa_lora": false
+ }
checkpoints/v13-20240206-111010/default/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17b5c79fce9c77d700d912290deded4a49c40a1f14269f9d99d900eb411a70cd
+ size 125912272
checkpoints/v13-20240206-111010/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "eos_token_id": 2,
+ "max_new_tokens": 512,
+ "pad_token_id": 2,
+ "repetition_penalty": 0.99,
+ "transformers_version": "4.37.2"
+ }
checkpoints/v13-20240206-111010/sft_args.json ADDED
@@ -0,0 +1,119 @@
+ {
+ "model_type": "solar-10-7b-instruct-v1",
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "model_cache_dir": "/mnt/cachenew/yangzekang/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0",
+ "sft_type": "lora",
+ "freeze_parameters": 0.0,
+ "additional_trainable_parameters": [],
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "output_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v13-20240206-111010",
+ "add_output_dir_suffix": true,
+ "ddp_backend": "nccl",
+ "seed": 42,
+ "resume_from_checkpoint": null,
+ "dtype": "fp16",
+ "dataset": [
+ "_custom_dataset"
+ ],
+ "dataset_seed": 42,
+ "dataset_test_ratio": 0.01,
+ "train_dataset_sample": -1,
+ "val_dataset_sample": null,
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.",
+ "max_length": 3072,
+ "truncation_strategy": "delete",
+ "check_dataset_strategy": "warning",
+ "custom_train_dataset_path": [
+ "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/data/wsdm/model/pseudo/phase_1/best_eval_1.00/release_train_data.json"
+ ],
+ "custom_val_dataset_path": [],
+ "self_cognition_sample": 0,
+ "model_name": [
+ null,
+ null
+ ],
+ "model_author": [
+ null,
+ null
+ ],
+ "quantization_bit": 0,
+ "bnb_4bit_comp_dtype": "fp16",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "lora_target_modules": [
+ "up_proj",
+ "gate_proj",
+ "q_proj",
+ "down_proj",
+ "v_proj",
+ "o_proj",
+ "k_proj"
+ ],
+ "lora_rank": 8,
+ "lora_alpha": 16,
+ "lora_dropout_p": 0.05,
+ "lora_bias_trainable": "none",
+ "lora_modules_to_save": [],
+ "lora_dtype": "fp32",
+ "neftune_alpha": 0.0,
+ "gradient_checkpointing": true,
+ "deepspeed_config_path": null,
+ "batch_size": 1,
+ "eval_batch_size": 1,
+ "num_train_epochs": 4,
+ "max_steps": -1,
+ "optim": "adamw_torch",
+ "adam_beta1": 0.9,
+ "adam_beta2": 0.999,
+ "learning_rate": 0.0001,
+ "weight_decay": 0.01,
+ "gradient_accumulation_steps": 8,
+ "max_grad_norm": 0.5,
+ "predict_with_generate": false,
+ "lr_scheduler_type": "linear",
+ "warmup_ratio": 0.03,
+ "eval_steps": 1700,
+ "save_steps": 1700,
+ "save_only_model": true,
+ "save_total_limit": null,
+ "logging_steps": 100,
+ "dataloader_num_workers": 1,
+ "push_to_hub": false,
+ "hub_model_id": "solar-10-7b-instruct-v1-lora",
+ "hub_private_repo": true,
+ "push_hub_strategy": "push_best",
+ "hub_token": null,
+ "test_oom_error": false,
+ "disable_tqdm": false,
+ "lazy_tokenize": false,
+ "preprocess_num_proc": 1,
+ "use_flash_attn": null,
+ "ignore_args_error": false,
+ "check_model_is_latest": false,
+ "logging_dir": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v13-20240206-111010/runs",
+ "report_to": [
+ "all"
+ ],
+ "acc_strategy": "token",
+ "save_on_each_node": true,
+ "save_strategy": "steps",
+ "save_safetensors": true,
+ "max_new_tokens": 2048,
+ "do_sample": true,
+ "temperature": 0.3,
+ "top_k": 20,
+ "top_p": 0.7,
+ "repetition_penalty": 1.05,
+ "num_beams": 1,
+ "only_save_model": true,
+ "torch_dtype": "torch.float16",
+ "fp16": true,
+ "bf16": false,
+ "bnb_4bit_compute_dtype": "torch.float16",
+ "load_in_4bit": false,
+ "load_in_8bit": false,
+ "train_sampler_random": true,
+ "deepspeed": null
+ }
checkpoints/v13-20240206-111010/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
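Note that the pad token is mapped to the same string as the EOS token (`</s>`), a common choice for Llama-family tokenizers that ship without a dedicated pad token. A minimal check, assuming `transformers` is installed and the checkpoint directory is available locally:

```python
from transformers import AutoTokenizer

# Hypothetical local path; point this at your checkout of the checkpoint.
tok = AutoTokenizer.from_pretrained("checkpoints/v13-20240206-111010")

# Both print "</s>", confirming pad_token == eos_token.
print(tok.eos_token, tok.pad_token)
```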
checkpoints/v13-20240206-111010/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/v13-20240206-111010/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
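The three lines above are a Git LFS pointer, not the SentencePiece model itself; the actual blob is fetched with `git lfs pull`. A minimal sketch (assuming the blob has already been pulled locally) that verifies the download against the pointer's oid and size:

```python
import hashlib
from pathlib import Path

# Hypothetical local path to the pulled LFS blob.
path = Path("checkpoints/v13-20240206-111010/tokenizer.model")

data = path.read_bytes()
# The pointer records the blob's byte size and SHA-256; both must match.
print("size ok:", len(data) == 493443)
print("oid ok:", hashlib.sha256(data).hexdigest()
      == "dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055")
```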
checkpoints/v13-20240206-111010/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{% if message['content']%}{{'### System:\n' + message['content']+'\n\n'}}{% endif %}{% elif message['role'] == 'user' %}{{'### User:\n' + message['content']+'\n\n'}}{% elif message['role'] == 'assistant' %}{{'### Assistant:\n' + message['content']}}{% endif %}{% if loop.last and add_generation_prompt %}{{ '### Assistant:\n' }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }
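The chat_template above renders conversations in SOLAR's `### System:` / `### User:` / `### Assistant:` format. A minimal sketch, assuming `transformers` is installed and the checkpoint directory is available locally, showing how the template expands a conversation:

```python
from transformers import AutoTokenizer

# Hypothetical local path; point this at your checkout of the checkpoint.
tok = AutoTokenizer.from_pretrained("checkpoints/v13-20240206-111010")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is LoRA?"},
]

# add_generation_prompt=True appends the trailing "### Assistant:\n" cue.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```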
checkpoints/v13-20240206-111010/trainer_state.json ADDED
@@ -0,0 +1,156 @@
+ {
+ "best_metric": 0.68276149,
+ "best_model_checkpoint": "/mnt/cachenew/yangzekang/wsdm_lym/swift_wsdm/output/solar-10-7b-instruct-v1/v13-20240206-111010/checkpoint-1700",
+ "epoch": 1.9144144144144144,
+ "eval_steps": 1700,
+ "global_step": 1700,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "acc": 0.67516661,
+ "epoch": 0.0,
+ "learning_rate": 9.345794392523364e-07,
+ "loss": 1.46797299,
+ "step": 1
+ },
+ {
+ "acc": 0.75082575,
+ "epoch": 0.11,
+ "learning_rate": 9.252336448598131e-05,
+ "loss": 0.95137394,
+ "step": 100
+ },
+ {
+ "acc": 0.78081596,
+ "epoch": 0.23,
+ "learning_rate": 9.732946298984034e-05,
+ "loss": 0.77715546,
+ "step": 200
+ },
+ {
+ "acc": 0.7879702,
+ "epoch": 0.34,
+ "learning_rate": 9.44267053701016e-05,
+ "loss": 0.7487645,
+ "step": 300
+ },
+ {
+ "acc": 0.79198853,
+ "epoch": 0.45,
+ "learning_rate": 9.152394775036285e-05,
+ "loss": 0.73725586,
+ "step": 400
+ },
+ {
+ "acc": 0.79181892,
+ "epoch": 0.56,
+ "learning_rate": 8.86211901306241e-05,
+ "loss": 0.73659386,
+ "step": 500
+ },
+ {
+ "acc": 0.791054,
+ "epoch": 0.68,
+ "learning_rate": 8.571843251088535e-05,
+ "loss": 0.73336884,
+ "step": 600
+ },
+ {
+ "acc": 0.79418472,
+ "epoch": 0.79,
+ "learning_rate": 8.284470246734399e-05,
+ "loss": 0.72362808,
+ "step": 700
+ },
+ {
+ "acc": 0.79279587,
+ "epoch": 0.9,
+ "learning_rate": 7.994194484760524e-05,
+ "loss": 0.72401505,
+ "step": 800
+ },
+ {
+ "acc": 0.79312164,
+ "epoch": 1.01,
+ "learning_rate": 7.703918722786648e-05,
+ "loss": 0.7227565,
+ "step": 900
+ },
+ {
+ "acc": 0.8135437,
+ "epoch": 1.13,
+ "learning_rate": 7.413642960812773e-05,
+ "loss": 0.63391575,
+ "step": 1000
+ },
+ {
+ "acc": 0.81166725,
+ "epoch": 1.24,
+ "learning_rate": 7.123367198838897e-05,
+ "loss": 0.63856529,
+ "step": 1100
+ },
+ {
+ "acc": 0.81463379,
+ "epoch": 1.35,
+ "learning_rate": 6.833091436865022e-05,
+ "loss": 0.63027618,
+ "step": 1200
+ },
+ {
+ "acc": 0.81466286,
+ "epoch": 1.46,
+ "learning_rate": 6.542815674891147e-05,
+ "loss": 0.62910736,
+ "step": 1300
+ },
+ {
+ "acc": 0.81553825,
+ "epoch": 1.58,
+ "learning_rate": 6.252539912917271e-05,
+ "loss": 0.62564072,
+ "step": 1400
+ },
+ {
+ "acc": 0.81496376,
+ "epoch": 1.69,
+ "learning_rate": 5.9622641509433966e-05,
+ "loss": 0.62815582,
+ "step": 1500
+ },
+ {
+ "acc": 0.81624107,
+ "epoch": 1.8,
+ "learning_rate": 5.671988388969521e-05,
+ "loss": 0.6277507,
+ "step": 1600
+ },
+ {
+ "acc": 0.81836472,
+ "epoch": 1.91,
+ "learning_rate": 5.381712626995646e-05,
+ "loss": 0.61489536,
+ "step": 1700
+ },
+ {
+ "epoch": 1.91,
+ "eval_acc": 0.8124290204157093,
+ "eval_loss": 0.6827614903450012,
+ "eval_runtime": 19.6247,
+ "eval_samples_per_second": 7.338,
+ "eval_steps_per_second": 3.669,
+ "step": 1700
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 3552,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 1700,
+ "total_flos": 2.357705064388231e+18,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
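The log_history above records training loss and token accuracy every 100 steps plus one evaluation at step 1700. A minimal sketch (assuming `matplotlib` is installed and the checkpoint is checked out locally) that plots the training-loss curve from this file:

```python
import json

import matplotlib.pyplot as plt

# Hypothetical local path; point this at your checkout of the checkpoint.
with open("checkpoints/v13-20240206-111010/trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

# Keep only training entries; the evaluation entry carries "eval_loss" instead.
train_logs = [e for e in state["log_history"] if "loss" in e]

plt.plot([e["step"] for e in train_logs], [e["loss"] for e in train_logs], marker="o")
plt.xlabel("global step")
plt.ylabel("training loss")
plt.title("SOLAR-10.7B LoRA SFT (v13-20240206-111010)")
plt.savefig("loss_curve.png")
```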
checkpoints/v13-20240206-111010/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e4aca4f9bd7970195cea06fd363091f42810a0b49c8bf34e182632eb6bf1e90e
+ size 6584
checkpoints/v16-20240206-224659/README.md ADDED
@@ -0,0 +1,10 @@
+ ## Training procedure
+
+ ### Framework versions
+
+
+ - SWIFT 1.5.2
+ ### Base model information
+
+
+ - BaseModel Class LlamaForCausalLM
checkpoints/v16-20240206-224659/configuration.json ADDED
@@ -0,0 +1,11 @@
+ {
+ "adapter_cfg": {
+ "model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0",
+ "model_revision": "master",
+ "sft_type": "lora",
+ "tuner_backend": "swift",
+ "template_type": "llama",
+ "dtype": "fp16",
+ "system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
+ }
+ }
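The configuration above records that this checkpoint is a LoRA adapter (tuner_backend "swift") over upstage/SOLAR-10.7B-Instruct-v1.0. A minimal loading sketch, assuming the ms-swift package is installed and using its Swift.from_pretrained helper (the exact call may vary across SWIFT versions, so treat this as an outline rather than the authors' own loading code):

```python
import torch
from swift import Swift  # ms-swift, the tuner backend recorded above
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE = "upstage/SOLAR-10.7B-Instruct-v1.0"
# Hypothetical local path to one of the adapter checkpoints in this repo.
ADAPTER = "checkpoints/v16-20240206-224659"

model = AutoModelForCausalLM.from_pretrained(
    BASE, torch_dtype=torch.float16, device_map="auto"
)
tok = AutoTokenizer.from_pretrained(ADAPTER)

# Attach the LoRA weights from the checkpoint directory to the base model.
model = Swift.from_pretrained(model, ADAPTER)
model.eval()
```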