|
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file vocab.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/vocab.json |
|
|
|
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file merges.txt from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/merges.txt |
|
|
|
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/tokenizer.json |
|
|
|
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None |
|
|
|
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None |
|
|
|
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/tokenizer_config.json |
|
|
|
06/06/2024 20:45:49 - WARNING - transformers.tokenization_utils_base - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained. |
|
|
|
06/06/2024 20:45:49 - INFO - llamafactory.data.template - Replace eos token: <|im_end|> |
|
|
|
06/06/2024 20:45:49 - INFO - llamafactory.data.loader - Loading dataset train.json... |
|
|
|
06/06/2024 20:45:49 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/config.json |
|
|
|
06/06/2024 20:45:49 - INFO - transformers.configuration_utils - Model config Qwen2Config { |
|
"_name_or_path": "Qwen/Qwen2-7B-Instruct-GPTQ-Int8", |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151645, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"quantization_config": { |
|
"batch_size": 1, |
|
"bits": 8, |
|
"block_name_to_quantize": null, |
|
"cache_block_outputs": true, |
|
"damp_percent": 0.1, |
|
"dataset": null, |
|
"desc_act": false, |
|
"exllama_config": { |
|
"version": 1 |
|
}, |
|
"group_size": 128, |
|
"max_input_length": null, |
|
"model_seqlen": null, |
|
"module_name_preceding_first_block": null, |
|
"modules_in_block_to_quantize": null, |
|
"pad_token_id": null, |
|
"quant_method": "gptq", |
|
"sym": true, |
|
"tokenizer": null, |
|
"true_sequential": true, |
|
"use_cuda_fp16": false, |
|
"use_exllama": true |
|
}, |
|
"rms_norm_eps": 1e-06, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": 131072, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "float16", |
|
"transformers_version": "4.41.2", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
06/06/2024 20:45:49 - WARNING - llamafactory.model.model_utils.attention - FlashAttention-2 is not installed. |
|
|
|
06/06/2024 20:45:49 - INFO - llamafactory.model.model_utils.quantization - Loading 8-bit GPTQ-quantized model. |
|
|
|
06/06/2024 20:45:49 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/model.safetensors.index.json |
|
|
|
06/06/2024 20:46:15 - INFO - transformers.modeling_utils - Instantiating Qwen2ForCausalLM model under default dtype torch.float16. |
|
|
|
06/06/2024 20:46:15 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig { |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151645, |
|
"use_cache": false |
|
} |
|
|
|
|
|
06/06/2024 20:46:41 - WARNING - transformers.modeling_utils - Some weights of the model checkpoint at Qwen/Qwen2-7B-Instruct-GPTQ-Int8 were not used when initializing Qwen2ForCausalLM: ['model.layers.0.mlp.down_proj.bias', 'model.layers.0.mlp.gate_proj.bias', 'model.layers.0.mlp.up_proj.bias', 'model.layers.0.self_attn.o_proj.bias', 'model.layers.1.mlp.down_proj.bias', 'model.layers.1.mlp.gate_proj.bias', 'model.layers.1.mlp.up_proj.bias', 'model.layers.1.self_attn.o_proj.bias', 'model.layers.10.mlp.down_proj.bias', 'model.layers.10.mlp.gate_proj.bias', 'model.layers.10.mlp.up_proj.bias', 'model.layers.10.self_attn.o_proj.bias', 'model.layers.11.mlp.down_proj.bias', 'model.layers.11.mlp.gate_proj.bias', 'model.layers.11.mlp.up_proj.bias', 'model.layers.11.self_attn.o_proj.bias', 'model.layers.12.mlp.down_proj.bias', 'model.layers.12.mlp.gate_proj.bias', 'model.layers.12.mlp.up_proj.bias', 'model.layers.12.self_attn.o_proj.bias', 'model.layers.13.mlp.down_proj.bias', 'model.layers.13.mlp.gate_proj.bias', 'model.layers.13.mlp.up_proj.bias', 'model.layers.13.self_attn.o_proj.bias', 'model.layers.14.mlp.down_proj.bias', 'model.layers.14.mlp.gate_proj.bias', 'model.layers.14.mlp.up_proj.bias', 'model.layers.14.self_attn.o_proj.bias', 'model.layers.15.mlp.down_proj.bias', 'model.layers.15.mlp.gate_proj.bias', 'model.layers.15.mlp.up_proj.bias', 'model.layers.15.self_attn.o_proj.bias', 'model.layers.16.mlp.down_proj.bias', 'model.layers.16.mlp.gate_proj.bias', 'model.layers.16.mlp.up_proj.bias', 'model.layers.16.self_attn.o_proj.bias', 'model.layers.17.mlp.down_proj.bias', 'model.layers.17.mlp.gate_proj.bias', 'model.layers.17.mlp.up_proj.bias', 'model.layers.17.self_attn.o_proj.bias', 'model.layers.18.mlp.down_proj.bias', 'model.layers.18.mlp.gate_proj.bias', 'model.layers.18.mlp.up_proj.bias', 'model.layers.18.self_attn.o_proj.bias', 'model.layers.19.mlp.down_proj.bias', 'model.layers.19.mlp.gate_proj.bias', 'model.layers.19.mlp.up_proj.bias', 'model.layers.19.self_attn.o_proj.bias', 'model.layers.2.mlp.down_proj.bias', 'model.layers.2.mlp.gate_proj.bias', 'model.layers.2.mlp.up_proj.bias', 'model.layers.2.self_attn.o_proj.bias', 'model.layers.20.mlp.down_proj.bias', 'model.layers.20.mlp.gate_proj.bias', 'model.layers.20.mlp.up_proj.bias', 'model.layers.20.self_attn.o_proj.bias', 'model.layers.21.mlp.down_proj.bias', 'model.layers.21.mlp.gate_proj.bias', 'model.layers.21.mlp.up_proj.bias', 'model.layers.21.self_attn.o_proj.bias', 'model.layers.22.mlp.down_proj.bias', 'model.layers.22.mlp.gate_proj.bias', 'model.layers.22.mlp.up_proj.bias', 'model.layers.22.self_attn.o_proj.bias', 'model.layers.23.mlp.down_proj.bias', 'model.layers.23.mlp.gate_proj.bias', 'model.layers.23.mlp.up_proj.bias', 'model.layers.23.self_attn.o_proj.bias', 'model.layers.24.mlp.down_proj.bias', 'model.layers.24.mlp.gate_proj.bias', 'model.layers.24.mlp.up_proj.bias', 'model.layers.24.self_attn.o_proj.bias', 'model.layers.25.mlp.down_proj.bias', 'model.layers.25.mlp.gate_proj.bias', 'model.layers.25.mlp.up_proj.bias', 'model.layers.25.self_attn.o_proj.bias', 'model.layers.26.mlp.down_proj.bias', 'model.layers.26.mlp.gate_proj.bias', 'model.layers.26.mlp.up_proj.bias', 'model.layers.26.self_attn.o_proj.bias', 'model.layers.27.mlp.down_proj.bias', 'model.layers.27.mlp.gate_proj.bias', 'model.layers.27.mlp.up_proj.bias', 'model.layers.27.self_attn.o_proj.bias', 'model.layers.3.mlp.down_proj.bias', 'model.layers.3.mlp.gate_proj.bias', 'model.layers.3.mlp.up_proj.bias', 'model.layers.3.self_attn.o_proj.bias', 'model.layers.4.mlp.down_proj.bias', 'model.layers.4.mlp.gate_proj.bias', 'model.layers.4.mlp.up_proj.bias', 'model.layers.4.self_attn.o_proj.bias', 'model.layers.5.mlp.down_proj.bias', 'model.layers.5.mlp.gate_proj.bias', 'model.layers.5.mlp.up_proj.bias', 'model.layers.5.self_attn.o_proj.bias', 'model.layers.6.mlp.down_proj.bias', 'model.layers.6.mlp.gate_proj.bias', 'model.layers.6.mlp.up_proj.bias', 'model.layers.6.self_attn.o_proj.bias', 'model.layers.7.mlp.down_proj.bias', 'model.layers.7.mlp.gate_proj.bias', 'model.layers.7.mlp.up_proj.bias', 'model.layers.7.self_attn.o_proj.bias', 'model.layers.8.mlp.down_proj.bias', 'model.layers.8.mlp.gate_proj.bias', 'model.layers.8.mlp.up_proj.bias', 'model.layers.8.self_attn.o_proj.bias', 'model.layers.9.mlp.down_proj.bias', 'model.layers.9.mlp.gate_proj.bias', 'model.layers.9.mlp.up_proj.bias', 'model.layers.9.self_attn.o_proj.bias'] |
|
- This IS expected if you are initializing Qwen2ForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model). |
|
- This IS NOT expected if you are initializing Qwen2ForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model). |
|
|
|
06/06/2024 20:46:41 - INFO - transformers.modeling_utils - All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B-Instruct-GPTQ-Int8. |
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
|
06/06/2024 20:46:41 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/generation_config.json |
|
|
|
06/06/2024 20:46:41 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig { |
|
"bos_token_id": 151643, |
|
"do_sample": true, |
|
"eos_token_id": [ |
|
151645, |
|
151643 |
|
], |
|
"pad_token_id": 151643, |
|
"repetition_penalty": 1.05, |
|
"top_p": 0.8 |
|
} |
|
|
|
|
|
06/06/2024 20:46:41 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled. |
|
|
|
06/06/2024 20:46:41 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference. |
|
|
|
06/06/2024 20:46:41 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32. |
|
|
|
06/06/2024 20:46:41 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA |
|
|
|
06/06/2024 20:46:41 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,o_proj,k_proj,gate_proj,down_proj,up_proj,v_proj |
|
|
|
06/06/2024 20:46:42 - INFO - llamafactory.model.loader - trainable params: 20185088 || all params: 1110384128 || trainable%: 1.8178 |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - Using auto half precision backend |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - ***** Running training ***** |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - Num examples = 9,600 |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - Num Epochs = 3 |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - Instantaneous batch size per device = 2 |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 16 |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - Gradient Accumulation steps = 8 |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - Total optimization steps = 1,800 |
|
|
|
06/06/2024 20:46:42 - INFO - transformers.trainer - Number of trainable parameters = 20,185,088 |
|
|
|
06/06/2024 20:48:17 - INFO - llamafactory.extras.callbacks - {'loss': 0.4212, 'learning_rate': 4.9999e-05, 'epoch': 0.01, 'throughput': 317.96} |
|
|
|
06/06/2024 20:49:49 - INFO - llamafactory.extras.callbacks - {'loss': 0.2803, 'learning_rate': 4.9996e-05, 'epoch': 0.02, 'throughput': 307.70} |
|
|
|
06/06/2024 20:51:24 - INFO - llamafactory.extras.callbacks - {'loss': 0.1484, 'learning_rate': 4.9991e-05, 'epoch': 0.03, 'throughput': 305.82} |
|
|
|
06/06/2024 20:52:54 - INFO - llamafactory.extras.callbacks - {'loss': 0.1151, 'learning_rate': 4.9985e-05, 'epoch': 0.03, 'throughput': 297.64} |
|
|
|
06/06/2024 20:54:29 - INFO - llamafactory.extras.callbacks - {'loss': 0.0751, 'learning_rate': 4.9976e-05, 'epoch': 0.04, 'throughput': 297.62} |
|
|
|
06/06/2024 20:56:07 - INFO - llamafactory.extras.callbacks - {'loss': 0.0471, 'learning_rate': 4.9966e-05, 'epoch': 0.05, 'throughput': 300.36} |
|
|
|
06/06/2024 20:57:47 - INFO - llamafactory.extras.callbacks - {'loss': 0.0368, 'learning_rate': 4.9953e-05, 'epoch': 0.06, 'throughput': 303.39} |
|
|
|
06/06/2024 20:59:22 - INFO - llamafactory.extras.callbacks - {'loss': 0.0311, 'learning_rate': 4.9939e-05, 'epoch': 0.07, 'throughput': 302.60} |
|
|
|
06/06/2024 21:00:58 - INFO - llamafactory.extras.callbacks - {'loss': 0.0262, 'learning_rate': 4.9923e-05, 'epoch': 0.07, 'throughput': 302.73} |
|
|
|
06/06/2024 21:02:36 - INFO - llamafactory.extras.callbacks - {'loss': 0.0244, 'learning_rate': 4.9905e-05, 'epoch': 0.08, 'throughput': 303.83} |
|
|
|
06/06/2024 21:04:11 - INFO - llamafactory.extras.callbacks - {'loss': 0.0266, 'learning_rate': 4.9885e-05, 'epoch': 0.09, 'throughput': 303.09} |
|
|
|
06/06/2024 21:05:46 - INFO - llamafactory.extras.callbacks - {'loss': 0.0283, 'learning_rate': 4.9868e-05, 'epoch': 0.10, 'throughput': 300.01} |
|
|
|
06/06/2024 21:07:21 - INFO - llamafactory.extras.callbacks - {'loss': 0.0265, 'learning_rate': 4.9844e-05, 'epoch': 0.11, 'throughput': 299.96} |
|
|
|
06/06/2024 21:08:54 - INFO - llamafactory.extras.callbacks - {'loss': 0.0235, 'learning_rate': 4.9819e-05, 'epoch': 0.12, 'throughput': 299.39} |
|
|
|
06/06/2024 21:10:28 - INFO - llamafactory.extras.callbacks - {'loss': 0.0229, 'learning_rate': 4.9792e-05, 'epoch': 0.12, 'throughput': 299.15} |
|
|
|
06/06/2024 21:12:01 - INFO - llamafactory.extras.callbacks - {'loss': 0.0238, 'learning_rate': 4.9763e-05, 'epoch': 0.13, 'throughput': 298.31} |
|
|
|
06/06/2024 21:13:35 - INFO - llamafactory.extras.callbacks - {'loss': 0.0208, 'learning_rate': 4.9732e-05, 'epoch': 0.14, 'throughput': 298.24} |
|
|
|
06/06/2024 21:15:11 - INFO - llamafactory.extras.callbacks - {'loss': 0.0191, 'learning_rate': 4.9699e-05, 'epoch': 0.15, 'throughput': 298.57} |
|
|
|
06/06/2024 21:16:43 - INFO - llamafactory.extras.callbacks - {'loss': 0.0205, 'learning_rate': 4.9664e-05, 'epoch': 0.16, 'throughput': 298.01} |
|
|
|
06/06/2024 21:18:20 - INFO - llamafactory.extras.callbacks - {'loss': 0.0193, 'learning_rate': 4.9628e-05, 'epoch': 0.17, 'throughput': 298.56} |
|
|
|
06/06/2024 21:18:20 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-100 |
|
|
|
06/06/2024 21:18:20 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/config.json |
|
|
|
06/06/2024 21:18:20 - INFO - transformers.configuration_utils - Model config Qwen2Config { |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151645, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"quantization_config": { |
|
"batch_size": 1, |
|
"bits": 8, |
|
"block_name_to_quantize": null, |
|
"cache_block_outputs": true, |
|
"damp_percent": 0.1, |
|
"dataset": null, |
|
"desc_act": false, |
|
"exllama_config": { |
|
"version": 1 |
|
}, |
|
"group_size": 128, |
|
"max_input_length": null, |
|
"model_seqlen": null, |
|
"module_name_preceding_first_block": null, |
|
"modules_in_block_to_quantize": null, |
|
"pad_token_id": null, |
|
"quant_method": "gptq", |
|
"sym": true, |
|
"tokenizer": null, |
|
"true_sequential": true, |
|
"use_cuda_fp16": false, |
|
"use_exllama": true |
|
}, |
|
"rms_norm_eps": 1e-06, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": 131072, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "float16", |
|
"transformers_version": "4.41.2", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
06/06/2024 21:18:20 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-100/tokenizer_config.json |
|
|
|
06/06/2024 21:18:20 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-100/special_tokens_map.json |
|
|
|
06/06/2024 21:19:53 - INFO - llamafactory.extras.callbacks - {'loss': 0.0240, 'learning_rate': 4.9589e-05, 'epoch': 0.17, 'throughput': 298.12} |
|
|
|
06/06/2024 21:21:27 - INFO - llamafactory.extras.callbacks - {'loss': 0.0206, 'learning_rate': 4.9549e-05, 'epoch': 0.18, 'throughput': 297.89} |
|
|
|
06/06/2024 21:22:59 - INFO - llamafactory.extras.callbacks - {'loss': 0.0188, 'learning_rate': 4.9507e-05, 'epoch': 0.19, 'throughput': 297.43} |
|
|
|
06/06/2024 21:24:35 - INFO - llamafactory.extras.callbacks - {'loss': 0.0183, 'learning_rate': 4.9463e-05, 'epoch': 0.20, 'throughput': 297.64} |
|
|
|
06/06/2024 21:26:04 - INFO - llamafactory.extras.callbacks - {'loss': 0.0189, 'learning_rate': 4.9417e-05, 'epoch': 0.21, 'throughput': 297.06} |
|
|
|
06/06/2024 21:27:40 - INFO - llamafactory.extras.callbacks - {'loss': 0.0213, 'learning_rate': 4.9369e-05, 'epoch': 0.22, 'throughput': 297.53} |
|
|
|
06/06/2024 21:29:12 - INFO - llamafactory.extras.callbacks - {'loss': 0.0214, 'learning_rate': 4.9319e-05, 'epoch': 0.23, 'throughput': 297.22} |
|
|
|
06/06/2024 21:30:47 - INFO - llamafactory.extras.callbacks - {'loss': 0.0214, 'learning_rate': 4.9268e-05, 'epoch': 0.23, 'throughput': 297.22} |
|
|
|
06/06/2024 21:32:22 - INFO - llamafactory.extras.callbacks - {'loss': 0.0203, 'learning_rate': 4.9215e-05, 'epoch': 0.24, 'throughput': 297.36} |
|
|
|
06/06/2024 21:33:56 - INFO - llamafactory.extras.callbacks - {'loss': 0.0194, 'learning_rate': 4.9159e-05, 'epoch': 0.25, 'throughput': 297.35} |
|
|
|
06/06/2024 21:35:27 - INFO - llamafactory.extras.callbacks - {'loss': 0.0158, 'learning_rate': 4.9102e-05, 'epoch': 0.26, 'throughput': 296.96} |
|
|
|
06/06/2024 21:37:01 - INFO - llamafactory.extras.callbacks - {'loss': 0.0172, 'learning_rate': 4.9044e-05, 'epoch': 0.27, 'throughput': 296.94} |
|
|
|
06/06/2024 21:38:35 - INFO - llamafactory.extras.callbacks - {'loss': 0.0193, 'learning_rate': 4.8983e-05, 'epoch': 0.28, 'throughput': 297.06} |
|
|
|
06/06/2024 21:40:13 - INFO - llamafactory.extras.callbacks - {'loss': 0.0189, 'learning_rate': 4.8920e-05, 'epoch': 0.28, 'throughput': 297.62} |
|
|
|
06/06/2024 21:41:49 - INFO - llamafactory.extras.callbacks - {'loss': 0.0190, 'learning_rate': 4.8856e-05, 'epoch': 0.29, 'throughput': 297.91} |
|
|
|
06/06/2024 21:43:23 - INFO - llamafactory.extras.callbacks - {'loss': 0.0168, 'learning_rate': 4.8790e-05, 'epoch': 0.30, 'throughput': 297.99} |
|
|
|
06/06/2024 21:45:00 - INFO - llamafactory.extras.callbacks - {'loss': 0.0194, 'learning_rate': 4.8722e-05, 'epoch': 0.31, 'throughput': 298.39} |
|
|
|
06/06/2024 21:46:33 - INFO - llamafactory.extras.callbacks - {'loss': 0.0155, 'learning_rate': 4.8652e-05, 'epoch': 0.32, 'throughput': 298.24} |
|
|
|
06/06/2024 21:48:05 - INFO - llamafactory.extras.callbacks - {'loss': 0.0193, 'learning_rate': 4.8581e-05, 'epoch': 0.33, 'throughput': 298.10} |
|
|
|
06/06/2024 21:49:40 - INFO - llamafactory.extras.callbacks - {'loss': 0.0164, 'learning_rate': 4.8507e-05, 'epoch': 0.33, 'throughput': 298.12} |
|
|
|
06/06/2024 21:49:40 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-200 |
|
|
|
06/06/2024 21:49:40 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/config.json |
|
|
|
06/06/2024 21:49:40 - INFO - transformers.configuration_utils - Model config Qwen2Config { |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151645, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"quantization_config": { |
|
"batch_size": 1, |
|
"bits": 8, |
|
"block_name_to_quantize": null, |
|
"cache_block_outputs": true, |
|
"damp_percent": 0.1, |
|
"dataset": null, |
|
"desc_act": false, |
|
"exllama_config": { |
|
"version": 1 |
|
}, |
|
"group_size": 128, |
|
"max_input_length": null, |
|
"model_seqlen": null, |
|
"module_name_preceding_first_block": null, |
|
"modules_in_block_to_quantize": null, |
|
"pad_token_id": null, |
|
"quant_method": "gptq", |
|
"sym": true, |
|
"tokenizer": null, |
|
"true_sequential": true, |
|
"use_cuda_fp16": false, |
|
"use_exllama": true |
|
}, |
|
"rms_norm_eps": 1e-06, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": 131072, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "float16", |
|
"transformers_version": "4.41.2", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
06/06/2024 21:49:40 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-200/tokenizer_config.json |
|
|
|
06/06/2024 21:49:40 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-200/special_tokens_map.json |
|
|
|
06/06/2024 21:51:18 - INFO - llamafactory.extras.callbacks - {'loss': 0.0140, 'learning_rate': 4.8432e-05, 'epoch': 0.34, 'throughput': 298.66} |
|
|
|
06/06/2024 21:52:51 - INFO - llamafactory.extras.callbacks - {'loss': 0.0154, 'learning_rate': 4.8355e-05, 'epoch': 0.35, 'throughput': 298.57} |
|
|
|
06/06/2024 21:54:25 - INFO - llamafactory.extras.callbacks - {'loss': 0.0147, 'learning_rate': 4.8276e-05, 'epoch': 0.36, 'throughput': 298.63} |
|
|
|
06/06/2024 21:55:59 - INFO - llamafactory.extras.callbacks - {'loss': 0.0185, 'learning_rate': 4.8196e-05, 'epoch': 0.37, 'throughput': 298.72} |
|
|
|
06/06/2024 21:57:34 - INFO - llamafactory.extras.callbacks - {'loss': 0.0160, 'learning_rate': 4.8114e-05, 'epoch': 0.38, 'throughput': 298.74} |
|
|
|
06/06/2024 21:59:07 - INFO - llamafactory.extras.callbacks - {'loss': 0.0168, 'learning_rate': 4.8030e-05, 'epoch': 0.38, 'throughput': 298.65} |
|
|
|
06/06/2024 22:00:38 - INFO - llamafactory.extras.callbacks - {'loss': 0.0154, 'learning_rate': 4.7944e-05, 'epoch': 0.39, 'throughput': 298.39} |
|
|
|
06/06/2024 22:02:21 - INFO - llamafactory.extras.callbacks - {'loss': 0.0149, 'learning_rate': 4.7856e-05, 'epoch': 0.40, 'throughput': 299.32} |
|
|
|
06/06/2024 22:03:05 - INFO - transformers.trainer - |
|
|
|
Training completed. Do not forget to share your model on huggingface.co/models =) |
|
|
|
|
|
|
|
06/06/2024 22:03:05 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21 |
|
|
|
06/06/2024 22:03:06 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/config.json |
|
|
|
06/06/2024 22:03:06 - INFO - transformers.configuration_utils - Model config Qwen2Config { |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 151643, |
|
"eos_token_id": 151645, |
|
"hidden_act": "silu", |
|
"hidden_size": 3584, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 18944, |
|
"max_position_embeddings": 32768, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 28, |
|
"num_hidden_layers": 28, |
|
"num_key_value_heads": 4, |
|
"quantization_config": { |
|
"batch_size": 1, |
|
"bits": 8, |
|
"block_name_to_quantize": null, |
|
"cache_block_outputs": true, |
|
"damp_percent": 0.1, |
|
"dataset": null, |
|
"desc_act": false, |
|
"exllama_config": { |
|
"version": 1 |
|
}, |
|
"group_size": 128, |
|
"max_input_length": null, |
|
"model_seqlen": null, |
|
"module_name_preceding_first_block": null, |
|
"modules_in_block_to_quantize": null, |
|
"pad_token_id": null, |
|
"quant_method": "gptq", |
|
"sym": true, |
|
"tokenizer": null, |
|
"true_sequential": true, |
|
"use_cuda_fp16": false, |
|
"use_exllama": true |
|
}, |
|
"rms_norm_eps": 1e-06, |
|
"rope_theta": 1000000.0, |
|
"sliding_window": 131072, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "float16", |
|
"transformers_version": "4.41.2", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 152064 |
|
} |
|
|
|
|
|
06/06/2024 22:03:06 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/tokenizer_config.json |
|
|
|
06/06/2024 22:03:06 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/special_tokens_map.json |
|
|
|
06/06/2024 22:03:06 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot. |
|
|
|
06/06/2024 22:03:06 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields: |
|
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}} |
|
|
|
|