term_Qwen2_7_json_lora / running_log.txt
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file vocab.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/vocab.json
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file merges.txt from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/merges.txt
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/tokenizer.json
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None
06/06/2024 20:45:49 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/tokenizer_config.json
06/06/2024 20:45:49 - WARNING - transformers.tokenization_utils_base - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
06/06/2024 20:45:49 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>
06/06/2024 20:45:49 - INFO - llamafactory.data.loader - Loading dataset train.json...
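The tokenizer files above are pulled from the Hugging Face cache for Qwen/Qwen2-7B-Instruct-GPTQ-Int8, and the template step pins <|im_end|> as the EOS token. A minimal sketch (not part of the logged run) of reproducing that load outside LLaMA-Factory, assuming only transformers is installed; the contents of train.json are not shown in this log, so the record format in the comment is purely illustrative:

    # Minimal sketch: load the same tokenizer that the log reports caching above.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct-GPTQ-Int8")
    print(tokenizer.eos_token)  # the qwen template above uses <|im_end|> as EOS

    # train.json is not shown in the log; LLaMA-Factory commonly accepts
    # alpaca-style records such as (illustrative only):
    # {"instruction": "...", "input": "...", "output": "..."}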
06/06/2024 20:45:49 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/config.json
06/06/2024 20:45:49 - INFO - transformers.configuration_utils - Model config Qwen2Config {
"_name_or_path": "Qwen/Qwen2-7B-Instruct-GPTQ-Int8",
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151645,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"max_window_layers": 28,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"quantization_config": {
"batch_size": 1,
"bits": 8,
"block_name_to_quantize": null,
"cache_block_outputs": true,
"damp_percent": 0.1,
"dataset": null,
"desc_act": false,
"exllama_config": {
"version": 1
},
"group_size": 128,
"max_input_length": null,
"model_seqlen": null,
"module_name_preceding_first_block": null,
"modules_in_block_to_quantize": null,
"pad_token_id": null,
"quant_method": "gptq",
"sym": true,
"tokenizer": null,
"true_sequential": true,
"use_cuda_fp16": false,
"use_exllama": true
},
"rms_norm_eps": 1e-06,
"rope_theta": 1000000.0,
"sliding_window": 131072,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.41.2",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064
}
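The config block above shows the checkpoint ships with an 8-bit GPTQ quantization_config (group size 128, symmetric, ExLlama v1 kernel). A minimal sketch, an assumption rather than part of the run, of reading those settings programmatically with recent transformers:

    # Minimal sketch: inspect the GPTQ settings reported in the config block above.
    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("Qwen/Qwen2-7B-Instruct-GPTQ-Int8")
    q = config.quantization_config
    q = q if isinstance(q, dict) else q.to_dict()  # dict or GPTQConfig, depending on version
    print(q["quant_method"], q["bits"], q["group_size"], q["sym"])  # gptq 8 128 True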
06/06/2024 20:45:49 - WARNING - llamafactory.model.model_utils.attention - FlashAttention-2 is not installed.
06/06/2024 20:45:49 - INFO - llamafactory.model.model_utils.quantization - Loading 8-bit GPTQ-quantized model.
06/06/2024 20:45:49 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/model.safetensors.index.json
06/06/2024 20:46:15 - INFO - transformers.modeling_utils - Instantiating Qwen2ForCausalLM model under default dtype torch.float16.
06/06/2024 20:46:15 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
"bos_token_id": 151643,
"eos_token_id": 151645,
"use_cache": false
}
06/06/2024 20:46:41 - WARNING - transformers.modeling_utils - Some weights of the model checkpoint at Qwen/Qwen2-7B-Instruct-GPTQ-Int8 were not used when initializing Qwen2ForCausalLM: ['model.layers.0.mlp.down_proj.bias', 'model.layers.0.mlp.gate_proj.bias', 'model.layers.0.mlp.up_proj.bias', 'model.layers.0.self_attn.o_proj.bias', 'model.layers.1.mlp.down_proj.bias', 'model.layers.1.mlp.gate_proj.bias', 'model.layers.1.mlp.up_proj.bias', 'model.layers.1.self_attn.o_proj.bias', 'model.layers.10.mlp.down_proj.bias', 'model.layers.10.mlp.gate_proj.bias', 'model.layers.10.mlp.up_proj.bias', 'model.layers.10.self_attn.o_proj.bias', 'model.layers.11.mlp.down_proj.bias', 'model.layers.11.mlp.gate_proj.bias', 'model.layers.11.mlp.up_proj.bias', 'model.layers.11.self_attn.o_proj.bias', 'model.layers.12.mlp.down_proj.bias', 'model.layers.12.mlp.gate_proj.bias', 'model.layers.12.mlp.up_proj.bias', 'model.layers.12.self_attn.o_proj.bias', 'model.layers.13.mlp.down_proj.bias', 'model.layers.13.mlp.gate_proj.bias', 'model.layers.13.mlp.up_proj.bias', 'model.layers.13.self_attn.o_proj.bias', 'model.layers.14.mlp.down_proj.bias', 'model.layers.14.mlp.gate_proj.bias', 'model.layers.14.mlp.up_proj.bias', 'model.layers.14.self_attn.o_proj.bias', 'model.layers.15.mlp.down_proj.bias', 'model.layers.15.mlp.gate_proj.bias', 'model.layers.15.mlp.up_proj.bias', 'model.layers.15.self_attn.o_proj.bias', 'model.layers.16.mlp.down_proj.bias', 'model.layers.16.mlp.gate_proj.bias', 'model.layers.16.mlp.up_proj.bias', 'model.layers.16.self_attn.o_proj.bias', 'model.layers.17.mlp.down_proj.bias', 'model.layers.17.mlp.gate_proj.bias', 'model.layers.17.mlp.up_proj.bias', 'model.layers.17.self_attn.o_proj.bias', 'model.layers.18.mlp.down_proj.bias', 'model.layers.18.mlp.gate_proj.bias', 'model.layers.18.mlp.up_proj.bias', 'model.layers.18.self_attn.o_proj.bias', 'model.layers.19.mlp.down_proj.bias', 'model.layers.19.mlp.gate_proj.bias', 'model.layers.19.mlp.up_proj.bias', 'model.layers.19.self_attn.o_proj.bias', 'model.layers.2.mlp.down_proj.bias', 'model.layers.2.mlp.gate_proj.bias', 'model.layers.2.mlp.up_proj.bias', 'model.layers.2.self_attn.o_proj.bias', 'model.layers.20.mlp.down_proj.bias', 'model.layers.20.mlp.gate_proj.bias', 'model.layers.20.mlp.up_proj.bias', 'model.layers.20.self_attn.o_proj.bias', 'model.layers.21.mlp.down_proj.bias', 'model.layers.21.mlp.gate_proj.bias', 'model.layers.21.mlp.up_proj.bias', 'model.layers.21.self_attn.o_proj.bias', 'model.layers.22.mlp.down_proj.bias', 'model.layers.22.mlp.gate_proj.bias', 'model.layers.22.mlp.up_proj.bias', 'model.layers.22.self_attn.o_proj.bias', 'model.layers.23.mlp.down_proj.bias', 'model.layers.23.mlp.gate_proj.bias', 'model.layers.23.mlp.up_proj.bias', 'model.layers.23.self_attn.o_proj.bias', 'model.layers.24.mlp.down_proj.bias', 'model.layers.24.mlp.gate_proj.bias', 'model.layers.24.mlp.up_proj.bias', 'model.layers.24.self_attn.o_proj.bias', 'model.layers.25.mlp.down_proj.bias', 'model.layers.25.mlp.gate_proj.bias', 'model.layers.25.mlp.up_proj.bias', 'model.layers.25.self_attn.o_proj.bias', 'model.layers.26.mlp.down_proj.bias', 'model.layers.26.mlp.gate_proj.bias', 'model.layers.26.mlp.up_proj.bias', 'model.layers.26.self_attn.o_proj.bias', 'model.layers.27.mlp.down_proj.bias', 'model.layers.27.mlp.gate_proj.bias', 'model.layers.27.mlp.up_proj.bias', 'model.layers.27.self_attn.o_proj.bias', 'model.layers.3.mlp.down_proj.bias', 'model.layers.3.mlp.gate_proj.bias', 'model.layers.3.mlp.up_proj.bias', 'model.layers.3.self_attn.o_proj.bias', 
'model.layers.4.mlp.down_proj.bias', 'model.layers.4.mlp.gate_proj.bias', 'model.layers.4.mlp.up_proj.bias', 'model.layers.4.self_attn.o_proj.bias', 'model.layers.5.mlp.down_proj.bias', 'model.layers.5.mlp.gate_proj.bias', 'model.layers.5.mlp.up_proj.bias', 'model.layers.5.self_attn.o_proj.bias', 'model.layers.6.mlp.down_proj.bias', 'model.layers.6.mlp.gate_proj.bias', 'model.layers.6.mlp.up_proj.bias', 'model.layers.6.self_attn.o_proj.bias', 'model.layers.7.mlp.down_proj.bias', 'model.layers.7.mlp.gate_proj.bias', 'model.layers.7.mlp.up_proj.bias', 'model.layers.7.self_attn.o_proj.bias', 'model.layers.8.mlp.down_proj.bias', 'model.layers.8.mlp.gate_proj.bias', 'model.layers.8.mlp.up_proj.bias', 'model.layers.8.self_attn.o_proj.bias', 'model.layers.9.mlp.down_proj.bias', 'model.layers.9.mlp.gate_proj.bias', 'model.layers.9.mlp.up_proj.bias', 'model.layers.9.self_attn.o_proj.bias']
- This IS expected if you are initializing Qwen2ForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Qwen2ForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
06/06/2024 20:46:41 - INFO - transformers.modeling_utils - All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B-Instruct-GPTQ-Int8.
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.
06/06/2024 20:46:41 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/generation_config.json
06/06/2024 20:46:41 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
"bos_token_id": 151643,
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"repetition_penalty": 1.05,
"top_p": 0.8
}
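The generation_config.json values logged above (nucleus sampling with top_p 0.8 and a mild repetition penalty) are what generate() falls back to when no overrides are passed. A minimal sketch of the equivalent explicit object, assuming transformers:

    # Minimal sketch: the same generation defaults as the logged block,
    # expressed as an explicit GenerationConfig.
    from transformers import GenerationConfig

    gen_cfg = GenerationConfig(
        do_sample=True,
        top_p=0.8,
        repetition_penalty=1.05,
        pad_token_id=151643,
        eos_token_id=[151645, 151643],
    )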
06/06/2024 20:46:41 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.
06/06/2024 20:46:41 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.
06/06/2024 20:46:41 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
06/06/2024 20:46:41 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
06/06/2024 20:46:41 - INFO - llamafactory.model.model_utils.misc - Found linear modules: q_proj,o_proj,k_proj,gate_proj,down_proj,up_proj,v_proj
06/06/2024 20:46:42 - INFO - llamafactory.model.loader - trainable params: 20185088 || all params: 1110384128 || trainable%: 1.8178
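The adapter targets every linear module listed above, and the logged trainable-parameter count is consistent with LoRA rank 8: the in+out dimensions of the seven targets sum to 90,112 per layer, and 28 layers x 8 x 90,112 = 20,185,088. A minimal peft sketch under that assumption (alpha and dropout do not appear in the log and are illustrative only):

    # Minimal sketch, not the exact LLaMA-Factory internals: a LoraConfig matching
    # the logged target modules and the rank implied by the parameter count.
    from peft import LoraConfig

    lora_cfg = LoraConfig(
        r=8,                # consistent with the logged 20,185,088 trainable params
        lora_alpha=16,      # assumed, not in the log
        lora_dropout=0.0,   # assumed, not in the log
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        task_type="CAUSAL_LM",
    )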
06/06/2024 20:46:42 - INFO - transformers.trainer - Using auto half precision backend
06/06/2024 20:46:42 - INFO - transformers.trainer - ***** Running training *****
06/06/2024 20:46:42 - INFO - transformers.trainer - Num examples = 9,600
06/06/2024 20:46:42 - INFO - transformers.trainer - Num Epochs = 3
06/06/2024 20:46:42 - INFO - transformers.trainer - Instantaneous batch size per device = 2
06/06/2024 20:46:42 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 16
06/06/2024 20:46:42 - INFO - transformers.trainer - Gradient Accumulation steps = 8
06/06/2024 20:46:42 - INFO - transformers.trainer - Total optimization steps = 1,800
06/06/2024 20:46:42 - INFO - transformers.trainer - Number of trainable parameters = 20,185,088
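The step count above follows from the other numbers: with 2 examples per device, 8 accumulation steps and what appears to be a single GPU, the effective batch is 16, so 9,600 examples give 600 optimizer steps per epoch and 1,800 over 3 epochs. As arithmetic:

    # Minimal sketch of the arithmetic behind the logged training arguments
    # (single device assumed, since 2 * 8 = 16 matches the total batch size).
    num_examples, epochs = 9_600, 3
    per_device_bs, grad_accum, num_devices = 2, 8, 1
    effective_bs = per_device_bs * grad_accum * num_devices   # 16
    steps_per_epoch = num_examples // effective_bs            # 600
    total_steps = steps_per_epoch * epochs                    # 1,800, as logged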
06/06/2024 20:48:17 - INFO - llamafactory.extras.callbacks - {'loss': 0.4212, 'learning_rate': 4.9999e-05, 'epoch': 0.01, 'throughput': 317.96}
06/06/2024 20:49:49 - INFO - llamafactory.extras.callbacks - {'loss': 0.2803, 'learning_rate': 4.9996e-05, 'epoch': 0.02, 'throughput': 307.70}
06/06/2024 20:51:24 - INFO - llamafactory.extras.callbacks - {'loss': 0.1484, 'learning_rate': 4.9991e-05, 'epoch': 0.03, 'throughput': 305.82}
06/06/2024 20:52:54 - INFO - llamafactory.extras.callbacks - {'loss': 0.1151, 'learning_rate': 4.9985e-05, 'epoch': 0.03, 'throughput': 297.64}
06/06/2024 20:54:29 - INFO - llamafactory.extras.callbacks - {'loss': 0.0751, 'learning_rate': 4.9976e-05, 'epoch': 0.04, 'throughput': 297.62}
06/06/2024 20:56:07 - INFO - llamafactory.extras.callbacks - {'loss': 0.0471, 'learning_rate': 4.9966e-05, 'epoch': 0.05, 'throughput': 300.36}
06/06/2024 20:57:47 - INFO - llamafactory.extras.callbacks - {'loss': 0.0368, 'learning_rate': 4.9953e-05, 'epoch': 0.06, 'throughput': 303.39}
06/06/2024 20:59:22 - INFO - llamafactory.extras.callbacks - {'loss': 0.0311, 'learning_rate': 4.9939e-05, 'epoch': 0.07, 'throughput': 302.60}
06/06/2024 21:00:58 - INFO - llamafactory.extras.callbacks - {'loss': 0.0262, 'learning_rate': 4.9923e-05, 'epoch': 0.07, 'throughput': 302.73}
06/06/2024 21:02:36 - INFO - llamafactory.extras.callbacks - {'loss': 0.0244, 'learning_rate': 4.9905e-05, 'epoch': 0.08, 'throughput': 303.83}
06/06/2024 21:04:11 - INFO - llamafactory.extras.callbacks - {'loss': 0.0266, 'learning_rate': 4.9885e-05, 'epoch': 0.09, 'throughput': 303.09}
06/06/2024 21:05:46 - INFO - llamafactory.extras.callbacks - {'loss': 0.0283, 'learning_rate': 4.9868e-05, 'epoch': 0.10, 'throughput': 300.01}
06/06/2024 21:07:21 - INFO - llamafactory.extras.callbacks - {'loss': 0.0265, 'learning_rate': 4.9844e-05, 'epoch': 0.11, 'throughput': 299.96}
06/06/2024 21:08:54 - INFO - llamafactory.extras.callbacks - {'loss': 0.0235, 'learning_rate': 4.9819e-05, 'epoch': 0.12, 'throughput': 299.39}
06/06/2024 21:10:28 - INFO - llamafactory.extras.callbacks - {'loss': 0.0229, 'learning_rate': 4.9792e-05, 'epoch': 0.12, 'throughput': 299.15}
06/06/2024 21:12:01 - INFO - llamafactory.extras.callbacks - {'loss': 0.0238, 'learning_rate': 4.9763e-05, 'epoch': 0.13, 'throughput': 298.31}
06/06/2024 21:13:35 - INFO - llamafactory.extras.callbacks - {'loss': 0.0208, 'learning_rate': 4.9732e-05, 'epoch': 0.14, 'throughput': 298.24}
06/06/2024 21:15:11 - INFO - llamafactory.extras.callbacks - {'loss': 0.0191, 'learning_rate': 4.9699e-05, 'epoch': 0.15, 'throughput': 298.57}
06/06/2024 21:16:43 - INFO - llamafactory.extras.callbacks - {'loss': 0.0205, 'learning_rate': 4.9664e-05, 'epoch': 0.16, 'throughput': 298.01}
06/06/2024 21:18:20 - INFO - llamafactory.extras.callbacks - {'loss': 0.0193, 'learning_rate': 4.9628e-05, 'epoch': 0.17, 'throughput': 298.56}
06/06/2024 21:18:20 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-100
06/06/2024 21:18:20 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/config.json
06/06/2024 21:18:20 - INFO - transformers.configuration_utils - Model config Qwen2Config {
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151645,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"max_window_layers": 28,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"quantization_config": {
"batch_size": 1,
"bits": 8,
"block_name_to_quantize": null,
"cache_block_outputs": true,
"damp_percent": 0.1,
"dataset": null,
"desc_act": false,
"exllama_config": {
"version": 1
},
"group_size": 128,
"max_input_length": null,
"model_seqlen": null,
"module_name_preceding_first_block": null,
"modules_in_block_to_quantize": null,
"pad_token_id": null,
"quant_method": "gptq",
"sym": true,
"tokenizer": null,
"true_sequential": true,
"use_cuda_fp16": false,
"use_exllama": true
},
"rms_norm_eps": 1e-06,
"rope_theta": 1000000.0,
"sliding_window": 131072,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.41.2",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064
}
06/06/2024 21:18:20 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-100/tokenizer_config.json
06/06/2024 21:18:20 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-100/special_tokens_map.json
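The checkpoint-100 directory saved above contains the LoRA adapter (alongside the tokenizer files just logged), so it can be smoke-tested by attaching it to the quantized base model. A minimal sketch, an assumption rather than part of the run, requiring peft and accelerate:

    # Minimal sketch: attach the checkpoint-100 adapter to the quantized base.
    from peft import PeftModel
    from transformers import AutoModelForCausalLM

    base = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen2-7B-Instruct-GPTQ-Int8", device_map="auto"  # needs accelerate
    )
    model = PeftModel.from_pretrained(
        base,
        "saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-100",
    )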
06/06/2024 21:19:53 - INFO - llamafactory.extras.callbacks - {'loss': 0.0240, 'learning_rate': 4.9589e-05, 'epoch': 0.17, 'throughput': 298.12}
06/06/2024 21:21:27 - INFO - llamafactory.extras.callbacks - {'loss': 0.0206, 'learning_rate': 4.9549e-05, 'epoch': 0.18, 'throughput': 297.89}
06/06/2024 21:22:59 - INFO - llamafactory.extras.callbacks - {'loss': 0.0188, 'learning_rate': 4.9507e-05, 'epoch': 0.19, 'throughput': 297.43}
06/06/2024 21:24:35 - INFO - llamafactory.extras.callbacks - {'loss': 0.0183, 'learning_rate': 4.9463e-05, 'epoch': 0.20, 'throughput': 297.64}
06/06/2024 21:26:04 - INFO - llamafactory.extras.callbacks - {'loss': 0.0189, 'learning_rate': 4.9417e-05, 'epoch': 0.21, 'throughput': 297.06}
06/06/2024 21:27:40 - INFO - llamafactory.extras.callbacks - {'loss': 0.0213, 'learning_rate': 4.9369e-05, 'epoch': 0.22, 'throughput': 297.53}
06/06/2024 21:29:12 - INFO - llamafactory.extras.callbacks - {'loss': 0.0214, 'learning_rate': 4.9319e-05, 'epoch': 0.23, 'throughput': 297.22}
06/06/2024 21:30:47 - INFO - llamafactory.extras.callbacks - {'loss': 0.0214, 'learning_rate': 4.9268e-05, 'epoch': 0.23, 'throughput': 297.22}
06/06/2024 21:32:22 - INFO - llamafactory.extras.callbacks - {'loss': 0.0203, 'learning_rate': 4.9215e-05, 'epoch': 0.24, 'throughput': 297.36}
06/06/2024 21:33:56 - INFO - llamafactory.extras.callbacks - {'loss': 0.0194, 'learning_rate': 4.9159e-05, 'epoch': 0.25, 'throughput': 297.35}
06/06/2024 21:35:27 - INFO - llamafactory.extras.callbacks - {'loss': 0.0158, 'learning_rate': 4.9102e-05, 'epoch': 0.26, 'throughput': 296.96}
06/06/2024 21:37:01 - INFO - llamafactory.extras.callbacks - {'loss': 0.0172, 'learning_rate': 4.9044e-05, 'epoch': 0.27, 'throughput': 296.94}
06/06/2024 21:38:35 - INFO - llamafactory.extras.callbacks - {'loss': 0.0193, 'learning_rate': 4.8983e-05, 'epoch': 0.28, 'throughput': 297.06}
06/06/2024 21:40:13 - INFO - llamafactory.extras.callbacks - {'loss': 0.0189, 'learning_rate': 4.8920e-05, 'epoch': 0.28, 'throughput': 297.62}
06/06/2024 21:41:49 - INFO - llamafactory.extras.callbacks - {'loss': 0.0190, 'learning_rate': 4.8856e-05, 'epoch': 0.29, 'throughput': 297.91}
06/06/2024 21:43:23 - INFO - llamafactory.extras.callbacks - {'loss': 0.0168, 'learning_rate': 4.8790e-05, 'epoch': 0.30, 'throughput': 297.99}
06/06/2024 21:45:00 - INFO - llamafactory.extras.callbacks - {'loss': 0.0194, 'learning_rate': 4.8722e-05, 'epoch': 0.31, 'throughput': 298.39}
06/06/2024 21:46:33 - INFO - llamafactory.extras.callbacks - {'loss': 0.0155, 'learning_rate': 4.8652e-05, 'epoch': 0.32, 'throughput': 298.24}
06/06/2024 21:48:05 - INFO - llamafactory.extras.callbacks - {'loss': 0.0193, 'learning_rate': 4.8581e-05, 'epoch': 0.33, 'throughput': 298.10}
06/06/2024 21:49:40 - INFO - llamafactory.extras.callbacks - {'loss': 0.0164, 'learning_rate': 4.8507e-05, 'epoch': 0.33, 'throughput': 298.12}
06/06/2024 21:49:40 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-200
06/06/2024 21:49:40 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/config.json
06/06/2024 21:49:40 - INFO - transformers.configuration_utils - Model config Qwen2Config {
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151645,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"max_window_layers": 28,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"quantization_config": {
"batch_size": 1,
"bits": 8,
"block_name_to_quantize": null,
"cache_block_outputs": true,
"damp_percent": 0.1,
"dataset": null,
"desc_act": false,
"exllama_config": {
"version": 1
},
"group_size": 128,
"max_input_length": null,
"model_seqlen": null,
"module_name_preceding_first_block": null,
"modules_in_block_to_quantize": null,
"pad_token_id": null,
"quant_method": "gptq",
"sym": true,
"tokenizer": null,
"true_sequential": true,
"use_cuda_fp16": false,
"use_exllama": true
},
"rms_norm_eps": 1e-06,
"rope_theta": 1000000.0,
"sliding_window": 131072,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.41.2",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064
}
06/06/2024 21:49:40 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-200/tokenizer_config.json
06/06/2024 21:49:40 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/checkpoint-200/special_tokens_map.json
06/06/2024 21:51:18 - INFO - llamafactory.extras.callbacks - {'loss': 0.0140, 'learning_rate': 4.8432e-05, 'epoch': 0.34, 'throughput': 298.66}
06/06/2024 21:52:51 - INFO - llamafactory.extras.callbacks - {'loss': 0.0154, 'learning_rate': 4.8355e-05, 'epoch': 0.35, 'throughput': 298.57}
06/06/2024 21:54:25 - INFO - llamafactory.extras.callbacks - {'loss': 0.0147, 'learning_rate': 4.8276e-05, 'epoch': 0.36, 'throughput': 298.63}
06/06/2024 21:55:59 - INFO - llamafactory.extras.callbacks - {'loss': 0.0185, 'learning_rate': 4.8196e-05, 'epoch': 0.37, 'throughput': 298.72}
06/06/2024 21:57:34 - INFO - llamafactory.extras.callbacks - {'loss': 0.0160, 'learning_rate': 4.8114e-05, 'epoch': 0.38, 'throughput': 298.74}
06/06/2024 21:59:07 - INFO - llamafactory.extras.callbacks - {'loss': 0.0168, 'learning_rate': 4.8030e-05, 'epoch': 0.38, 'throughput': 298.65}
06/06/2024 22:00:38 - INFO - llamafactory.extras.callbacks - {'loss': 0.0154, 'learning_rate': 4.7944e-05, 'epoch': 0.39, 'throughput': 298.39}
06/06/2024 22:02:21 - INFO - llamafactory.extras.callbacks - {'loss': 0.0149, 'learning_rate': 4.7856e-05, 'epoch': 0.40, 'throughput': 299.32}
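The callback lines above are Python dict literals, so the training-loss curve can be recovered directly from this file. A minimal sketch, assuming the log is saved locally as running_log.txt:

    # Minimal sketch: pull (epoch, loss) pairs out of the callback lines above.
    import ast
    import re

    points = []
    with open("running_log.txt") as f:
        for line in f:
            m = re.search(r"llamafactory\.extras\.callbacks - (\{.*\})", line)
            if m:
                rec = ast.literal_eval(m.group(1))
                points.append((rec["epoch"], rec["loss"]))
    print(points[:3])  # [(0.01, 0.4212), (0.02, 0.2803), (0.03, 0.1484)]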
06/06/2024 22:03:05 - INFO - transformers.trainer -
Training completed. Do not forget to share your model on huggingface.co/models =)
06/06/2024 22:03:05 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21
06/06/2024 22:03:06 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/zeus/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct-GPTQ-Int8/snapshots/22d9ef924dfae7700c4b620be6600d5c7ea31f88/config.json
06/06/2024 22:03:06 - INFO - transformers.configuration_utils - Model config Qwen2Config {
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151645,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"max_position_embeddings": 32768,
"max_window_layers": 28,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"quantization_config": {
"batch_size": 1,
"bits": 8,
"block_name_to_quantize": null,
"cache_block_outputs": true,
"damp_percent": 0.1,
"dataset": null,
"desc_act": false,
"exllama_config": {
"version": 1
},
"group_size": 128,
"max_input_length": null,
"model_seqlen": null,
"module_name_preceding_first_block": null,
"modules_in_block_to_quantize": null,
"pad_token_id": null,
"quant_method": "gptq",
"sym": true,
"tokenizer": null,
"true_sequential": true,
"use_cuda_fp16": false,
"use_exllama": true
},
"rms_norm_eps": 1e-06,
"rope_theta": 1000000.0,
"sliding_window": 131072,
"tie_word_embeddings": false,
"torch_dtype": "float16",
"transformers_version": "4.41.2",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064
}
06/06/2024 22:03:06 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/tokenizer_config.json
06/06/2024 22:03:06 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21/special_tokens_map.json
06/06/2024 22:03:06 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot.
06/06/2024 22:03:06 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
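With the final adapter written to saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21, the tuned model can be served by keeping the adapter on top of the GPTQ base (merging LoRA weights into GPTQ-quantized layers is generally not supported). A minimal inference sketch, an assumption rather than part of the logged run, requiring peft, accelerate and a GPU:

    # Minimal sketch: chat with the GPTQ base model plus the final LoRA adapter.
    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM, AutoTokenizer

    adapter_dir = "saves/Qwen2-7B-int8-Chat/lora/train_2024-06-06-20-45-21"
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-7B-Instruct-GPTQ-Int8")
    base = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen2-7B-Instruct-GPTQ-Int8", device_map="auto"
    )
    model = PeftModel.from_pretrained(base, adapter_dir)

    messages = [{"role": "user", "content": "Hello"}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    with torch.no_grad():
        output = model.generate(input_ids, max_new_tokens=64)
    print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))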