|
05/15/2024 23:19:53 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /home/f200309/.cache/huggingface/hub/models--Qwen--CodeQwen1.5-7B/snapshots/5ce5a1554e50a9e3bb236de7c0b8a2a1746186e4/tokenizer.json |
|
|
|
05/15/2024 23:19:53 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None |
|
|
|
05/15/2024 23:19:53 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None |
|
|
|
05/15/2024 23:19:53 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /home/f200309/.cache/huggingface/hub/models--Qwen--CodeQwen1.5-7B/snapshots/5ce5a1554e50a9e3bb236de7c0b8a2a1746186e4/tokenizer_config.json |
|
|
|
05/15/2024 23:19:55 - INFO - llmtuner.data.loader - Loading dataset bigsmoke05/optimized-solidity-dataset... |
|
|
|
05/15/2024 23:20:04 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/f200309/.cache/huggingface/hub/models--Qwen--CodeQwen1.5-7B/snapshots/5ce5a1554e50a9e3bb236de7c0b8a2a1746186e4/config.json |
|
|
|
05/15/2024 23:20:04 - INFO - transformers.configuration_utils - Model config Qwen2Config { |
|
"_name_or_path": "Qwen/CodeQwen1.5-7B", |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 2, |
|
"eos_token_id": 2, |
|
"hidden_act": "silu", |
|
"hidden_size": 4096, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 13440, |
|
"max_position_embeddings": 65536, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 32, |
|
"num_hidden_layers": 32, |
|
"num_key_value_heads": 4, |
|
"rms_norm_eps": 1e-05, |
|
"rope_theta": 1000000, |
|
"rotary_emb_base": 1000000, |
|
"seq_length": 65536, |
|
"sliding_window": 65536, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.41.0.dev0", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 92416 |
|
} |
|
|
|
|
|
05/15/2024 23:20:04 - INFO - llmtuner.model.utils.quantization - Quantizing model to 4 bit. |
|
|
|
05/15/2024 23:20:04 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /home/f200309/.cache/huggingface/hub/models--Qwen--CodeQwen1.5-7B/snapshots/5ce5a1554e50a9e3bb236de7c0b8a2a1746186e4/model.safetensors.index.json |
|
|
|
05/15/2024 23:20:04 - INFO - transformers.modeling_utils - Instantiating Qwen2ForCausalLM model under default dtype torch.float16. |
|
|
|
05/15/2024 23:20:04 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig { |
|
"bos_token_id": 2, |
|
"eos_token_id": 2, |
|
"use_cache": false |
|
} |
|
|
|
|
|
05/15/2024 23:20:18 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing Qwen2ForCausalLM. |
|
|
|
|
|
05/15/2024 23:20:18 - INFO - transformers.modeling_utils - All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/CodeQwen1.5-7B. |
|
If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training. |
|
|
|
05/15/2024 23:20:18 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /home/f200309/.cache/huggingface/hub/models--Qwen--CodeQwen1.5-7B/snapshots/5ce5a1554e50a9e3bb236de7c0b8a2a1746186e4/generation_config.json |
|
|
|
05/15/2024 23:20:18 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig { |
|
"bos_token_id": 2, |
|
"eos_token_id": [ |
|
4, |
|
2 |
|
], |
|
"pad_token_id": 92298, |
|
"top_p": 0.95 |
|
} |
|
|
|
|
|
05/15/2024 23:20:19 - INFO - llmtuner.model.utils.checkpointing - Gradient checkpointing enabled. |
|
|
|
05/15/2024 23:20:19 - INFO - llmtuner.model.utils.attention - Using torch SDPA for faster training and inference. |
|
|
|
05/15/2024 23:20:19 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA |
|
|
|
05/15/2024 23:20:19 - INFO - llmtuner.model.loader - trainable params: 1638400 || all params: 7251922944 || trainable%: 0.0226 |
|
|
|
05/15/2024 23:20:19 - INFO - transformers.trainer - Using auto half precision backend |
|
|
|
05/15/2024 23:20:20 - INFO - transformers.trainer - ***** Running training ***** |
|
|
|
05/15/2024 23:20:20 - INFO - transformers.trainer - Num examples = 39 |
|
|
|
05/15/2024 23:20:20 - INFO - transformers.trainer - Num Epochs = 2 |
|
|
|
05/15/2024 23:20:20 - INFO - transformers.trainer - Instantaneous batch size per device = 2 |
|
|
|
05/15/2024 23:20:20 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 8 |
|
|
|
05/15/2024 23:20:20 - INFO - transformers.trainer - Gradient Accumulation steps = 4 |
|
|
|
05/15/2024 23:20:20 - INFO - transformers.trainer - Total optimization steps = 10 |
|
|
|
05/15/2024 23:20:20 - INFO - transformers.trainer - Number of trainable parameters = 1,638,400 |
|
|
|
05/15/2024 23:20:59 - INFO - llmtuner.extras.callbacks - {'loss': 0.7110, 'learning_rate': 4.0000e-05, 'epoch': 0.20} |
|
|
|
05/15/2024 23:21:31 - INFO - llmtuner.extras.callbacks - {'loss': 0.7668, 'learning_rate': 8.0000e-05, 'epoch': 0.40} |
|
|
|
05/15/2024 23:22:00 - INFO - llmtuner.extras.callbacks - {'loss': 0.9731, 'learning_rate': 1.2000e-04, 'epoch': 0.60} |
|
|
|
05/15/2024 23:22:33 - INFO - llmtuner.extras.callbacks - {'loss': 0.8388, 'learning_rate': 1.6000e-04, 'epoch': 0.80} |
|
|
|
05/15/2024 23:23:02 - INFO - llmtuner.extras.callbacks - {'loss': 0.8452, 'learning_rate': 2.0000e-04, 'epoch': 1.00} |
|
|
|
05/15/2024 23:23:46 - INFO - llmtuner.extras.callbacks - {'loss': 0.6138, 'learning_rate': 1.6000e-04, 'epoch': 1.20} |
|
|
|
05/15/2024 23:24:28 - INFO - llmtuner.extras.callbacks - {'loss': 0.5858, 'learning_rate': 1.2000e-04, 'epoch': 1.40} |
|
|
|
05/15/2024 23:25:07 - INFO - llmtuner.extras.callbacks - {'loss': 0.7195, 'learning_rate': 8.0000e-05, 'epoch': 1.60} |
|
|
|
05/15/2024 23:25:26 - INFO - llmtuner.extras.callbacks - {'loss': 1.0479, 'learning_rate': 4.0000e-05, 'epoch': 1.80} |
|
|
|
05/15/2024 23:25:55 - INFO - llmtuner.extras.callbacks - {'loss': 0.9978, 'learning_rate': 0.0000e+00, 'epoch': 2.00} |
|
|
|
05/15/2024 23:25:55 - INFO - transformers.trainer - |
|
|
|
Training completed. Do not forget to share your model on huggingface.co/models =) |
|
|
|
|
|
|
|
05/15/2024 23:25:55 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen1.5-Code-7B/lora/train_scopai1 |
|
|
|
05/15/2024 23:25:57 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/f200309/.cache/huggingface/hub/models--Qwen--CodeQwen1.5-7B/snapshots/5ce5a1554e50a9e3bb236de7c0b8a2a1746186e4/config.json |
|
|
|
05/15/2024 23:25:57 - INFO - transformers.configuration_utils - Model config Qwen2Config { |
|
"architectures": [ |
|
"Qwen2ForCausalLM" |
|
], |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 2, |
|
"eos_token_id": 2, |
|
"hidden_act": "silu", |
|
"hidden_size": 4096, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 13440, |
|
"max_position_embeddings": 65536, |
|
"max_window_layers": 28, |
|
"model_type": "qwen2", |
|
"num_attention_heads": 32, |
|
"num_hidden_layers": 32, |
|
"num_key_value_heads": 4, |
|
"rms_norm_eps": 1e-05, |
|
"rope_theta": 1000000, |
|
"rotary_emb_base": 1000000, |
|
"seq_length": 65536, |
|
"sliding_window": 65536, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.41.0.dev0", |
|
"use_cache": true, |
|
"use_sliding_window": false, |
|
"vocab_size": 92416 |
|
} |
|
|
|
|
|
05/15/2024 23:25:57 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen1.5-Code-7B/lora/train_scopai1/tokenizer_config.json |
|
|
|
05/15/2024 23:25:57 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen1.5-Code-7B/lora/train_scopai1/special_tokens_map.json |
|
|
|
05/15/2024 23:25:57 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields: |
|
{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}} |
|
|
|
|