stinkypoop committed on
Commit 4fe12bf · verified · 1 Parent(s): 31622a8

Delete QwenTT-0.5B-INT8

QwenTT-0.5B-INT8/README.md DELETED
@@ -1,59 +0,0 @@
- ---
- license: other
- library_name: peft
- tags:
- - llama-factory
- - lora
- - generated_from_trainer
- base_model: Qwen/Qwen1.5-0.5B-Chat
- model-index:
- - name: QwenTT-0.5B-INT8
-   results: []
- ---
-
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
- should probably proofread and complete it, then remove this comment. -->
-
- # QwenTT-0.5B-INT8
-
- This model is a fine-tuned version of [Qwen/Qwen1.5-0.5B-Chat](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat) on the identity dataset.
-
- ## Model description
-
- More information needed
-
- ## Intended uses & limitations
-
- More information needed
-
- ## Training and evaluation data
-
- More information needed
-
- ## Training procedure
-
- ### Training hyperparameters
-
- The following hyperparameters were used during training:
- - learning_rate: 5e-05
- - train_batch_size: 2
- - eval_batch_size: 8
- - seed: 42
- - gradient_accumulation_steps: 8
- - total_train_batch_size: 16
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- - lr_scheduler_type: cosine
- - num_epochs: 3.0
- - mixed_precision_training: Native AMP
-
- ### Training results
-
-
-
- ### Framework versions
-
- - PEFT 0.11.1
- - Transformers 4.40.2
- - Pytorch 2.2.1+cu121
- - Datasets 2.19.1
- - Tokenizers 0.19.1
 
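The deleted model card above describes a PEFT LoRA adapter trained with LLaMA-Factory on top of Qwen/Qwen1.5-0.5B-Chat. For context, here is a minimal sketch of how such an adapter is typically loaded for inference; the local directory path is a hypothetical placeholder, since this commit removes the hosted files.

```python
# Minimal sketch (not from this repo): loading a PEFT LoRA adapter on top of the base model.
# "./QwenTT-0.5B-INT8" is a hypothetical local copy of the deleted adapter files.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "Qwen/Qwen1.5-0.5B-Chat"
adapter_dir = "./QwenTT-0.5B-INT8"  # hypothetical path

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id)

# Attach the LoRA weights (r=8 on q_proj/v_proj, per the adapter config below).
model = PeftModel.from_pretrained(base_model, adapter_dir)

inputs = tokenizer("Who are you?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```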
QwenTT-0.5B-INT8/added_tokens.json DELETED
@@ -1,5 +0,0 @@
- {
-   "<|endoftext|>": 151643,
-   "<|im_end|>": 151645,
-   "<|im_start|>": 151644
- }
 
QwenTT-0.5B-INT8/all_results.json DELETED
@@ -1,8 +0,0 @@
- {
-     "epoch": 2.608695652173913,
-     "total_flos": 27871774801920.0,
-     "train_loss": 3.3604891459147135,
-     "train_runtime": 81.3934,
-     "train_samples_per_second": 3.354,
-     "train_steps_per_second": 0.184
- }
 
QwenTT-0.5B-INT8/config.json DELETED
@@ -1,29 +0,0 @@
- {
-   "alpha_pattern": {},
-   "auto_mapping": null,
-   "base_model_name_or_path": "Qwen/Qwen1.5-0.5B-Chat",
-   "bias": "none",
-   "fan_in_fan_out": false,
-   "inference_mode": true,
-   "init_lora_weights": true,
-   "layer_replication": null,
-   "layers_pattern": null,
-   "layers_to_transform": null,
-   "loftq_config": {},
-   "lora_alpha": 16,
-   "lora_dropout": 0,
-   "megatron_config": null,
-   "megatron_core": "megatron.core",
-   "modules_to_save": null,
-   "peft_type": "LORA",
-   "r": 8,
-   "rank_pattern": {},
-   "revision": null,
-   "target_modules": [
-     "v_proj",
-     "q_proj"
-   ],
-   "task_type": "CAUSAL_LM",
-   "use_dora": false,
-   "use_rslora": false
- }
 
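The deleted config.json above is a standard PEFT LoRA adapter config. As a reference, the same settings expressed through the peft API would look roughly like this (a sketch, not the author's training code):

```python
# Sketch: a peft LoraConfig mirroring the adapter config above.
from peft import LoraConfig, TaskType, get_peft_model

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                                  # "r": 8
    lora_alpha=16,                        # "lora_alpha": 16
    lora_dropout=0.0,                     # "lora_dropout": 0
    bias="none",                          # "bias": "none"
    target_modules=["q_proj", "v_proj"],  # "target_modules"
)

# Wrapping a base model with this config adds the trainable low-rank matrices:
# peft_model = get_peft_model(base_model, lora_config)
```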
QwenTT-0.5B-INT8/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
QwenTT-0.5B-INT8/model.safetensors DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:bbe5be185b3c4eac11027a7757c117526bec685ef0b8f1b04cd85732532d1060
- size 3158328
 
QwenTT-0.5B-INT8/running_log.txt DELETED
@@ -1,165 +0,0 @@
- 05/19/2024 22:55:50 - INFO - transformers.tokenization_utils_base - loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B-Chat/snapshots/4d14e384a4b037942bb3f3016665157c8bcb70ea/vocab.json
-
- 05/19/2024 22:55:50 - INFO - transformers.tokenization_utils_base - loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B-Chat/snapshots/4d14e384a4b037942bb3f3016665157c8bcb70ea/merges.txt
-
- 05/19/2024 22:55:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B-Chat/snapshots/4d14e384a4b037942bb3f3016665157c8bcb70ea/tokenizer.json
-
- 05/19/2024 22:55:50 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at None
-
- 05/19/2024 22:55:50 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at None
-
- 05/19/2024 22:55:50 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B-Chat/snapshots/4d14e384a4b037942bb3f3016665157c8bcb70ea/tokenizer_config.json
-
- 05/19/2024 22:55:51 - WARNING - transformers.tokenization_utils_base - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
-
- 05/19/2024 22:55:51 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>
-
- 05/19/2024 22:55:51 - INFO - llamafactory.data.loader - Loading dataset identity.json...
-
- 05/19/2024 22:55:58 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B-Chat/snapshots/4d14e384a4b037942bb3f3016665157c8bcb70ea/config.json
-
- 05/19/2024 22:55:58 - INFO - transformers.configuration_utils - Model config Qwen2Config {
-   "_name_or_path": "Qwen/Qwen1.5-0.5B-Chat",
-   "architectures": [
-     "Qwen2ForCausalLM"
-   ],
-   "attention_dropout": 0.0,
-   "bos_token_id": 151643,
-   "eos_token_id": 151645,
-   "hidden_act": "silu",
-   "hidden_size": 1024,
-   "initializer_range": 0.02,
-   "intermediate_size": 2816,
-   "max_position_embeddings": 32768,
-   "max_window_layers": 21,
-   "model_type": "qwen2",
-   "num_attention_heads": 16,
-   "num_hidden_layers": 24,
-   "num_key_value_heads": 16,
-   "rms_norm_eps": 1e-06,
-   "rope_theta": 1000000.0,
-   "sliding_window": 32768,
-   "tie_word_embeddings": true,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.40.2",
-   "use_cache": true,
-   "use_sliding_window": false,
-   "vocab_size": 151936
- }
-
-
- 05/19/2024 22:55:58 - INFO - llamafactory.model.utils.quantization - Quantizing model to 8 bit.
-
- 05/19/2024 22:55:58 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B-Chat/snapshots/4d14e384a4b037942bb3f3016665157c8bcb70ea/model.safetensors
-
- 05/19/2024 22:55:58 - INFO - transformers.modeling_utils - Instantiating Qwen2ForCausalLM model under default dtype torch.float16.
-
- 05/19/2024 22:55:58 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
-   "bos_token_id": 151643,
-   "eos_token_id": 151645
- }
-
-
- 05/19/2024 22:56:02 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing Qwen2ForCausalLM.
-
-
- 05/19/2024 22:56:02 - INFO - transformers.modeling_utils - All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen1.5-0.5B-Chat.
- If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.
-
- 05/19/2024 22:56:02 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B-Chat/snapshots/4d14e384a4b037942bb3f3016665157c8bcb70ea/generation_config.json
-
- 05/19/2024 22:56:02 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {
-   "bos_token_id": 151643,
-   "do_sample": true,
-   "eos_token_id": [
-     151645,
-     151643
-   ],
-   "pad_token_id": 151643,
-   "repetition_penalty": 1.1,
-   "top_p": 0.8
- }
-
-
- 05/19/2024 22:56:02 - INFO - llamafactory.model.utils.checkpointing - Gradient checkpointing enabled.
-
- 05/19/2024 22:56:02 - INFO - llamafactory.model.utils.attention - Using torch SDPA for faster training and inference.
-
- 05/19/2024 22:56:02 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.
-
- 05/19/2024 22:56:02 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA
-
- 05/19/2024 22:56:02 - INFO - llamafactory.model.loader - trainable params: 786432 || all params: 464774144 || trainable%: 0.1692
-
- 05/19/2024 22:56:02 - INFO - transformers.trainer - Using auto half precision backend
-
- 05/19/2024 22:56:03 - INFO - transformers.trainer - ***** Running training *****
-
- 05/19/2024 22:56:03 - INFO - transformers.trainer - Num examples = 91
-
- 05/19/2024 22:56:03 - INFO - transformers.trainer - Num Epochs = 3
-
- 05/19/2024 22:56:03 - INFO - transformers.trainer - Instantaneous batch size per device = 2
-
- 05/19/2024 22:56:03 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 16
-
- 05/19/2024 22:56:03 - INFO - transformers.trainer - Gradient Accumulation steps = 8
-
- 05/19/2024 22:56:03 - INFO - transformers.trainer - Total optimization steps = 15
-
- 05/19/2024 22:56:03 - INFO - transformers.trainer - Number of trainable parameters = 786,432
-
- 05/19/2024 22:56:30 - INFO - llamafactory.extras.callbacks - {'loss': 3.4258, 'learning_rate': 3.7500e-05, 'epoch': 0.87}
-
- 05/19/2024 22:56:57 - INFO - llamafactory.extras.callbacks - {'loss': 3.3578, 'learning_rate': 1.2500e-05, 'epoch': 1.74}
-
- 05/19/2024 22:57:24 - INFO - llamafactory.extras.callbacks - {'loss': 3.2979, 'learning_rate': 0.0000e+00, 'epoch': 2.61}
-
- 05/19/2024 22:57:24 - INFO - transformers.trainer -
-
- Training completed. Do not forget to share your model on huggingface.co/models =)
-
-
-
- 05/19/2024 22:57:24 - INFO - transformers.trainer - Saving model checkpoint to saves/Qwen1.5-0.5B-Chat/lora/QwenTT-0.5B-INT8
-
- 05/19/2024 22:57:25 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen1.5-0.5B-Chat/snapshots/4d14e384a4b037942bb3f3016665157c8bcb70ea/config.json
-
- 05/19/2024 22:57:25 - INFO - transformers.configuration_utils - Model config Qwen2Config {
-   "architectures": [
-     "Qwen2ForCausalLM"
-   ],
-   "attention_dropout": 0.0,
-   "bos_token_id": 151643,
-   "eos_token_id": 151645,
-   "hidden_act": "silu",
-   "hidden_size": 1024,
-   "initializer_range": 0.02,
-   "intermediate_size": 2816,
-   "max_position_embeddings": 32768,
-   "max_window_layers": 21,
-   "model_type": "qwen2",
-   "num_attention_heads": 16,
-   "num_hidden_layers": 24,
-   "num_key_value_heads": 16,
-   "rms_norm_eps": 1e-06,
-   "rope_theta": 1000000.0,
-   "sliding_window": 32768,
-   "tie_word_embeddings": true,
-   "torch_dtype": "bfloat16",
-   "transformers_version": "4.40.2",
-   "use_cache": true,
-   "use_sliding_window": false,
-   "vocab_size": 151936
- }
-
-
- 05/19/2024 22:57:25 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Qwen1.5-0.5B-Chat/lora/QwenTT-0.5B-INT8/tokenizer_config.json
-
- 05/19/2024 22:57:25 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Qwen1.5-0.5B-Chat/lora/QwenTT-0.5B-INT8/special_tokens_map.json
-
- 05/19/2024 22:57:25 - WARNING - llamafactory.extras.ploting - No metric eval_loss to plot.
-
- 05/19/2024 22:57:25 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
- {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
-
 
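The log above reports `trainable params: 786432 || all params: 464774144 || trainable%: 0.1692`. That count follows directly from the LoRA settings and the logged Qwen2 config (hidden_size 1024, 24 layers, rank 8, q_proj and v_proj targeted); a quick back-of-the-envelope check:

```python
# Back-of-the-envelope check of the trainable-parameter count in the log above.
hidden_size = 1024   # from the logged Qwen2Config
num_layers = 24      # num_hidden_layers
r = 8                # LoRA rank

# Each targeted projection gets two low-rank matrices: A (r x hidden) and B (hidden x r).
params_per_projection = r * hidden_size + hidden_size * r      # 16,384
per_layer = params_per_projection * 2                          # q_proj and v_proj

trainable = per_layer * num_layers
print(trainable)                                  # 786432
print(round(100 * trainable / 464_774_144, 4))    # 0.1692 (%)
```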
QwenTT-0.5B-INT8/special_tokens_map.json DELETED
@@ -1,20 +0,0 @@
- {
-   "additional_special_tokens": [
-     "<|im_start|>",
-     "<|im_end|>"
-   ],
-   "eos_token": {
-     "content": "<|im_end|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   },
-   "pad_token": {
-     "content": "<|endoftext|>",
-     "lstrip": false,
-     "normalized": false,
-     "rstrip": false,
-     "single_word": false
-   }
- }
 
QwenTT-0.5B-INT8/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
QwenTT-0.5B-INT8/tokenizer_config.json DELETED
@@ -1,44 +0,0 @@
- {
-   "add_prefix_space": false,
-   "added_tokens_decoder": {
-     "151643": {
-       "content": "<|endoftext|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151644": {
-       "content": "<|im_start|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     },
-     "151645": {
-       "content": "<|im_end|>",
-       "lstrip": false,
-       "normalized": false,
-       "rstrip": false,
-       "single_word": false,
-       "special": true
-     }
-   },
-   "additional_special_tokens": [
-     "<|im_start|>",
-     "<|im_end|>"
-   ],
-   "bos_token": null,
-   "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
-   "clean_up_tokenization_spaces": false,
-   "eos_token": "<|im_end|>",
-   "errors": "replace",
-   "model_max_length": 32768,
-   "pad_token": "<|endoftext|>",
-   "padding_side": "right",
-   "split_special_tokens": false,
-   "tokenizer_class": "Qwen2Tokenizer",
-   "unk_token": null
- }
 
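The `chat_template` in the deleted tokenizer_config.json above is a ChatML-style Qwen template. A short sketch of rendering a conversation with such a template through the usual transformers API (using the base model's tokenizer, since the deleted copy is no longer hosted):

```python
# Sketch: applying a ChatML-style chat template like the one in the file above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
# Produces roughly:
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant
```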
QwenTT-0.5B-INT8/train_results.json DELETED
@@ -1,8 +0,0 @@
- {
-     "epoch": 2.608695652173913,
-     "total_flos": 27871774801920.0,
-     "train_loss": 3.3604891459147135,
-     "train_runtime": 81.3934,
-     "train_samples_per_second": 3.354,
-     "train_steps_per_second": 0.184
- }
 
QwenTT-0.5B-INT8/trainer_config.yaml DELETED
@@ -1,31 +0,0 @@
- cutoff_len: 1024
- dataset: identity
- dataset_dir: data
- do_train: true
- finetuning_type: lora
- flash_attn: auto
- fp16: true
- gradient_accumulation_steps: 8
- learning_rate: 5.0e-05
- logging_steps: 5
- lora_alpha: 16
- lora_dropout: 0
- lora_rank: 8
- lora_target: q_proj,v_proj
- lr_scheduler_type: cosine
- max_grad_norm: 1.0
- max_samples: 100000
- model_name_or_path: Qwen/Qwen1.5-0.5B-Chat
- num_train_epochs: 3.0
- optim: adamw_torch
- output_dir: saves/Qwen1.5-0.5B-Chat/lora/QwenTT-0.5B-INT8
- packing: false
- per_device_train_batch_size: 2
- plot_loss: true
- preprocessing_num_workers: 16
- quantization_bit: 8
- report_to: none
- save_steps: 100
- stage: sft
- template: qwen
- warmup_steps: 0
 
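The YAML above is a LLaMA-Factory SFT config; `quantization_bit: 8` combined with `finetuning_type: lora` corresponds to the "Quantizing model to 8 bit" line in the running log. Outside of LLaMA-Factory, loading the base model in 8 bit would typically go through bitsandbytes, roughly as below (a sketch under that assumption, not the project's own loader):

```python
# Sketch: 8-bit base-model loading that roughly mirrors "quantization_bit: 8" above.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-0.5B-Chat",
    quantization_config=bnb_config,
    torch_dtype=torch.float16,   # matches "fp16: true" in the YAML
)
```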
QwenTT-0.5B-INT8/trainer_log.jsonl DELETED
@@ -1,4 +0,0 @@
- {"current_steps": 5, "total_steps": 15, "loss": 3.4258, "learning_rate": 3.7500000000000003e-05, "epoch": 0.8695652173913043, "percentage": 33.33, "elapsed_time": "0:00:27", "remaining_time": "0:00:54"}
- {"current_steps": 10, "total_steps": 15, "loss": 3.3578, "learning_rate": 1.2500000000000006e-05, "epoch": 1.7391304347826086, "percentage": 66.67, "elapsed_time": "0:00:54", "remaining_time": "0:00:27"}
- {"current_steps": 15, "total_steps": 15, "loss": 3.2979, "learning_rate": 0.0, "epoch": 2.608695652173913, "percentage": 100.0, "elapsed_time": "0:01:21", "remaining_time": "0:00:00"}
- {"current_steps": 15, "total_steps": 15, "epoch": 2.608695652173913, "percentage": 100.0, "elapsed_time": "0:01:21", "remaining_time": "0:00:00"}
 
QwenTT-0.5B-INT8/trainer_state.json DELETED
@@ -1,51 +0,0 @@
- {
-   "best_metric": null,
-   "best_model_checkpoint": null,
-   "epoch": 2.608695652173913,
-   "eval_steps": 500,
-   "global_step": 15,
-   "is_hyper_param_search": false,
-   "is_local_process_zero": true,
-   "is_world_process_zero": true,
-   "log_history": [
-     {
-       "epoch": 0.8695652173913043,
-       "grad_norm": 1.6034659147262573,
-       "learning_rate": 3.7500000000000003e-05,
-       "loss": 3.4258,
-       "step": 5
-     },
-     {
-       "epoch": 1.7391304347826086,
-       "grad_norm": 1.0319217443466187,
-       "learning_rate": 1.2500000000000006e-05,
-       "loss": 3.3578,
-       "step": 10
-     },
-     {
-       "epoch": 2.608695652173913,
-       "grad_norm": 1.2878139019012451,
-       "learning_rate": 0.0,
-       "loss": 3.2979,
-       "step": 15
-     },
-     {
-       "epoch": 2.608695652173913,
-       "step": 15,
-       "total_flos": 27871774801920.0,
-       "train_loss": 3.3604891459147135,
-       "train_runtime": 81.3934,
-       "train_samples_per_second": 3.354,
-       "train_steps_per_second": 0.184
-     }
-   ],
-   "logging_steps": 5,
-   "max_steps": 15,
-   "num_input_tokens_seen": 0,
-   "num_train_epochs": 3,
-   "save_steps": 100,
-   "total_flos": 27871774801920.0,
-   "train_batch_size": 2,
-   "trial_name": null,
-   "trial_params": null
- }
 
QwenTT-0.5B-INT8/training_args.bin DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:44c4d8f46ae72d6f97f06fdd5f7f5b232339172b58af2232de6515d3688c44bf
- size 5176
 
QwenTT-0.5B-INT8/training_loss.png DELETED
Binary file (39.5 kB)
 
QwenTT-0.5B-INT8/vocab.json DELETED
The diff for this file is too large to render. See raw diff