BlackHorseTeck committed
Commit 33e0752
Parent: ee67685

Upload 18 files

.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,63 @@
  ---
  license: mit
+ library_name: peft
+ tags:
+ - llama-factory
+ - lora
+ - generated_from_trainer
+ base_model: microsoft/phi-1_5
+ model-index:
+ - name: train_2024-05-09-19-57-19
+   results: []
+ datasets:
+ - llamafactory/alpaca_en
+ language:
+ - en
  ---
+
+
+ # Bahia 1.0, based on Phi-1.5 and trained on alpaca_en
+
+ This model is a fine-tuned version of [microsoft/phi-1_5](https://huggingface.co/microsoft/phi-1_5) on the alpaca_en dataset.
+
+ ## Model description
+
+ This is the first LLM to come out of the Mediterranean city of Oran (nicknamed El-Bahia), and possibly the first LLM trained in North Africa. More to come from us soon ;-)
+
+ ## Intended uses & limitations
+
+ This model is released under the MIT license, enjoy!
+
+ ## Training and evaluation data
+
+ Trained on the alpaca_en dataset from LLaMA Factory.
+
+ ## Training procedure
+
+ Trained locally on bare-metal hardware with an AMD GPU, under Linux Mint 21.
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0002
+ - train_batch_size: 2
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 8
+ - total_train_batch_size: 16
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - num_epochs: 3.0
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+ Please check the training logs under the Files and versions tab.
+
+ ### Framework versions
+
+ - PEFT 0.10.0
+ - Transformers 4.40.1
+ - Pytorch 2.3.0+rocm5.7
+ - Datasets 2.19.1
+ - Tokenizers 0.19.1
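For convenience, a minimal usage sketch for this adapter with Transformers and PEFT follows. The repo id `BlackHorseTeck/Bahia-1.0` is a placeholder for wherever these files end up hosted, not a confirmed name; substitute the actual model repo.

```python
# Minimal sketch, assuming the files in this commit live in a Hub repo
# (the id "BlackHorseTeck/Bahia-1.0" below is a placeholder, not confirmed).
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5")

# Attach the LoRA adapter from this repo on top of the frozen base model.
model = PeftModel.from_pretrained(base, "BlackHorseTeck/Bahia-1.0")

# The prompt format mirrors the chat_template shipped in tokenizer_config.json.
prompt = "Human: What is the capital of Algeria?\nAssistant: "
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```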
adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "microsoft/phi-1_5",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "v_proj",
+     "q_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
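For reference, the JSON above corresponds to roughly this PEFT configuration (a sketch; field names follow PEFT 0.10.0, the version listed in the model card):

```python
from peft import LoraConfig

# Sketch of a LoraConfig mirroring adapter_config.json above.
lora_config = LoraConfig(
    base_model_name_or_path="microsoft/phi-1_5",
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.0,
    target_modules=["q_proj", "v_proj"],
    bias="none",
)
```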
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:565170c74ac08713fee3936749c983f89c74ad7d341572a703d900b01031f52c
+ size 6304096
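Note these three lines are a Git LFS pointer, not the weights themselves; LFS fetches the real file, which can be checked against the `oid`. A small verification sketch, assuming the file has already been downloaded locally:

```python
import hashlib

# Hash a local download and compare it to the oid in the LFS pointer above.
def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "565170c74ac08713fee3936749c983f89c74ad7d341572a703d900b01031f52c"
assert sha256_of("adapter_model.safetensors") == expected
```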
added_tokens.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "\t\t": 50294,
+   "\t\t\t": 50293,
+   "\t\t\t\t": 50292,
+   "\t\t\t\t\t": 50291,
+   "\t\t\t\t\t\t": 50290,
+   "\t\t\t\t\t\t\t": 50289,
+   "\t\t\t\t\t\t\t\t": 50288,
+   "\t\t\t\t\t\t\t\t\t": 50287,
+   "  ": 50286,
+   "   ": 50285,
+   "    ": 50284,
+   "     ": 50283,
+   "      ": 50282,
+   "       ": 50281,
+   "        ": 50280,
+   "         ": 50279,
+   "          ": 50278,
+   "           ": 50277,
+   "            ": 50276,
+   "             ": 50275,
+   "              ": 50274,
+   "               ": 50273,
+   "                ": 50272,
+   "                 ": 50271,
+   "                  ": 50270,
+   "                   ": 50269,
+   "                    ": 50268,
+   "                     ": 50267,
+   "                      ": 50266,
+   "                       ": 50265,
+   "                        ": 50264,
+   "                         ": 50263,
+   "                          ": 50262,
+   "                           ": 50261,
+   "                            ": 50260,
+   "                             ": 50259,
+   "                              ": 50258,
+   "                               ": 50257
+ }
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 2.976,
+   "total_flos": 1234235324694528.0,
+   "train_loss": 1.4291518221619308,
+   "train_runtime": 192.8949,
+   "train_samples_per_second": 7.776,
+   "train_steps_per_second": 0.482
+ }
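As a sanity check, the throughput figures above are consistent with 500 training samples over 3 epochs in the reported runtime:

```python
# Rederive the throughput numbers in all_results.json above.
runtime = 192.8949
print(500 * 3 / runtime)  # ~7.776 -> train_samples_per_second
print(93 / runtime)       # ~0.482 -> train_steps_per_second (93 optimizer steps)
```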
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b4f562cc4b12f1154a0b658b9d74ab9d024a686420fdf154ebc00a2219976df7
+ size 2836578696
running_log.txt ADDED
@@ -0,0 +1,179 @@
+ 05/09/2024 19:59:23 - INFO - transformers.tokenization_utils_base - loading file vocab.json from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/vocab.json
+
+ 05/09/2024 19:59:23 - INFO - transformers.tokenization_utils_base - loading file merges.txt from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/merges.txt
+
+ 05/09/2024 19:59:23 - INFO - transformers.tokenization_utils_base - loading file tokenizer.json from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/tokenizer.json
+
+ 05/09/2024 19:59:23 - INFO - transformers.tokenization_utils_base - loading file added_tokens.json from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/added_tokens.json
+
+ 05/09/2024 19:59:23 - INFO - transformers.tokenization_utils_base - loading file special_tokens_map.json from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/special_tokens_map.json
+
+ 05/09/2024 19:59:23 - INFO - transformers.tokenization_utils_base - loading file tokenizer_config.json from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/tokenizer_config.json
+
+ 05/09/2024 19:59:23 - INFO - llmtuner.data.template - Add pad token: <|endoftext|>
+
+ 05/09/2024 19:59:23 - INFO - llmtuner.data.loader - Loading dataset alpaca_data_en_52k.json...
+
+ 05/09/2024 19:59:24 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/config.json
+
+ 05/09/2024 19:59:24 - INFO - transformers.configuration_utils - Model config PhiConfig {
+   "_name_or_path": "microsoft/phi-1_5",
+   "architectures": [
+     "PhiForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": null,
+   "embd_pdrop": 0.0,
+   "eos_token_id": null,
+   "hidden_act": "gelu_new",
+   "hidden_size": 2048,
+   "initializer_range": 0.02,
+   "intermediate_size": 8192,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 2048,
+   "model_type": "phi",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 24,
+   "num_key_value_heads": 32,
+   "partial_rotary_factor": 0.5,
+   "qk_layernorm": false,
+   "resid_pdrop": 0.0,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.40.1",
+   "use_cache": true,
+   "vocab_size": 51200
+ }
+
+
+ 05/09/2024 20:08:34 - INFO - transformers.modeling_utils - loading weights file model.safetensors from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/model.safetensors
+
+ 05/09/2024 20:08:34 - INFO - transformers.modeling_utils - Instantiating PhiForCausalLM model under default dtype torch.float16.
+
+ 05/09/2024 20:08:34 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {}
+
+
+ 05/09/2024 20:08:35 - INFO - transformers.modeling_utils - All model checkpoint weights were used when initializing PhiForCausalLM.
+
+
+ 05/09/2024 20:08:35 - INFO - transformers.modeling_utils - All the weights of PhiForCausalLM were initialized from the model checkpoint at microsoft/phi-1_5.
+ If your task is similar to the task the model of the checkpoint was trained on, you can already use PhiForCausalLM for predictions without further training.
+
+ 05/09/2024 20:08:36 - INFO - transformers.generation.configuration_utils - loading configuration file generation_config.json from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/generation_config.json
+
+ 05/09/2024 20:08:36 - INFO - transformers.generation.configuration_utils - Generate config GenerationConfig {}
+
+
+ 05/09/2024 20:08:36 - INFO - llmtuner.model.utils.checkpointing - Gradient checkpointing enabled.
+
+ 05/09/2024 20:08:36 - INFO - llmtuner.model.utils.attention - Using torch SDPA for faster training and inference.
+
+ 05/09/2024 20:08:36 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA
+
+ 05/09/2024 20:08:36 - INFO - llmtuner.model.loader - trainable params: 1572864 || all params: 1419843584 || trainable%: 0.1108
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - Using auto half precision backend
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - ***** Running training *****
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - Num examples = 500
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - Num Epochs = 3
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - Instantaneous batch size per device = 2
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - Total train batch size (w. parallel, distributed & accumulation) = 16
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - Gradient Accumulation steps = 8
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - Total optimization steps = 93
+
+ 05/09/2024 20:08:36 - INFO - transformers.trainer - Number of trainable parameters = 1,572,864
+
+ 05/09/2024 20:08:47 - INFO - llmtuner.extras.callbacks - {'loss': 2.1283, 'learning_rate': 1.9858e-04, 'epoch': 0.16}
+
+ 05/09/2024 20:08:57 - INFO - llmtuner.extras.callbacks - {'loss': 1.8000, 'learning_rate': 1.9435e-04, 'epoch': 0.32}
+
+ 05/09/2024 20:09:07 - INFO - llmtuner.extras.callbacks - {'loss': 1.5595, 'learning_rate': 1.8743e-04, 'epoch': 0.48}
+
+ 05/09/2024 20:09:18 - INFO - llmtuner.extras.callbacks - {'loss': 1.6097, 'learning_rate': 1.7803e-04, 'epoch': 0.64}
+
+ 05/09/2024 20:09:29 - INFO - llmtuner.extras.callbacks - {'loss': 1.2942, 'learning_rate': 1.6641e-04, 'epoch': 0.80}
+
+ 05/09/2024 20:09:38 - INFO - llmtuner.extras.callbacks - {'loss': 1.4620, 'learning_rate': 1.5290e-04, 'epoch': 0.96}
+
+ 05/09/2024 20:09:48 - INFO - llmtuner.extras.callbacks - {'loss': 1.4432, 'learning_rate': 1.3788e-04, 'epoch': 1.12}
+
+ 05/09/2024 20:09:59 - INFO - llmtuner.extras.callbacks - {'loss': 1.3561, 'learning_rate': 1.2178e-04, 'epoch': 1.28}
+
+ 05/09/2024 20:10:09 - INFO - llmtuner.extras.callbacks - {'loss': 1.3299, 'learning_rate': 1.0506e-04, 'epoch': 1.44}
+
+ 05/09/2024 20:10:19 - INFO - llmtuner.extras.callbacks - {'loss': 1.2208, 'learning_rate': 8.8204e-05, 'epoch': 1.60}
+
+ 05/09/2024 20:10:30 - INFO - llmtuner.extras.callbacks - {'loss': 1.4574, 'learning_rate': 7.1679e-05, 'epoch': 1.76}
+
+ 05/09/2024 20:10:41 - INFO - llmtuner.extras.callbacks - {'loss': 1.1423, 'learning_rate': 5.5961e-05, 'epoch': 1.92}
+
+ 05/09/2024 20:10:50 - INFO - llmtuner.extras.callbacks - {'loss': 1.3134, 'learning_rate': 4.1495e-05, 'epoch': 2.08}
+
+ 05/09/2024 20:11:01 - INFO - llmtuner.extras.callbacks - {'loss': 1.3384, 'learning_rate': 2.8695e-05, 'epoch': 2.24}
+
+ 05/09/2024 20:11:12 - INFO - llmtuner.extras.callbacks - {'loss': 1.3873, 'learning_rate': 1.7924e-05, 'epoch': 2.40}
+
+ 05/09/2024 20:11:22 - INFO - llmtuner.extras.callbacks - {'loss': 1.2214, 'learning_rate': 9.4885e-06, 'epoch': 2.56}
+
+ 05/09/2024 20:11:32 - INFO - llmtuner.extras.callbacks - {'loss': 1.3198, 'learning_rate': 3.6294e-06, 'epoch': 2.72}
+
+ 05/09/2024 20:11:42 - INFO - llmtuner.extras.callbacks - {'loss': 1.3832, 'learning_rate': 5.1307e-07, 'epoch': 2.88}
+
+ 05/09/2024 20:11:49 - INFO - transformers.trainer -
+
+ Training completed. Do not forget to share your model on huggingface.co/models =)
+
+
+
+ 05/09/2024 20:11:49 - INFO - transformers.trainer - Saving model checkpoint to saves/Phi-1.5-1.3B/lora/train_2024-05-09-19-57-19
+
+ 05/09/2024 20:11:49 - INFO - transformers.configuration_utils - loading configuration file config.json from cache at /home/.cache/huggingface/hub/models--microsoft--phi-1_5/snapshots/675aa382d814580b22651a30acb1a585d7c25963/config.json
+
+ 05/09/2024 20:11:49 - INFO - transformers.configuration_utils - Model config PhiConfig {
+   "_name_or_path": "microsoft/phi-1_5",
+   "architectures": [
+     "PhiForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": null,
+   "embd_pdrop": 0.0,
+   "eos_token_id": null,
+   "hidden_act": "gelu_new",
+   "hidden_size": 2048,
+   "initializer_range": 0.02,
+   "intermediate_size": 8192,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 2048,
+   "model_type": "phi",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 24,
+   "num_key_value_heads": 32,
+   "partial_rotary_factor": 0.5,
+   "qk_layernorm": false,
+   "resid_pdrop": 0.0,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.40.1",
+   "use_cache": true,
+   "vocab_size": 51200
+ }
+
+
+ 05/09/2024 20:11:49 - INFO - transformers.tokenization_utils_base - tokenizer config file saved in saves/Phi-1.5-1.3B/lora/train_2024-05-09-19-57-19/tokenizer_config.json
+
+ 05/09/2024 20:11:49 - INFO - transformers.tokenization_utils_base - Special tokens file saved in saves/Phi-1.5-1.3B/lora/train_2024-05-09-19-57-19/special_tokens_map.json
+
+ 05/09/2024 20:11:49 - INFO - transformers.modelcard - Dropping the following result as it does not have all the necessary fields:
+ {'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}}
+
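The `trainable params: 1572864` line in the log follows directly from the LoRA settings (r=8, targets `q_proj` and `v_proj`) and the Phi-1.5 shape (`hidden_size=2048`, 24 layers); a quick check:

```python
# Each adapted projection gets LoRA matrices A (r x d) and B (d x r),
# i.e. r * (in_features + out_features) extra parameters.
r, d, layers, targets = 8, 2048, 24, 2               # from adapter_config + PhiConfig
per_proj = r * (d + d)                               # 32,768 params per projection
total = per_proj * targets * layers                  # 1,572,864
print(total, f"{100 * total / 1_419_843_584:.4f}%")  # 1572864 0.1108%
```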
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,328 @@
+ {
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "50256": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50257": {
+       "content": "                               ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50258": {
+       "content": "                              ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50259": {
+       "content": "                             ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50260": {
+       "content": "                            ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50261": {
+       "content": "                           ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50262": {
+       "content": "                          ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50263": {
+       "content": "                         ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50264": {
+       "content": "                        ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50265": {
+       "content": "                       ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50266": {
+       "content": "                      ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50267": {
+       "content": "                     ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50268": {
+       "content": "                    ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50269": {
+       "content": "                   ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50270": {
+       "content": "                  ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50271": {
+       "content": "                 ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50272": {
+       "content": "                ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50273": {
+       "content": "               ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50274": {
+       "content": "              ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50275": {
+       "content": "             ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50276": {
+       "content": "            ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50277": {
+       "content": "           ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50278": {
+       "content": "          ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50279": {
+       "content": "         ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50280": {
+       "content": "        ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50281": {
+       "content": "       ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50282": {
+       "content": "      ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50283": {
+       "content": "     ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50284": {
+       "content": "    ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50285": {
+       "content": "   ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50286": {
+       "content": "  ",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50287": {
+       "content": "\t\t\t\t\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50288": {
+       "content": "\t\t\t\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50289": {
+       "content": "\t\t\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50290": {
+       "content": "\t\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50291": {
+       "content": "\t\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50292": {
+       "content": "\t\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50293": {
+       "content": "\t\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     },
+     "50294": {
+       "content": "\t\t",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": false
+     }
+   },
+   "bos_token": "<|endoftext|>",
+   "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message + '\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ 'Human: ' + content + '\\nAssistant: ' }}{% elif message['role'] == 'assistant' %}{{ content + '<|endoftext|>' + '\\n' }}{% endif %}{% endfor %}",
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 2048,
+   "pad_token": "<|endoftext|>",
+   "padding_side": "right",
+   "return_token_type_ids": false,
+   "split_special_tokens": false,
+   "tokenizer_class": "CodeGenTokenizer",
+   "unk_token": "<|endoftext|>"
+ }
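The `chat_template` above renders conversations in a plain `Human:`/`Assistant:` format. A sketch of how it expands (the repo id is a placeholder, as before):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("BlackHorseTeck/Bahia-1.0")  # placeholder id

messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi, how can I help?"},
]
print(tokenizer.apply_chat_template(messages, tokenize=False))
# Human: Hello!
# Assistant: Hi, how can I help?<|endoftext|>
```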
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 2.976,
+   "total_flos": 1234235324694528.0,
+   "train_loss": 1.4291518221619308,
+   "train_runtime": 192.8949,
+   "train_samples_per_second": 7.776,
+   "train_steps_per_second": 0.482
+ }
trainer_config.yaml ADDED
@@ -0,0 +1,28 @@
+ cutoff_len: 1024
+ dataset: alpaca_en
+ dataset_dir: data
+ do_train: true
+ finetuning_type: lora
+ flash_attn: auto
+ fp16: true
+ gradient_accumulation_steps: 8
+ learning_rate: 0.0002
+ logging_steps: 5
+ lora_alpha: 16
+ lora_dropout: 0
+ lora_rank: 8
+ lora_target: q_proj,v_proj
+ lr_scheduler_type: cosine
+ max_grad_norm: 1.0
+ max_samples: 500
+ model_name_or_path: microsoft/phi-1_5
+ num_train_epochs: 3.0
+ optim: adamw_torch
+ output_dir: saves/Phi-1.5-1.3B/lora/train_2024-05-09-19-57-19
+ packing: false
+ per_device_train_batch_size: 2
+ report_to: none
+ save_steps: 100
+ stage: sft
+ template: default
+ warmup_steps: 0
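The numbers in the model card and training log can be rederived from this config, e.g. the effective batch size of 16 and the 93 optimization steps (a sketch that approximates the Trainer's floor division; requires PyYAML):

```python
import yaml  # PyYAML

with open("trainer_config.yaml") as f:
    cfg = yaml.safe_load(f)

effective_bs = cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"]
steps_per_epoch = cfg["max_samples"] // effective_bs   # 500 // 16 = 31
print(effective_bs)                                    # 16, as in the README
print(steps_per_epoch * int(cfg["num_train_epochs"]))  # 93, as in the log
```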
trainer_log.jsonl ADDED
@@ -0,0 +1,19 @@
+ {"current_steps": 5, "total_steps": 93, "loss": 2.1283, "learning_rate": 0.00019857697953148037, "epoch": 0.16, "percentage": 5.38, "elapsed_time": "0:00:10", "remaining_time": "0:03:10"}
+ {"current_steps": 10, "total_steps": 93, "loss": 1.8, "learning_rate": 0.00019434841787099803, "epoch": 0.32, "percentage": 10.75, "elapsed_time": "0:00:20", "remaining_time": "0:02:51"}
+ {"current_steps": 15, "total_steps": 93, "loss": 1.5595, "learning_rate": 0.00018743466161445823, "epoch": 0.48, "percentage": 16.13, "elapsed_time": "0:00:30", "remaining_time": "0:02:39"}
+ {"current_steps": 20, "total_steps": 93, "loss": 1.6097, "learning_rate": 0.0001780324790952092, "epoch": 0.64, "percentage": 21.51, "elapsed_time": "0:00:41", "remaining_time": "0:02:31"}
+ {"current_steps": 25, "total_steps": 93, "loss": 1.2942, "learning_rate": 0.00016640946027672392, "epoch": 0.8, "percentage": 26.88, "elapsed_time": "0:00:52", "remaining_time": "0:02:23"}
+ {"current_steps": 30, "total_steps": 93, "loss": 1.462, "learning_rate": 0.00015289640103269625, "epoch": 0.96, "percentage": 32.26, "elapsed_time": "0:01:02", "remaining_time": "0:02:10"}
+ {"current_steps": 35, "total_steps": 93, "loss": 1.4432, "learning_rate": 0.0001378778885610576, "epoch": 1.12, "percentage": 37.63, "elapsed_time": "0:01:12", "remaining_time": "0:02:00"}
+ {"current_steps": 40, "total_steps": 93, "loss": 1.3561, "learning_rate": 0.00012178135587488515, "epoch": 1.28, "percentage": 43.01, "elapsed_time": "0:01:23", "remaining_time": "0:01:50"}
+ {"current_steps": 45, "total_steps": 93, "loss": 1.3299, "learning_rate": 0.00010506491688387127, "epoch": 1.44, "percentage": 48.39, "elapsed_time": "0:01:33", "remaining_time": "0:01:39"}
+ {"current_steps": 50, "total_steps": 93, "loss": 1.2208, "learning_rate": 8.820432828491542e-05, "epoch": 1.6, "percentage": 53.76, "elapsed_time": "0:01:43", "remaining_time": "0:01:29"}
+ {"current_steps": 55, "total_steps": 93, "loss": 1.4574, "learning_rate": 7.16794493317696e-05, "epoch": 1.76, "percentage": 59.14, "elapsed_time": "0:01:54", "remaining_time": "0:01:18"}
+ {"current_steps": 60, "total_steps": 93, "loss": 1.1423, "learning_rate": 5.596058484423656e-05, "epoch": 1.92, "percentage": 64.52, "elapsed_time": "0:02:04", "remaining_time": "0:01:08"}
+ {"current_steps": 65, "total_steps": 93, "loss": 1.3134, "learning_rate": 4.149510014046922e-05, "epoch": 2.08, "percentage": 69.89, "elapsed_time": "0:02:14", "remaining_time": "0:00:57"}
+ {"current_steps": 70, "total_steps": 93, "loss": 1.3384, "learning_rate": 2.869468883687798e-05, "epoch": 2.24, "percentage": 75.27, "elapsed_time": "0:02:24", "remaining_time": "0:00:47"}
+ {"current_steps": 75, "total_steps": 93, "loss": 1.3873, "learning_rate": 1.7923655879272393e-05, "epoch": 2.4, "percentage": 80.65, "elapsed_time": "0:02:35", "remaining_time": "0:00:37"}
+ {"current_steps": 80, "total_steps": 93, "loss": 1.2214, "learning_rate": 9.488549274967872e-06, "epoch": 2.56, "percentage": 86.02, "elapsed_time": "0:02:45", "remaining_time": "0:00:26"}
+ {"current_steps": 85, "total_steps": 93, "loss": 1.3198, "learning_rate": 3.6294356110059157e-06, "epoch": 2.7199999999999998, "percentage": 91.4, "elapsed_time": "0:02:56", "remaining_time": "0:00:16"}
+ {"current_steps": 90, "total_steps": 93, "loss": 1.3832, "learning_rate": 5.130676608104845e-07, "epoch": 2.88, "percentage": 96.77, "elapsed_time": "0:03:06", "remaining_time": "0:00:06"}
+ {"current_steps": 93, "total_steps": 93, "epoch": 2.976, "percentage": 100.0, "elapsed_time": "0:03:12", "remaining_time": "0:00:00"}
trainer_state.json ADDED
@@ -0,0 +1,156 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.976,
+   "eval_steps": 500,
+   "global_step": 93,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.16,
+       "grad_norm": 0.7531044483184814,
+       "learning_rate": 0.00019857697953148037,
+       "loss": 2.1283,
+       "step": 5
+     },
+     {
+       "epoch": 0.32,
+       "grad_norm": 2.2025675773620605,
+       "learning_rate": 0.00019434841787099803,
+       "loss": 1.8,
+       "step": 10
+     },
+     {
+       "epoch": 0.48,
+       "grad_norm": 0.9267807602882385,
+       "learning_rate": 0.00018743466161445823,
+       "loss": 1.5595,
+       "step": 15
+     },
+     {
+       "epoch": 0.64,
+       "grad_norm": 0.6875874996185303,
+       "learning_rate": 0.0001780324790952092,
+       "loss": 1.6097,
+       "step": 20
+     },
+     {
+       "epoch": 0.8,
+       "grad_norm": 0.7420657873153687,
+       "learning_rate": 0.00016640946027672392,
+       "loss": 1.2942,
+       "step": 25
+     },
+     {
+       "epoch": 0.96,
+       "grad_norm": 0.6181728839874268,
+       "learning_rate": 0.00015289640103269625,
+       "loss": 1.462,
+       "step": 30
+     },
+     {
+       "epoch": 1.12,
+       "grad_norm": 0.8008699417114258,
+       "learning_rate": 0.0001378778885610576,
+       "loss": 1.4432,
+       "step": 35
+     },
+     {
+       "epoch": 1.28,
+       "grad_norm": 1.0314457416534424,
+       "learning_rate": 0.00012178135587488515,
+       "loss": 1.3561,
+       "step": 40
+     },
+     {
+       "epoch": 1.44,
+       "grad_norm": 0.47113487124443054,
+       "learning_rate": 0.00010506491688387127,
+       "loss": 1.3299,
+       "step": 45
+     },
+     {
+       "epoch": 1.6,
+       "grad_norm": 0.41678062081336975,
+       "learning_rate": 8.820432828491542e-05,
+       "loss": 1.2208,
+       "step": 50
+     },
+     {
+       "epoch": 1.76,
+       "grad_norm": 0.4888109862804413,
+       "learning_rate": 7.16794493317696e-05,
+       "loss": 1.4574,
+       "step": 55
+     },
+     {
+       "epoch": 1.92,
+       "grad_norm": 0.3696904480457306,
+       "learning_rate": 5.596058484423656e-05,
+       "loss": 1.1423,
+       "step": 60
+     },
+     {
+       "epoch": 2.08,
+       "grad_norm": 0.6482197046279907,
+       "learning_rate": 4.149510014046922e-05,
+       "loss": 1.3134,
+       "step": 65
+     },
+     {
+       "epoch": 2.24,
+       "grad_norm": 0.7164911031723022,
+       "learning_rate": 2.869468883687798e-05,
+       "loss": 1.3384,
+       "step": 70
+     },
+     {
+       "epoch": 2.4,
+       "grad_norm": 0.3950205147266388,
+       "learning_rate": 1.7923655879272393e-05,
+       "loss": 1.3873,
+       "step": 75
+     },
+     {
+       "epoch": 2.56,
+       "grad_norm": 0.387712687253952,
+       "learning_rate": 9.488549274967872e-06,
+       "loss": 1.2214,
+       "step": 80
+     },
+     {
+       "epoch": 2.7199999999999998,
+       "grad_norm": 0.2879863679409027,
+       "learning_rate": 3.6294356110059157e-06,
+       "loss": 1.3198,
+       "step": 85
+     },
+     {
+       "epoch": 2.88,
+       "grad_norm": 0.9349786043167114,
+       "learning_rate": 5.130676608104845e-07,
+       "loss": 1.3832,
+       "step": 90
+     },
+     {
+       "epoch": 2.976,
+       "step": 93,
+       "total_flos": 1234235324694528.0,
+       "train_loss": 1.4291518221619308,
+       "train_runtime": 192.8949,
+       "train_samples_per_second": 7.776,
+       "train_steps_per_second": 0.482
+     }
+   ],
+   "logging_steps": 5,
+   "max_steps": 93,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 100,
+   "total_flos": 1234235324694528.0,
+   "train_batch_size": 2,
+   "trial_name": null,
+   "trial_params": null
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ad0a98a9af90598fc9c299fcf9af47b320c84b8e983e6ad2d29c806f6b85e99
+ size 5176
vocab.json ADDED
The diff for this file is too large to render. See raw diff