WDong committed on
Commit
9c7174e
1 Parent(s): 8ce6205

Upload 17 files

Browse files
README.md CHANGED
@@ -1,3 +1,69 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ library_name: peft
4
+ tags:
5
+ - llama-factory
6
+ - lora
7
+ - generated_from_trainer
8
+ base_model: Qwen2/Qwen2-7B-Instruct
9
+ model-index:
10
+ - name: dpo_06230018_policy2_0.6
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # dpo_06230018_policy2_0.6
18
+
19
+ This model is a fine-tuned version of [/root/LLM_Data_Engineer/LLaMA-Factory/models/Qwen2-7B-Instruct-sft-06221544-iter1-policy2](https://huggingface.co//root/LLM_Data_Engineer/LLaMA-Factory/models/Qwen2-7B-Instruct-sft-06221544-iter1-policy2) on the dpo_data_5370_0621 dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.0101
22
+ - Rewards/chosen: 4.3454
23
+ - Rewards/rejected: -5.5676
24
+ - Rewards/accuracies: 1.0
25
+ - Rewards/margins: 9.9130
26
+ - Logps/rejected: -182.1341
27
+ - Logps/chosen: -277.0742
28
+ - Logits/rejected: -1.5388
29
+ - Logits/chosen: -0.4289
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 5e-06
49
+ - train_batch_size: 4
50
+ - eval_batch_size: 1
51
+ - seed: 42
52
+ - gradient_accumulation_steps: 8
53
+ - total_train_batch_size: 32
54
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
+ - lr_scheduler_type: cosine
56
+ - lr_scheduler_warmup_ratio: 0.1
57
+ - num_epochs: 3.0
58
+
59
+ ### Training results
60
+
61
+
62
+
63
+ ### Framework versions
64
+
65
+ - PEFT 0.11.1
66
+ - Transformers 4.41.2
67
+ - Pytorch 2.1.2
68
+ - Datasets 2.19.2
69
+ - Tokenizers 0.19.1
adapter_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/root/LLM_Data_Engineer/LLaMA-Factory/models/Qwen2-7B-Instruct-sft-06221544-iter1-policy2",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.0,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "gate_proj",
24
+ "k_proj",
25
+ "v_proj",
26
+ "down_proj",
27
+ "up_proj",
28
+ "o_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "use_dora": false,
33
+ "use_rslora": false
34
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99f945c80dc64d54b6e63a2b77495896f6f7dfa7cfbc3767b345d8fcd0811ca6
3
+ size 80792096
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.994495412844037,
3
+ "eval_logits/chosen": -0.4288872182369232,
4
+ "eval_logits/rejected": -1.5388305187225342,
5
+ "eval_logps/chosen": -277.0741882324219,
6
+ "eval_logps/rejected": -182.1341094970703,
7
+ "eval_loss": 0.010117708705365658,
8
+ "eval_rewards/accuracies": 1.0,
9
+ "eval_rewards/chosen": 4.345386981964111,
10
+ "eval_rewards/margins": 9.913021087646484,
11
+ "eval_rewards/rejected": -5.567633628845215,
12
+ "eval_runtime": 8.9732,
13
+ "eval_samples_per_second": 5.015,
14
+ "eval_steps_per_second": 5.015,
15
+ "total_flos": 7.837376281021809e+17,
16
+ "train_loss": 0.11720214437923905,
17
+ "train_runtime": 8069.9016,
18
+ "train_samples_per_second": 1.62,
19
+ "train_steps_per_second": 0.051
20
+ }
eval_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.994495412844037,
3
+ "eval_logits/chosen": -0.4288872182369232,
4
+ "eval_logits/rejected": -1.5388305187225342,
5
+ "eval_logps/chosen": -277.0741882324219,
6
+ "eval_logps/rejected": -182.1341094970703,
7
+ "eval_loss": 0.010117708705365658,
8
+ "eval_rewards/accuracies": 1.0,
9
+ "eval_rewards/chosen": 4.345386981964111,
10
+ "eval_rewards/margins": 9.913021087646484,
11
+ "eval_rewards/rejected": -5.567633628845215,
12
+ "eval_runtime": 8.9732,
13
+ "eval_samples_per_second": 5.015,
14
+ "eval_steps_per_second": 5.015
15
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|im_end|>",
37
+ "errors": "replace",
38
+ "model_max_length": 131072,
39
+ "pad_token": "<|endoftext|>",
40
+ "padding_side": "right",
41
+ "split_special_tokens": false,
42
+ "tokenizer_class": "Qwen2Tokenizer",
43
+ "unk_token": null
44
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.994495412844037,
3
+ "total_flos": 7.837376281021809e+17,
4
+ "train_loss": 0.11720214437923905,
5
+ "train_runtime": 8069.9016,
6
+ "train_samples_per_second": 1.62,
7
+ "train_steps_per_second": 0.051
8
+ }
trainer_log.jsonl ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 2, "total_steps": 408, "loss": 0.6931, "accuracy": 0.0, "learning_rate": 2.439024390243903e-07, "epoch": 0.014678899082568808, "percentage": 0.49, "elapsed_time": "0:00:31", "remaining_time": "1:47:44", "throughput": "0.00", "total_tokens": 0}
2
+ {"current_steps": 4, "total_steps": 408, "loss": 0.712, "accuracy": 0.5, "learning_rate": 4.878048780487805e-07, "epoch": 0.029357798165137616, "percentage": 0.98, "elapsed_time": "0:01:20", "remaining_time": "2:15:44", "throughput": "0.00", "total_tokens": 0}
3
+ {"current_steps": 6, "total_steps": 408, "loss": 0.6642, "accuracy": 0.625, "learning_rate": 7.317073170731707e-07, "epoch": 0.044036697247706424, "percentage": 1.47, "elapsed_time": "0:02:07", "remaining_time": "2:22:25", "throughput": "0.00", "total_tokens": 0}
4
+ {"current_steps": 8, "total_steps": 408, "loss": 0.7179, "accuracy": 0.53125, "learning_rate": 9.75609756097561e-07, "epoch": 0.05871559633027523, "percentage": 1.96, "elapsed_time": "0:02:39", "remaining_time": "2:13:18", "throughput": "0.00", "total_tokens": 0}
5
+ {"current_steps": 10, "total_steps": 408, "loss": 0.7397, "accuracy": 0.515625, "learning_rate": 1.2195121951219514e-06, "epoch": 0.07339449541284404, "percentage": 2.45, "elapsed_time": "0:03:21", "remaining_time": "2:13:49", "throughput": "0.00", "total_tokens": 0}
6
+ {"current_steps": 12, "total_steps": 408, "loss": 0.7584, "accuracy": 0.40625, "learning_rate": 1.4634146341463414e-06, "epoch": 0.08807339449541285, "percentage": 2.94, "elapsed_time": "0:03:56", "remaining_time": "2:10:16", "throughput": "0.00", "total_tokens": 0}
7
+ {"current_steps": 14, "total_steps": 408, "loss": 0.7235, "accuracy": 0.46875, "learning_rate": 1.707317073170732e-06, "epoch": 0.10275229357798166, "percentage": 3.43, "elapsed_time": "0:04:29", "remaining_time": "2:06:17", "throughput": "0.00", "total_tokens": 0}
8
+ {"current_steps": 16, "total_steps": 408, "loss": 0.7405, "accuracy": 0.453125, "learning_rate": 1.951219512195122e-06, "epoch": 0.11743119266055047, "percentage": 3.92, "elapsed_time": "0:05:05", "remaining_time": "2:04:54", "throughput": "0.00", "total_tokens": 0}
9
+ {"current_steps": 18, "total_steps": 408, "loss": 0.6731, "accuracy": 0.609375, "learning_rate": 2.1951219512195125e-06, "epoch": 0.13211009174311927, "percentage": 4.41, "elapsed_time": "0:05:43", "remaining_time": "2:03:58", "throughput": "0.00", "total_tokens": 0}
10
+ {"current_steps": 20, "total_steps": 408, "loss": 0.6784, "accuracy": 0.546875, "learning_rate": 2.4390243902439027e-06, "epoch": 0.14678899082568808, "percentage": 4.9, "elapsed_time": "0:06:25", "remaining_time": "2:04:46", "throughput": "0.00", "total_tokens": 0}
11
+ {"current_steps": 22, "total_steps": 408, "loss": 0.6781, "accuracy": 0.515625, "learning_rate": 2.682926829268293e-06, "epoch": 0.1614678899082569, "percentage": 5.39, "elapsed_time": "0:07:14", "remaining_time": "2:07:04", "throughput": "0.00", "total_tokens": 0}
12
+ {"current_steps": 24, "total_steps": 408, "loss": 0.7306, "accuracy": 0.515625, "learning_rate": 2.926829268292683e-06, "epoch": 0.1761467889908257, "percentage": 5.88, "elapsed_time": "0:07:47", "remaining_time": "2:04:41", "throughput": "0.00", "total_tokens": 0}
13
+ {"current_steps": 26, "total_steps": 408, "loss": 0.7175, "accuracy": 0.546875, "learning_rate": 3.1707317073170736e-06, "epoch": 0.1908256880733945, "percentage": 6.37, "elapsed_time": "0:08:24", "remaining_time": "2:03:30", "throughput": "0.00", "total_tokens": 0}
14
+ {"current_steps": 28, "total_steps": 408, "loss": 0.7409, "accuracy": 0.546875, "learning_rate": 3.414634146341464e-06, "epoch": 0.20550458715596331, "percentage": 6.86, "elapsed_time": "0:09:03", "remaining_time": "2:02:53", "throughput": "0.00", "total_tokens": 0}
15
+ {"current_steps": 30, "total_steps": 408, "loss": 0.6371, "accuracy": 0.609375, "learning_rate": 3.6585365853658537e-06, "epoch": 0.22018348623853212, "percentage": 7.35, "elapsed_time": "0:09:40", "remaining_time": "2:02:00", "throughput": "0.00", "total_tokens": 0}
16
+ {"current_steps": 32, "total_steps": 408, "loss": 0.6733, "accuracy": 0.546875, "learning_rate": 3.902439024390244e-06, "epoch": 0.23486238532110093, "percentage": 7.84, "elapsed_time": "0:10:29", "remaining_time": "2:03:18", "throughput": "0.00", "total_tokens": 0}
17
+ {"current_steps": 34, "total_steps": 408, "loss": 0.6478, "accuracy": 0.640625, "learning_rate": 4.146341463414634e-06, "epoch": 0.24954128440366974, "percentage": 8.33, "elapsed_time": "0:11:16", "remaining_time": "2:04:03", "throughput": "0.00", "total_tokens": 0}
18
+ {"current_steps": 36, "total_steps": 408, "loss": 0.6193, "accuracy": 0.65625, "learning_rate": 4.390243902439025e-06, "epoch": 0.26422018348623855, "percentage": 8.82, "elapsed_time": "0:12:04", "remaining_time": "2:04:46", "throughput": "0.00", "total_tokens": 0}
19
+ {"current_steps": 38, "total_steps": 408, "loss": 0.6598, "accuracy": 0.65625, "learning_rate": 4.634146341463416e-06, "epoch": 0.27889908256880735, "percentage": 9.31, "elapsed_time": "0:12:44", "remaining_time": "2:04:03", "throughput": "0.00", "total_tokens": 0}
20
+ {"current_steps": 40, "total_steps": 408, "loss": 0.6186, "accuracy": 0.6875, "learning_rate": 4.8780487804878055e-06, "epoch": 0.29357798165137616, "percentage": 9.8, "elapsed_time": "0:13:20", "remaining_time": "2:02:42", "throughput": "0.00", "total_tokens": 0}
21
+ {"current_steps": 42, "total_steps": 408, "loss": 0.6248, "accuracy": 0.640625, "learning_rate": 4.999908404322799e-06, "epoch": 0.30825688073394497, "percentage": 10.29, "elapsed_time": "0:13:56", "remaining_time": "2:01:29", "throughput": "0.00", "total_tokens": 0}
22
+ {"current_steps": 44, "total_steps": 408, "loss": 0.5849, "accuracy": 0.765625, "learning_rate": 4.999175679175577e-06, "epoch": 0.3229357798165138, "percentage": 10.78, "elapsed_time": "0:14:33", "remaining_time": "2:00:29", "throughput": "0.00", "total_tokens": 0}
23
+ {"current_steps": 46, "total_steps": 408, "loss": 0.6109, "accuracy": 0.65625, "learning_rate": 4.997710443643461e-06, "epoch": 0.3376146788990826, "percentage": 11.27, "elapsed_time": "0:15:31", "remaining_time": "2:02:07", "throughput": "0.00", "total_tokens": 0}
24
+ {"current_steps": 48, "total_steps": 408, "loss": 0.5456, "accuracy": 0.75, "learning_rate": 4.995513127188151e-06, "epoch": 0.3522935779816514, "percentage": 11.76, "elapsed_time": "0:16:18", "remaining_time": "2:02:19", "throughput": "0.00", "total_tokens": 0}
25
+ {"current_steps": 50, "total_steps": 408, "loss": 0.5091, "accuracy": 0.8125, "learning_rate": 4.992584373844853e-06, "epoch": 0.3669724770642202, "percentage": 12.25, "elapsed_time": "0:16:58", "remaining_time": "2:01:34", "throughput": "0.00", "total_tokens": 0}
26
+ {"current_steps": 52, "total_steps": 408, "loss": 0.501, "accuracy": 0.921875, "learning_rate": 4.98892504203351e-06, "epoch": 0.381651376146789, "percentage": 12.75, "elapsed_time": "0:17:34", "remaining_time": "2:00:22", "throughput": "0.00", "total_tokens": 0}
27
+ {"current_steps": 54, "total_steps": 408, "loss": 0.4541, "accuracy": 0.9375, "learning_rate": 4.9845362043071925e-06, "epoch": 0.3963302752293578, "percentage": 13.24, "elapsed_time": "0:18:09", "remaining_time": "1:59:04", "throughput": "0.00", "total_tokens": 0}
28
+ {"current_steps": 56, "total_steps": 408, "loss": 0.4487, "accuracy": 0.875, "learning_rate": 4.97941914703774e-06, "epoch": 0.41100917431192663, "percentage": 13.73, "elapsed_time": "0:18:58", "remaining_time": "1:59:19", "throughput": "0.00", "total_tokens": 0}
29
+ {"current_steps": 58, "total_steps": 408, "loss": 0.3851, "accuracy": 0.90625, "learning_rate": 4.973575370038718e-06, "epoch": 0.42568807339449544, "percentage": 14.22, "elapsed_time": "0:19:42", "remaining_time": "1:58:55", "throughput": "0.00", "total_tokens": 0}
30
+ {"current_steps": 60, "total_steps": 408, "loss": 0.35, "accuracy": 0.9375, "learning_rate": 4.967006586125827e-06, "epoch": 0.44036697247706424, "percentage": 14.71, "elapsed_time": "0:20:20", "remaining_time": "1:58:01", "throughput": "0.00", "total_tokens": 0}
31
+ {"current_steps": 62, "total_steps": 408, "loss": 0.3239, "accuracy": 0.953125, "learning_rate": 4.959714720614871e-06, "epoch": 0.45504587155963305, "percentage": 15.2, "elapsed_time": "0:21:11", "remaining_time": "1:58:14", "throughput": "0.00", "total_tokens": 0}
32
+ {"current_steps": 64, "total_steps": 408, "loss": 0.3088, "accuracy": 0.96875, "learning_rate": 4.951701910757446e-06, "epoch": 0.46972477064220186, "percentage": 15.69, "elapsed_time": "0:21:50", "remaining_time": "1:57:25", "throughput": "0.00", "total_tokens": 0}
33
+ {"current_steps": 66, "total_steps": 408, "loss": 0.268, "accuracy": 0.984375, "learning_rate": 4.942970505114514e-06, "epoch": 0.48440366972477067, "percentage": 16.18, "elapsed_time": "0:22:23", "remaining_time": "1:56:03", "throughput": "0.00", "total_tokens": 0}
34
+ {"current_steps": 68, "total_steps": 408, "loss": 0.2372, "accuracy": 0.96875, "learning_rate": 4.933523062868033e-06, "epoch": 0.4990825688073395, "percentage": 16.67, "elapsed_time": "0:22:57", "remaining_time": "1:54:46", "throughput": "0.00", "total_tokens": 0}
35
+ {"current_steps": 70, "total_steps": 408, "loss": 0.2079, "accuracy": 0.984375, "learning_rate": 4.923362353070859e-06, "epoch": 0.5137614678899083, "percentage": 17.16, "elapsed_time": "0:23:33", "remaining_time": "1:53:43", "throughput": "0.00", "total_tokens": 0}
36
+ {"current_steps": 72, "total_steps": 408, "loss": 0.2185, "accuracy": 0.984375, "learning_rate": 4.912491353835138e-06, "epoch": 0.5284403669724771, "percentage": 17.65, "elapsed_time": "0:24:22", "remaining_time": "1:53:43", "throughput": "0.00", "total_tokens": 0}
37
+ {"current_steps": 74, "total_steps": 408, "loss": 0.1769, "accuracy": 1.0, "learning_rate": 4.900913251459418e-06, "epoch": 0.5431192660550459, "percentage": 18.14, "elapsed_time": "0:24:56", "remaining_time": "1:52:35", "throughput": "0.00", "total_tokens": 0}
38
+ {"current_steps": 76, "total_steps": 408, "loss": 0.1608, "accuracy": 0.96875, "learning_rate": 4.8886314394947396e-06, "epoch": 0.5577981651376147, "percentage": 18.63, "elapsed_time": "0:25:29", "remaining_time": "1:51:21", "throughput": "0.00", "total_tokens": 0}
39
+ {"current_steps": 78, "total_steps": 408, "loss": 0.1548, "accuracy": 0.96875, "learning_rate": 4.875649517749985e-06, "epoch": 0.5724770642201835, "percentage": 19.12, "elapsed_time": "0:26:01", "remaining_time": "1:50:07", "throughput": "0.00", "total_tokens": 0}
40
+ {"current_steps": 80, "total_steps": 408, "loss": 0.1841, "accuracy": 0.9375, "learning_rate": 4.861971291236772e-06, "epoch": 0.5871559633027523, "percentage": 19.61, "elapsed_time": "0:26:38", "remaining_time": "1:49:14", "throughput": "0.00", "total_tokens": 0}
41
+ {"current_steps": 82, "total_steps": 408, "loss": 0.1093, "accuracy": 0.984375, "learning_rate": 4.847600769054201e-06, "epoch": 0.6018348623853211, "percentage": 20.1, "elapsed_time": "0:27:24", "remaining_time": "1:48:56", "throughput": "0.00", "total_tokens": 0}
42
+ {"current_steps": 84, "total_steps": 408, "loss": 0.1073, "accuracy": 0.984375, "learning_rate": 4.832542163213787e-06, "epoch": 0.6165137614678899, "percentage": 20.59, "elapsed_time": "0:27:58", "remaining_time": "1:47:52", "throughput": "0.00", "total_tokens": 0}
43
+ {"current_steps": 86, "total_steps": 408, "loss": 0.1277, "accuracy": 0.96875, "learning_rate": 4.816799887404911e-06, "epoch": 0.6311926605504588, "percentage": 21.08, "elapsed_time": "0:28:43", "remaining_time": "1:47:33", "throughput": "0.00", "total_tokens": 0}
44
+ {"current_steps": 88, "total_steps": 408, "loss": 0.1089, "accuracy": 0.953125, "learning_rate": 4.800378555701168e-06, "epoch": 0.6458715596330276, "percentage": 21.57, "elapsed_time": "0:29:22", "remaining_time": "1:46:48", "throughput": "0.00", "total_tokens": 0}
45
+ {"current_steps": 90, "total_steps": 408, "loss": 0.0866, "accuracy": 0.984375, "learning_rate": 4.783282981207979e-06, "epoch": 0.6605504587155964, "percentage": 22.06, "elapsed_time": "0:30:01", "remaining_time": "1:46:04", "throughput": "0.00", "total_tokens": 0}
46
+ {"current_steps": 92, "total_steps": 408, "loss": 0.0852, "accuracy": 0.984375, "learning_rate": 4.765518174651864e-06, "epoch": 0.6752293577981652, "percentage": 22.55, "elapsed_time": "0:30:36", "remaining_time": "1:45:07", "throughput": "0.00", "total_tokens": 0}
47
+ {"current_steps": 94, "total_steps": 408, "loss": 0.0446, "accuracy": 1.0, "learning_rate": 4.747089342911793e-06, "epoch": 0.689908256880734, "percentage": 23.04, "elapsed_time": "0:31:21", "remaining_time": "1:44:44", "throughput": "0.00", "total_tokens": 0}
48
+ {"current_steps": 96, "total_steps": 408, "loss": 0.067, "accuracy": 0.984375, "learning_rate": 4.728001887493048e-06, "epoch": 0.7045871559633028, "percentage": 23.53, "elapsed_time": "0:31:55", "remaining_time": "1:43:44", "throughput": "0.00", "total_tokens": 0}
49
+ {"current_steps": 98, "total_steps": 408, "loss": 0.06, "accuracy": 1.0, "learning_rate": 4.708261402944036e-06, "epoch": 0.7192660550458716, "percentage": 24.02, "elapsed_time": "0:32:36", "remaining_time": "1:43:07", "throughput": "0.00", "total_tokens": 0}
50
+ {"current_steps": 100, "total_steps": 408, "loss": 0.0892, "accuracy": 0.953125, "learning_rate": 4.687873675216522e-06, "epoch": 0.7339449541284404, "percentage": 24.51, "elapsed_time": "0:33:20", "remaining_time": "1:42:41", "throughput": "0.00", "total_tokens": 0}
51
+ {"current_steps": 102, "total_steps": 408, "loss": 0.0373, "accuracy": 1.0, "learning_rate": 4.666844679969765e-06, "epoch": 0.7486238532110092, "percentage": 25.0, "elapsed_time": "0:34:15", "remaining_time": "1:42:46", "throughput": "0.00", "total_tokens": 0}
52
+ {"current_steps": 104, "total_steps": 408, "loss": 0.0317, "accuracy": 1.0, "learning_rate": 4.6451805808190464e-06, "epoch": 0.763302752293578, "percentage": 25.49, "elapsed_time": "0:34:49", "remaining_time": "1:41:49", "throughput": "0.00", "total_tokens": 0}
53
+ {"current_steps": 106, "total_steps": 408, "loss": 0.029, "accuracy": 1.0, "learning_rate": 4.622887727529104e-06, "epoch": 0.7779816513761468, "percentage": 25.98, "elapsed_time": "0:35:29", "remaining_time": "1:41:07", "throughput": "0.00", "total_tokens": 0}
54
+ {"current_steps": 108, "total_steps": 408, "loss": 0.0311, "accuracy": 1.0, "learning_rate": 4.599972654153018e-06, "epoch": 0.7926605504587156, "percentage": 26.47, "elapsed_time": "0:36:00", "remaining_time": "1:40:02", "throughput": "0.00", "total_tokens": 0}
55
+ {"current_steps": 110, "total_steps": 408, "loss": 0.0215, "accuracy": 1.0, "learning_rate": 4.5764420771170735e-06, "epoch": 0.8073394495412844, "percentage": 26.96, "elapsed_time": "0:36:31", "remaining_time": "1:38:55", "throughput": "0.00", "total_tokens": 0}
56
+ {"current_steps": 112, "total_steps": 408, "loss": 0.0296, "accuracy": 1.0, "learning_rate": 4.552302893252166e-06, "epoch": 0.8220183486238533, "percentage": 27.45, "elapsed_time": "0:37:18", "remaining_time": "1:38:34", "throughput": "0.00", "total_tokens": 0}
57
+ {"current_steps": 114, "total_steps": 408, "loss": 0.0398, "accuracy": 1.0, "learning_rate": 4.52756217777234e-06, "epoch": 0.8366972477064221, "percentage": 27.94, "elapsed_time": "0:37:56", "remaining_time": "1:37:51", "throughput": "0.00", "total_tokens": 0}
58
+ {"current_steps": 116, "total_steps": 408, "loss": 0.0227, "accuracy": 1.0, "learning_rate": 4.502227182201035e-06, "epoch": 0.8513761467889909, "percentage": 28.43, "elapsed_time": "0:38:31", "remaining_time": "1:36:57", "throughput": "0.00", "total_tokens": 0}
59
+ {"current_steps": 118, "total_steps": 408, "loss": 0.028, "accuracy": 1.0, "learning_rate": 4.476305332245662e-06, "epoch": 0.8660550458715597, "percentage": 28.92, "elapsed_time": "0:39:20", "remaining_time": "1:36:42", "throughput": "0.00", "total_tokens": 0}
60
+ {"current_steps": 120, "total_steps": 408, "loss": 0.0485, "accuracy": 0.984375, "learning_rate": 4.449804225621116e-06, "epoch": 0.8807339449541285, "percentage": 29.41, "elapsed_time": "0:39:56", "remaining_time": "1:35:50", "throughput": "0.00", "total_tokens": 0}
61
+ {"current_steps": 122, "total_steps": 408, "loss": 0.0315, "accuracy": 1.0, "learning_rate": 4.422731629822887e-06, "epoch": 0.8954128440366973, "percentage": 29.9, "elapsed_time": "0:40:36", "remaining_time": "1:35:12", "throughput": "0.00", "total_tokens": 0}
62
+ {"current_steps": 124, "total_steps": 408, "loss": 0.0548, "accuracy": 1.0, "learning_rate": 4.395095479850396e-06, "epoch": 0.9100917431192661, "percentage": 30.39, "elapsed_time": "0:41:07", "remaining_time": "1:34:11", "throughput": "0.00", "total_tokens": 0}
63
+ {"current_steps": 126, "total_steps": 408, "loss": 0.0128, "accuracy": 1.0, "learning_rate": 4.366903875881243e-06, "epoch": 0.9247706422018349, "percentage": 30.88, "elapsed_time": "0:41:52", "remaining_time": "1:33:42", "throughput": "0.00", "total_tokens": 0}
64
+ {"current_steps": 128, "total_steps": 408, "loss": 0.0226, "accuracy": 1.0, "learning_rate": 4.3381650808970365e-06, "epoch": 0.9394495412844037, "percentage": 31.37, "elapsed_time": "0:42:25", "remaining_time": "1:32:47", "throughput": "0.00", "total_tokens": 0}
65
+ {"current_steps": 130, "total_steps": 408, "loss": 0.0194, "accuracy": 1.0, "learning_rate": 4.308887518261507e-06, "epoch": 0.9541284403669725, "percentage": 31.86, "elapsed_time": "0:43:01", "remaining_time": "1:32:00", "throughput": "0.00", "total_tokens": 0}
66
+ {"current_steps": 132, "total_steps": 408, "loss": 0.0184, "accuracy": 1.0, "learning_rate": 4.279079769251617e-06, "epoch": 0.9688073394495413, "percentage": 32.35, "elapsed_time": "0:43:47", "remaining_time": "1:31:33", "throughput": "0.00", "total_tokens": 0}
67
+ {"current_steps": 134, "total_steps": 408, "loss": 0.0231, "accuracy": 1.0, "learning_rate": 4.248750570542373e-06, "epoch": 0.9834862385321101, "percentage": 32.84, "elapsed_time": "0:44:22", "remaining_time": "1:30:44", "throughput": "0.00", "total_tokens": 0}
68
+ {"current_steps": 136, "total_steps": 408, "loss": 0.0379, "accuracy": 1.0, "learning_rate": 4.21790881164611e-06, "epoch": 0.998165137614679, "percentage": 33.33, "elapsed_time": "0:44:59", "remaining_time": "1:29:59", "throughput": "0.00", "total_tokens": 0}
69
+ {"current_steps": 138, "total_steps": 408, "loss": 0.028, "accuracy": 1.0, "learning_rate": 4.186563532306957e-06, "epoch": 1.0128440366972478, "percentage": 33.82, "elapsed_time": "0:45:33", "remaining_time": "1:29:07", "throughput": "0.00", "total_tokens": 0}
70
+ {"current_steps": 140, "total_steps": 408, "loss": 0.0308, "accuracy": 1.0, "learning_rate": 4.154723919851291e-06, "epoch": 1.0275229357798166, "percentage": 34.31, "elapsed_time": "0:46:09", "remaining_time": "1:28:22", "throughput": "0.00", "total_tokens": 0}
71
+ {"current_steps": 142, "total_steps": 408, "loss": 0.0211, "accuracy": 1.0, "learning_rate": 4.122399306494918e-06, "epoch": 1.0422018348623854, "percentage": 34.8, "elapsed_time": "0:46:54", "remaining_time": "1:27:52", "throughput": "0.00", "total_tokens": 0}
72
+ {"current_steps": 144, "total_steps": 408, "loss": 0.0121, "accuracy": 1.0, "learning_rate": 4.089599166607794e-06, "epoch": 1.0568807339449542, "percentage": 35.29, "elapsed_time": "0:47:30", "remaining_time": "1:27:05", "throughput": "0.00", "total_tokens": 0}
73
+ {"current_steps": 146, "total_steps": 408, "loss": 0.0268, "accuracy": 1.0, "learning_rate": 4.05633311393708e-06, "epoch": 1.071559633027523, "percentage": 35.78, "elapsed_time": "0:48:13", "remaining_time": "1:26:32", "throughput": "0.00", "total_tokens": 0}
74
+ {"current_steps": 148, "total_steps": 408, "loss": 0.0169, "accuracy": 1.0, "learning_rate": 4.022610898789349e-06, "epoch": 1.0862385321100918, "percentage": 36.27, "elapsed_time": "0:48:45", "remaining_time": "1:25:39", "throughput": "0.00", "total_tokens": 0}
75
+ {"current_steps": 150, "total_steps": 408, "loss": 0.0121, "accuracy": 1.0, "learning_rate": 3.988442405172755e-06, "epoch": 1.1009174311926606, "percentage": 36.76, "elapsed_time": "0:49:26", "remaining_time": "1:25:03", "throughput": "0.00", "total_tokens": 0}
76
+ {"current_steps": 152, "total_steps": 408, "loss": 0.0107, "accuracy": 1.0, "learning_rate": 3.953837647900031e-06, "epoch": 1.1155963302752294, "percentage": 37.25, "elapsed_time": "0:50:08", "remaining_time": "1:24:27", "throughput": "0.00", "total_tokens": 0}
77
+ {"current_steps": 154, "total_steps": 408, "loss": 0.0324, "accuracy": 1.0, "learning_rate": 3.918806769653135e-06, "epoch": 1.1302752293577982, "percentage": 37.75, "elapsed_time": "0:50:54", "remaining_time": "1:23:57", "throughput": "0.00", "total_tokens": 0}
78
+ {"current_steps": 156, "total_steps": 408, "loss": 0.0114, "accuracy": 1.0, "learning_rate": 3.88336003801042e-06, "epoch": 1.144954128440367, "percentage": 38.24, "elapsed_time": "0:51:25", "remaining_time": "1:23:04", "throughput": "0.00", "total_tokens": 0}
79
+ {"current_steps": 158, "total_steps": 408, "loss": 0.007, "accuracy": 1.0, "learning_rate": 3.847507842437205e-06, "epoch": 1.1596330275229358, "percentage": 38.73, "elapsed_time": "0:51:59", "remaining_time": "1:22:16", "throughput": "0.00", "total_tokens": 0}
80
+ {"current_steps": 160, "total_steps": 408, "loss": 0.009, "accuracy": 1.0, "learning_rate": 3.811260691240604e-06, "epoch": 1.1743119266055047, "percentage": 39.22, "elapsed_time": "0:52:36", "remaining_time": "1:21:31", "throughput": "0.00", "total_tokens": 0}
81
+ {"current_steps": 162, "total_steps": 408, "loss": 0.0084, "accuracy": 1.0, "learning_rate": 3.774629208489547e-06, "epoch": 1.1889908256880735, "percentage": 39.71, "elapsed_time": "0:53:07", "remaining_time": "1:20:40", "throughput": "0.00", "total_tokens": 0}
82
+ {"current_steps": 164, "total_steps": 408, "loss": 0.0133, "accuracy": 1.0, "learning_rate": 3.7376241309008433e-06, "epoch": 1.2036697247706423, "percentage": 40.2, "elapsed_time": "0:53:46", "remaining_time": "1:19:59", "throughput": "0.00", "total_tokens": 0}
83
+ {"current_steps": 166, "total_steps": 408, "loss": 0.0044, "accuracy": 1.0, "learning_rate": 3.7002563046922502e-06, "epoch": 1.218348623853211, "percentage": 40.69, "elapsed_time": "0:54:38", "remaining_time": "1:19:39", "throughput": "0.00", "total_tokens": 0}
84
+ {"current_steps": 168, "total_steps": 408, "loss": 0.0139, "accuracy": 1.0, "learning_rate": 3.6625366824034337e-06, "epoch": 1.2330275229357799, "percentage": 41.18, "elapsed_time": "0:55:19", "remaining_time": "1:19:01", "throughput": "0.00", "total_tokens": 0}
85
+ {"current_steps": 170, "total_steps": 408, "loss": 0.013, "accuracy": 1.0, "learning_rate": 3.6244763196857714e-06, "epoch": 1.2477064220183487, "percentage": 41.67, "elapsed_time": "0:56:04", "remaining_time": "1:18:29", "throughput": "0.00", "total_tokens": 0}
86
+ {"current_steps": 172, "total_steps": 408, "loss": 0.0137, "accuracy": 1.0, "learning_rate": 3.5860863720619333e-06, "epoch": 1.2623853211009175, "percentage": 42.16, "elapsed_time": "0:56:51", "remaining_time": "1:18:01", "throughput": "0.00", "total_tokens": 0}
87
+ {"current_steps": 174, "total_steps": 408, "loss": 0.0092, "accuracy": 1.0, "learning_rate": 3.547378091656186e-06, "epoch": 1.2770642201834863, "percentage": 42.65, "elapsed_time": "0:57:27", "remaining_time": "1:17:15", "throughput": "0.00", "total_tokens": 0}
88
+ {"current_steps": 176, "total_steps": 408, "loss": 0.0147, "accuracy": 1.0, "learning_rate": 3.5083628238963913e-06, "epoch": 1.2917431192660551, "percentage": 43.14, "elapsed_time": "0:58:16", "remaining_time": "1:16:49", "throughput": "0.00", "total_tokens": 0}
89
+ {"current_steps": 178, "total_steps": 408, "loss": 0.0157, "accuracy": 1.0, "learning_rate": 3.4690520041886473e-06, "epoch": 1.306422018348624, "percentage": 43.63, "elapsed_time": "0:58:58", "remaining_time": "1:16:11", "throughput": "0.00", "total_tokens": 0}
90
+ {"current_steps": 180, "total_steps": 408, "loss": 0.0033, "accuracy": 1.0, "learning_rate": 3.4294571545655653e-06, "epoch": 1.3211009174311927, "percentage": 44.12, "elapsed_time": "0:59:39", "remaining_time": "1:15:33", "throughput": "0.00", "total_tokens": 0}
91
+ {"current_steps": 182, "total_steps": 408, "loss": 0.0592, "accuracy": 0.984375, "learning_rate": 3.38958988030915e-06, "epoch": 1.3357798165137615, "percentage": 44.61, "elapsed_time": "1:00:27", "remaining_time": "1:15:04", "throughput": "0.00", "total_tokens": 0}
92
+ {"current_steps": 184, "total_steps": 408, "loss": 0.0152, "accuracy": 1.0, "learning_rate": 3.3494618665492833e-06, "epoch": 1.3504587155963304, "percentage": 45.1, "elapsed_time": "1:01:05", "remaining_time": "1:14:21", "throughput": "0.00", "total_tokens": 0}
93
+ {"current_steps": 186, "total_steps": 408, "loss": 0.0061, "accuracy": 1.0, "learning_rate": 3.3090848748388042e-06, "epoch": 1.3651376146788992, "percentage": 45.59, "elapsed_time": "1:01:48", "remaining_time": "1:13:46", "throughput": "0.00", "total_tokens": 0}
94
+ {"current_steps": 188, "total_steps": 408, "loss": 0.0124, "accuracy": 1.0, "learning_rate": 3.2684707397061887e-06, "epoch": 1.379816513761468, "percentage": 46.08, "elapsed_time": "1:02:46", "remaining_time": "1:13:27", "throughput": "0.00", "total_tokens": 0}
95
+ {"current_steps": 190, "total_steps": 408, "loss": 0.0139, "accuracy": 1.0, "learning_rate": 3.2276313651868364e-06, "epoch": 1.3944954128440368, "percentage": 46.57, "elapsed_time": "1:03:18", "remaining_time": "1:12:37", "throughput": "0.00", "total_tokens": 0}
96
+ {"current_steps": 192, "total_steps": 408, "loss": 0.0115, "accuracy": 1.0, "learning_rate": 3.1865787213339926e-06, "epoch": 1.4091743119266056, "percentage": 47.06, "elapsed_time": "1:03:52", "remaining_time": "1:11:51", "throughput": "0.00", "total_tokens": 0}
97
+ {"current_steps": 194, "total_steps": 408, "loss": 0.008, "accuracy": 1.0, "learning_rate": 3.1453248407103156e-06, "epoch": 1.4238532110091744, "percentage": 47.55, "elapsed_time": "1:04:30", "remaining_time": "1:11:09", "throughput": "0.00", "total_tokens": 0}
98
+ {"current_steps": 196, "total_steps": 408, "loss": 0.0132, "accuracy": 1.0, "learning_rate": 3.1038818148611178e-06, "epoch": 1.4385321100917432, "percentage": 48.04, "elapsed_time": "1:05:03", "remaining_time": "1:10:22", "throughput": "0.00", "total_tokens": 0}
99
+ {"current_steps": 198, "total_steps": 408, "loss": 0.0253, "accuracy": 1.0, "learning_rate": 3.062261790770331e-06, "epoch": 1.453211009174312, "percentage": 48.53, "elapsed_time": "1:05:37", "remaining_time": "1:09:36", "throughput": "0.00", "total_tokens": 0}
100
+ {"current_steps": 200, "total_steps": 408, "loss": 0.0062, "accuracy": 1.0, "learning_rate": 3.0204769673002123e-06, "epoch": 1.4678899082568808, "percentage": 49.02, "elapsed_time": "1:06:12", "remaining_time": "1:08:51", "throughput": "0.00", "total_tokens": 0}
101
+ {"current_steps": 202, "total_steps": 408, "loss": 0.0167, "accuracy": 0.984375, "learning_rate": 2.978539591615848e-06, "epoch": 1.4825688073394496, "percentage": 49.51, "elapsed_time": "1:06:44", "remaining_time": "1:08:04", "throughput": "0.00", "total_tokens": 0}
102
+ {"current_steps": 204, "total_steps": 408, "loss": 0.0068, "accuracy": 1.0, "learning_rate": 2.936461955595501e-06, "epoch": 1.4972477064220184, "percentage": 50.0, "elapsed_time": "1:07:17", "remaining_time": "1:07:17", "throughput": "0.00", "total_tokens": 0}
103
+ {"current_steps": 206, "total_steps": 408, "loss": 0.011, "accuracy": 1.0, "learning_rate": 2.8942563922278487e-06, "epoch": 1.5119266055045872, "percentage": 50.49, "elapsed_time": "1:07:54", "remaining_time": "1:06:35", "throughput": "0.00", "total_tokens": 0}
104
+ {"current_steps": 208, "total_steps": 408, "loss": 0.0309, "accuracy": 0.984375, "learning_rate": 2.8519352719971783e-06, "epoch": 1.526605504587156, "percentage": 50.98, "elapsed_time": "1:08:40", "remaining_time": "1:06:01", "throughput": "0.00", "total_tokens": 0}
105
+ {"current_steps": 210, "total_steps": 408, "loss": 0.0049, "accuracy": 1.0, "learning_rate": 2.8095109992575824e-06, "epoch": 1.5412844036697249, "percentage": 51.47, "elapsed_time": "1:09:24", "remaining_time": "1:05:26", "throughput": "0.00", "total_tokens": 0}
106
+ {"current_steps": 212, "total_steps": 408, "loss": 0.0047, "accuracy": 1.0, "learning_rate": 2.7669960085972407e-06, "epoch": 1.5559633027522937, "percentage": 51.96, "elapsed_time": "1:10:06", "remaining_time": "1:04:49", "throughput": "0.00", "total_tokens": 0}
107
+ {"current_steps": 214, "total_steps": 408, "loss": 0.0215, "accuracy": 1.0, "learning_rate": 2.7244027611938247e-06, "epoch": 1.5706422018348625, "percentage": 52.45, "elapsed_time": "1:10:41", "remaining_time": "1:04:05", "throughput": "0.00", "total_tokens": 0}
108
+ {"current_steps": 216, "total_steps": 408, "loss": 0.007, "accuracy": 1.0, "learning_rate": 2.6817437411621194e-06, "epoch": 1.5853211009174313, "percentage": 52.94, "elapsed_time": "1:11:22", "remaining_time": "1:03:26", "throughput": "0.00", "total_tokens": 0}
109
+ {"current_steps": 218, "total_steps": 408, "loss": 0.0033, "accuracy": 1.0, "learning_rate": 2.639031451894923e-06, "epoch": 1.6, "percentage": 53.43, "elapsed_time": "1:12:14", "remaining_time": "1:02:57", "throughput": "0.00", "total_tokens": 0}
110
+ {"current_steps": 220, "total_steps": 408, "loss": 0.0122, "accuracy": 1.0, "learning_rate": 2.5962784123982843e-06, "epoch": 1.614678899082569, "percentage": 53.92, "elapsed_time": "1:12:55", "remaining_time": "1:02:19", "throughput": "0.00", "total_tokens": 0}
111
+ {"current_steps": 222, "total_steps": 408, "loss": 0.024, "accuracy": 0.984375, "learning_rate": 2.5534971536221804e-06, "epoch": 1.6293577981651377, "percentage": 54.41, "elapsed_time": "1:13:32", "remaining_time": "1:01:37", "throughput": "0.00", "total_tokens": 0}
112
+ {"current_steps": 224, "total_steps": 408, "loss": 0.0135, "accuracy": 1.0, "learning_rate": 2.5107002147876814e-06, "epoch": 1.6440366972477065, "percentage": 54.9, "elapsed_time": "1:14:08", "remaining_time": "1:00:54", "throughput": "0.00", "total_tokens": 0}
113
+ {"current_steps": 226, "total_steps": 408, "loss": 0.0238, "accuracy": 1.0, "learning_rate": 2.467900139711693e-06, "epoch": 1.6587155963302753, "percentage": 55.39, "elapsed_time": "1:14:47", "remaining_time": "1:00:13", "throughput": "0.00", "total_tokens": 0}
114
+ {"current_steps": 228, "total_steps": 408, "loss": 0.0057, "accuracy": 1.0, "learning_rate": 2.4251094731303586e-06, "epoch": 1.6733944954128441, "percentage": 55.88, "elapsed_time": "1:15:33", "remaining_time": "0:59:39", "throughput": "0.00", "total_tokens": 0}
115
+ {"current_steps": 230, "total_steps": 408, "loss": 0.006, "accuracy": 1.0, "learning_rate": 2.3823407570221812e-06, "epoch": 1.688073394495413, "percentage": 56.37, "elapsed_time": "1:16:06", "remaining_time": "0:58:54", "throughput": "0.00", "total_tokens": 0}
116
+ {"current_steps": 232, "total_steps": 408, "loss": 0.0091, "accuracy": 1.0, "learning_rate": 2.3396065269319655e-06, "epoch": 1.7027522935779817, "percentage": 56.86, "elapsed_time": "1:16:41", "remaining_time": "0:58:11", "throughput": "0.00", "total_tokens": 0}
117
+ {"current_steps": 234, "total_steps": 408, "loss": 0.0061, "accuracy": 1.0, "learning_rate": 2.2969193082966353e-06, "epoch": 1.7174311926605506, "percentage": 57.35, "elapsed_time": "1:17:14", "remaining_time": "0:57:26", "throughput": "0.00", "total_tokens": 0}
118
+ {"current_steps": 236, "total_steps": 408, "loss": 0.0121, "accuracy": 1.0, "learning_rate": 2.2542916127740194e-06, "epoch": 1.7321100917431194, "percentage": 57.84, "elapsed_time": "1:17:54", "remaining_time": "0:56:46", "throughput": "0.00", "total_tokens": 0}
119
+ {"current_steps": 238, "total_steps": 408, "loss": 0.0067, "accuracy": 1.0, "learning_rate": 2.211735934575674e-06, "epoch": 1.7467889908256882, "percentage": 58.33, "elapsed_time": "1:18:32", "remaining_time": "0:56:06", "throughput": "0.00", "total_tokens": 0}
120
+ {"current_steps": 240, "total_steps": 408, "loss": 0.0062, "accuracy": 1.0, "learning_rate": 2.1692647468048235e-06, "epoch": 1.761467889908257, "percentage": 58.82, "elapsed_time": "1:19:07", "remaining_time": "0:55:23", "throughput": "0.00", "total_tokens": 0}
121
+ {"current_steps": 242, "total_steps": 408, "loss": 0.0184, "accuracy": 1.0, "learning_rate": 2.126890497800477e-06, "epoch": 1.7761467889908258, "percentage": 59.31, "elapsed_time": "1:19:42", "remaining_time": "0:54:40", "throughput": "0.00", "total_tokens": 0}
122
+ {"current_steps": 244, "total_steps": 408, "loss": 0.0105, "accuracy": 1.0, "learning_rate": 2.084625607488816e-06, "epoch": 1.7908256880733946, "percentage": 59.8, "elapsed_time": "1:20:20", "remaining_time": "0:54:00", "throughput": "0.00", "total_tokens": 0}
123
+ {"current_steps": 246, "total_steps": 408, "loss": 0.0084, "accuracy": 1.0, "learning_rate": 2.0424824637428995e-06, "epoch": 1.8055045871559634, "percentage": 60.29, "elapsed_time": "1:20:57", "remaining_time": "0:53:18", "throughput": "0.00", "total_tokens": 0}
124
+ {"current_steps": 248, "total_steps": 408, "loss": 0.0103, "accuracy": 1.0, "learning_rate": 2.0004734187517744e-06, "epoch": 1.8201834862385322, "percentage": 60.78, "elapsed_time": "1:21:50", "remaining_time": "0:52:48", "throughput": "0.00", "total_tokens": 0}
125
+ {"current_steps": 250, "total_steps": 408, "loss": 0.0058, "accuracy": 1.0, "learning_rate": 1.9586107854000327e-06, "epoch": 1.834862385321101, "percentage": 61.27, "elapsed_time": "1:22:41", "remaining_time": "0:52:15", "throughput": "0.00", "total_tokens": 0}
126
+ {"current_steps": 252, "total_steps": 408, "loss": 0.0041, "accuracy": 1.0, "learning_rate": 1.916906833658899e-06, "epoch": 1.8495412844036698, "percentage": 61.76, "elapsed_time": "1:23:14", "remaining_time": "0:51:31", "throughput": "0.00", "total_tokens": 0}
127
+ {"current_steps": 254, "total_steps": 408, "loss": 0.007, "accuracy": 1.0, "learning_rate": 1.8753737869898921e-06, "epoch": 1.8642201834862386, "percentage": 62.25, "elapsed_time": "1:23:54", "remaining_time": "0:50:52", "throughput": "0.00", "total_tokens": 0}
128
+ {"current_steps": 256, "total_steps": 408, "loss": 0.0327, "accuracy": 0.984375, "learning_rate": 1.8340238187621185e-06, "epoch": 1.8788990825688074, "percentage": 62.75, "elapsed_time": "1:24:28", "remaining_time": "0:50:09", "throughput": "0.00", "total_tokens": 0}
129
+ {"current_steps": 258, "total_steps": 408, "loss": 0.0064, "accuracy": 1.0, "learning_rate": 1.7928690486842438e-06, "epoch": 1.8935779816513763, "percentage": 63.24, "elapsed_time": "1:25:11", "remaining_time": "0:49:31", "throughput": "0.00", "total_tokens": 0}
130
+ {"current_steps": 260, "total_steps": 408, "loss": 0.0064, "accuracy": 1.0, "learning_rate": 1.7519215392522026e-06, "epoch": 1.908256880733945, "percentage": 63.73, "elapsed_time": "1:25:51", "remaining_time": "0:48:52", "throughput": "0.00", "total_tokens": 0}
131
+ {"current_steps": 262, "total_steps": 408, "loss": 0.019, "accuracy": 0.984375, "learning_rate": 1.7111932922136715e-06, "epoch": 1.9229357798165139, "percentage": 64.22, "elapsed_time": "1:26:26", "remaining_time": "0:48:10", "throughput": "0.00", "total_tokens": 0}
132
+ {"current_steps": 264, "total_steps": 408, "loss": 0.0059, "accuracy": 1.0, "learning_rate": 1.6706962450503408e-06, "epoch": 1.9376146788990827, "percentage": 64.71, "elapsed_time": "1:27:02", "remaining_time": "0:47:28", "throughput": "0.00", "total_tokens": 0}
133
+ {"current_steps": 266, "total_steps": 408, "loss": 0.0113, "accuracy": 1.0, "learning_rate": 1.630442267479034e-06, "epoch": 1.9522935779816515, "percentage": 65.2, "elapsed_time": "1:27:43", "remaining_time": "0:46:49", "throughput": "0.00", "total_tokens": 0}
134
+ {"current_steps": 268, "total_steps": 408, "loss": 0.0066, "accuracy": 1.0, "learning_rate": 1.5904431579726837e-06, "epoch": 1.9669724770642203, "percentage": 65.69, "elapsed_time": "1:28:19", "remaining_time": "0:46:08", "throughput": "0.00", "total_tokens": 0}
135
+ {"current_steps": 270, "total_steps": 408, "loss": 0.0028, "accuracy": 1.0, "learning_rate": 1.5507106403021897e-06, "epoch": 1.981651376146789, "percentage": 66.18, "elapsed_time": "1:28:58", "remaining_time": "0:45:28", "throughput": "0.00", "total_tokens": 0}
136
+ {"current_steps": 272, "total_steps": 408, "loss": 0.0052, "accuracy": 1.0, "learning_rate": 1.511256360100171e-06, "epoch": 1.996330275229358, "percentage": 66.67, "elapsed_time": "1:29:31", "remaining_time": "0:44:45", "throughput": "0.00", "total_tokens": 0}
137
+ {"current_steps": 274, "total_steps": 408, "loss": 0.0107, "accuracy": 1.0, "learning_rate": 1.4720918814476234e-06, "epoch": 2.0110091743119267, "percentage": 67.16, "elapsed_time": "1:30:16", "remaining_time": "0:44:08", "throughput": "0.00", "total_tokens": 0}
138
+ {"current_steps": 276, "total_steps": 408, "loss": 0.0033, "accuracy": 1.0, "learning_rate": 1.4332286834844792e-06, "epoch": 2.0256880733944955, "percentage": 67.65, "elapsed_time": "1:31:05", "remaining_time": "0:43:33", "throughput": "0.00", "total_tokens": 0}
139
+ {"current_steps": 278, "total_steps": 408, "loss": 0.0026, "accuracy": 1.0, "learning_rate": 1.3946781570450563e-06, "epoch": 2.0403669724770643, "percentage": 68.14, "elapsed_time": "1:31:50", "remaining_time": "0:42:56", "throughput": "0.00", "total_tokens": 0}
140
+ {"current_steps": 280, "total_steps": 408, "loss": 0.004, "accuracy": 1.0, "learning_rate": 1.3564516013194023e-06, "epoch": 2.055045871559633, "percentage": 68.63, "elapsed_time": "1:32:21", "remaining_time": "0:42:13", "throughput": "0.00", "total_tokens": 0}
141
+ {"current_steps": 282, "total_steps": 408, "loss": 0.0059, "accuracy": 1.0, "learning_rate": 1.3185602205414894e-06, "epoch": 2.069724770642202, "percentage": 69.12, "elapsed_time": "1:33:03", "remaining_time": "0:41:34", "throughput": "0.00", "total_tokens": 0}
142
+ {"current_steps": 284, "total_steps": 408, "loss": 0.0042, "accuracy": 1.0, "learning_rate": 1.2810151207052465e-06, "epoch": 2.0844036697247708, "percentage": 69.61, "elapsed_time": "1:33:55", "remaining_time": "0:41:00", "throughput": "0.00", "total_tokens": 0}
143
+ {"current_steps": 286, "total_steps": 408, "loss": 0.0127, "accuracy": 1.0, "learning_rate": 1.2438273063093811e-06, "epoch": 2.0990825688073396, "percentage": 70.1, "elapsed_time": "1:34:27", "remaining_time": "0:40:17", "throughput": "0.00", "total_tokens": 0}
144
+ {"current_steps": 288, "total_steps": 408, "loss": 0.0101, "accuracy": 1.0, "learning_rate": 1.2070076771319536e-06, "epoch": 2.1137614678899084, "percentage": 70.59, "elapsed_time": "1:35:17", "remaining_time": "0:39:42", "throughput": "0.00", "total_tokens": 0}
145
+ {"current_steps": 290, "total_steps": 408, "loss": 0.0121, "accuracy": 1.0, "learning_rate": 1.1705670250356417e-06, "epoch": 2.128440366972477, "percentage": 71.08, "elapsed_time": "1:35:55", "remaining_time": "0:39:01", "throughput": "0.00", "total_tokens": 0}
146
+ {"current_steps": 292, "total_steps": 408, "loss": 0.0044, "accuracy": 1.0, "learning_rate": 1.1345160308046413e-06, "epoch": 2.143119266055046, "percentage": 71.57, "elapsed_time": "1:36:37", "remaining_time": "0:38:23", "throughput": "0.00", "total_tokens": 0}
147
+ {"current_steps": 294, "total_steps": 408, "loss": 0.0214, "accuracy": 0.984375, "learning_rate": 1.0988652610141154e-06, "epoch": 2.157798165137615, "percentage": 72.06, "elapsed_time": "1:37:10", "remaining_time": "0:37:40", "throughput": "0.00", "total_tokens": 0}
148
+ {"current_steps": 296, "total_steps": 408, "loss": 0.0081, "accuracy": 1.0, "learning_rate": 1.063625164933124e-06, "epoch": 2.1724770642201836, "percentage": 72.55, "elapsed_time": "1:37:48", "remaining_time": "0:37:00", "throughput": "0.00", "total_tokens": 0}
149
+ {"current_steps": 298, "total_steps": 408, "loss": 0.0098, "accuracy": 1.0, "learning_rate": 1.0288060714619359e-06, "epoch": 2.1871559633027524, "percentage": 73.04, "elapsed_time": "1:38:32", "remaining_time": "0:36:22", "throughput": "0.00", "total_tokens": 0}
150
+ {"current_steps": 300, "total_steps": 408, "loss": 0.0112, "accuracy": 1.0, "learning_rate": 9.944181861046188e-07, "epoch": 2.2018348623853212, "percentage": 73.53, "elapsed_time": "1:39:08", "remaining_time": "0:35:41", "throughput": "0.00", "total_tokens": 0}
151
+ {"current_steps": 302, "total_steps": 408, "loss": 0.0055, "accuracy": 1.0, "learning_rate": 9.604715879777986e-07, "epoch": 2.21651376146789, "percentage": 74.02, "elapsed_time": "1:39:43", "remaining_time": "0:35:00", "throughput": "0.00", "total_tokens": 0}
152
+ {"current_steps": 304, "total_steps": 408, "loss": 0.012, "accuracy": 1.0, "learning_rate": 9.269762268564616e-07, "epoch": 2.231192660550459, "percentage": 74.51, "elapsed_time": "1:40:27", "remaining_time": "0:34:21", "throughput": "0.00", "total_tokens": 0}
153
+ {"current_steps": 306, "total_steps": 408, "loss": 0.0133, "accuracy": 1.0, "learning_rate": 8.939419202576694e-07, "epoch": 2.2458715596330276, "percentage": 75.0, "elapsed_time": "1:40:56", "remaining_time": "0:33:38", "throughput": "0.00", "total_tokens": 0}
154
+ {"current_steps": 308, "total_steps": 408, "loss": 0.0059, "accuracy": 1.0, "learning_rate": 8.61378350563033e-07, "epoch": 2.2605504587155965, "percentage": 75.49, "elapsed_time": "1:41:28", "remaining_time": "0:32:56", "throughput": "0.00", "total_tokens": 0}
155
+ {"current_steps": 310, "total_steps": 408, "loss": 0.005, "accuracy": 1.0, "learning_rate": 8.292950621808022e-07, "epoch": 2.2752293577981653, "percentage": 75.98, "elapsed_time": "1:42:03", "remaining_time": "0:32:15", "throughput": "0.00", "total_tokens": 0}
156
+ {"current_steps": 312, "total_steps": 408, "loss": 0.011, "accuracy": 1.0, "learning_rate": 7.977014587483925e-07, "epoch": 2.289908256880734, "percentage": 76.47, "elapsed_time": "1:42:53", "remaining_time": "0:31:39", "throughput": "0.00", "total_tokens": 0}
157
+ {"current_steps": 314, "total_steps": 408, "loss": 0.0027, "accuracy": 1.0, "learning_rate": 7.666068003761684e-07, "epoch": 2.304587155963303, "percentage": 76.96, "elapsed_time": "1:43:30", "remaining_time": "0:30:59", "throughput": "0.00", "total_tokens": 0}
158
+ {"current_steps": 316, "total_steps": 408, "loss": 0.0048, "accuracy": 1.0, "learning_rate": 7.360202009332993e-07, "epoch": 2.3192660550458717, "percentage": 77.45, "elapsed_time": "1:44:10", "remaining_time": "0:30:19", "throughput": "0.00", "total_tokens": 0}
159
+ {"current_steps": 318, "total_steps": 408, "loss": 0.0066, "accuracy": 1.0, "learning_rate": 7.059506253764773e-07, "epoch": 2.3339449541284405, "percentage": 77.94, "elapsed_time": "1:44:49", "remaining_time": "0:29:40", "throughput": "0.00", "total_tokens": 0}
160
+ {"current_steps": 320, "total_steps": 408, "loss": 0.0055, "accuracy": 1.0, "learning_rate": 6.764068871222825e-07, "epoch": 2.3486238532110093, "percentage": 78.43, "elapsed_time": "1:45:28", "remaining_time": "0:29:00", "throughput": "0.00", "total_tokens": 0}
161
+ {"current_steps": 322, "total_steps": 408, "loss": 0.0029, "accuracy": 1.0, "learning_rate": 6.473976454639608e-07, "epoch": 2.363302752293578, "percentage": 78.92, "elapsed_time": "1:46:04", "remaining_time": "0:28:19", "throughput": "0.00", "total_tokens": 0}
162
+ {"current_steps": 324, "total_steps": 408, "loss": 0.0154, "accuracy": 1.0, "learning_rate": 6.189314030333796e-07, "epoch": 2.377981651376147, "percentage": 79.41, "elapsed_time": "1:46:40", "remaining_time": "0:27:39", "throughput": "0.00", "total_tokens": 0}
163
+ {"current_steps": 326, "total_steps": 408, "loss": 0.0022, "accuracy": 1.0, "learning_rate": 5.910165033089e-07, "epoch": 2.3926605504587157, "percentage": 79.9, "elapsed_time": "1:47:19", "remaining_time": "0:26:59", "throughput": "0.00", "total_tokens": 0}
164
+ {"current_steps": 328, "total_steps": 408, "loss": 0.0067, "accuracy": 1.0, "learning_rate": 5.636611281698956e-07, "epoch": 2.4073394495412845, "percentage": 80.39, "elapsed_time": "1:47:58", "remaining_time": "0:26:20", "throughput": "0.00", "total_tokens": 0}
165
+ {"current_steps": 330, "total_steps": 408, "loss": 0.0069, "accuracy": 1.0, "learning_rate": 5.368732954986389e-07, "epoch": 2.4220183486238533, "percentage": 80.88, "elapsed_time": "1:48:52", "remaining_time": "0:25:44", "throughput": "0.00", "total_tokens": 0}
166
+ {"current_steps": 332, "total_steps": 408, "loss": 0.0204, "accuracy": 0.984375, "learning_rate": 5.106608568302504e-07, "epoch": 2.436697247706422, "percentage": 81.37, "elapsed_time": "1:49:36", "remaining_time": "0:25:05", "throughput": "0.00", "total_tokens": 0}
167
+ {"current_steps": 334, "total_steps": 408, "loss": 0.0104, "accuracy": 1.0, "learning_rate": 4.850314950514124e-07, "epoch": 2.451376146788991, "percentage": 81.86, "elapsed_time": "1:50:08", "remaining_time": "0:24:24", "throughput": "0.00", "total_tokens": 0}
168
+ {"current_steps": 336, "total_steps": 408, "loss": 0.0033, "accuracy": 1.0, "learning_rate": 4.599927221485034e-07, "epoch": 2.4660550458715598, "percentage": 82.35, "elapsed_time": "1:50:48", "remaining_time": "0:23:44", "throughput": "0.00", "total_tokens": 0}
169
+ {"current_steps": 338, "total_steps": 408, "loss": 0.0036, "accuracy": 1.0, "learning_rate": 4.3555187700583175e-07, "epoch": 2.4807339449541286, "percentage": 82.84, "elapsed_time": "1:51:27", "remaining_time": "0:23:04", "throughput": "0.00", "total_tokens": 0}
170
+ {"current_steps": 340, "total_steps": 408, "loss": 0.0045, "accuracy": 1.0, "learning_rate": 4.1171612325460244e-07, "epoch": 2.4954128440366974, "percentage": 83.33, "elapsed_time": "1:52:08", "remaining_time": "0:22:25", "throughput": "0.00", "total_tokens": 0}
171
+ {"current_steps": 342, "total_steps": 408, "loss": 0.0096, "accuracy": 1.0, "learning_rate": 3.8849244717325206e-07, "epoch": 2.510091743119266, "percentage": 83.82, "elapsed_time": "1:52:48", "remaining_time": "0:21:46", "throughput": "0.00", "total_tokens": 0}
172
+ {"current_steps": 344, "total_steps": 408, "loss": 0.0099, "accuracy": 1.0, "learning_rate": 3.658876556397628e-07, "epoch": 2.524770642201835, "percentage": 84.31, "elapsed_time": "1:53:29", "remaining_time": "0:21:06", "throughput": "0.00", "total_tokens": 0}
173
+ {"current_steps": 346, "total_steps": 408, "loss": 0.0083, "accuracy": 1.0, "learning_rate": 3.4390837413656256e-07, "epoch": 2.539449541284404, "percentage": 84.8, "elapsed_time": "1:54:10", "remaining_time": "0:20:27", "throughput": "0.00", "total_tokens": 0}
174
+ {"current_steps": 348, "total_steps": 408, "loss": 0.0113, "accuracy": 1.0, "learning_rate": 3.225610448085903e-07, "epoch": 2.5541284403669726, "percentage": 85.29, "elapsed_time": "1:54:46", "remaining_time": "0:19:47", "throughput": "0.00", "total_tokens": 0}
175
+ {"current_steps": 350, "total_steps": 408, "loss": 0.0091, "accuracy": 1.0, "learning_rate": 3.018519245750989e-07, "epoch": 2.5688073394495414, "percentage": 85.78, "elapsed_time": "1:55:22", "remaining_time": "0:19:07", "throughput": "0.00", "total_tokens": 0}
176
+ {"current_steps": 352, "total_steps": 408, "loss": 0.0056, "accuracy": 1.0, "learning_rate": 2.817870832957459e-07, "epoch": 2.5834862385321102, "percentage": 86.27, "elapsed_time": "1:55:52", "remaining_time": "0:18:26", "throughput": "0.00", "total_tokens": 0}
177
+ {"current_steps": 354, "total_steps": 408, "loss": 0.0051, "accuracy": 1.0, "learning_rate": 2.6237240199151386e-07, "epoch": 2.598165137614679, "percentage": 86.76, "elapsed_time": "1:56:38", "remaining_time": "0:17:47", "throughput": "0.00", "total_tokens": 0}
178
+ {"current_steps": 356, "total_steps": 408, "loss": 0.009, "accuracy": 1.0, "learning_rate": 2.436135711209786e-07, "epoch": 2.612844036697248, "percentage": 87.25, "elapsed_time": "1:57:25", "remaining_time": "0:17:09", "throughput": "0.00", "total_tokens": 0}
179
+ {"current_steps": 358, "total_steps": 408, "loss": 0.0043, "accuracy": 1.0, "learning_rate": 2.2551608891243026e-07, "epoch": 2.6275229357798167, "percentage": 87.75, "elapsed_time": "1:58:09", "remaining_time": "0:16:30", "throughput": "0.00", "total_tokens": 0}
180
+ {"current_steps": 360, "total_steps": 408, "loss": 0.015, "accuracy": 1.0, "learning_rate": 2.0808525975233807e-07, "epoch": 2.6422018348623855, "percentage": 88.24, "elapsed_time": "1:58:43", "remaining_time": "0:15:49", "throughput": "0.00", "total_tokens": 0}
181
+ {"current_steps": 362, "total_steps": 408, "loss": 0.0096, "accuracy": 1.0, "learning_rate": 1.9132619263063144e-07, "epoch": 2.6568807339449543, "percentage": 88.73, "elapsed_time": "1:59:28", "remaining_time": "0:15:10", "throughput": "0.00", "total_tokens": 0}
182
+ {"current_steps": 364, "total_steps": 408, "loss": 0.0038, "accuracy": 1.0, "learning_rate": 1.7524379964325155e-07, "epoch": 2.671559633027523, "percentage": 89.22, "elapsed_time": "2:00:10", "remaining_time": "0:14:31", "throughput": "0.00", "total_tokens": 0}
183
+ {"current_steps": 366, "total_steps": 408, "loss": 0.0036, "accuracy": 1.0, "learning_rate": 1.5984279455240975e-07, "epoch": 2.686238532110092, "percentage": 89.71, "elapsed_time": "2:00:49", "remaining_time": "0:13:51", "throughput": "0.00", "total_tokens": 0}
184
+ {"current_steps": 368, "total_steps": 408, "loss": 0.0037, "accuracy": 1.0, "learning_rate": 1.451276914049818e-07, "epoch": 2.7009174311926607, "percentage": 90.2, "elapsed_time": "2:01:26", "remaining_time": "0:13:12", "throughput": "0.00", "total_tokens": 0}
185
+ {"current_steps": 370, "total_steps": 408, "loss": 0.0016, "accuracy": 1.0, "learning_rate": 1.3110280320943692e-07, "epoch": 2.7155963302752295, "percentage": 90.69, "elapsed_time": "2:02:02", "remaining_time": "0:12:32", "throughput": "0.00", "total_tokens": 0}
186
+ {"current_steps": 372, "total_steps": 408, "loss": 0.0049, "accuracy": 1.0, "learning_rate": 1.1777224067169218e-07, "epoch": 2.7302752293577983, "percentage": 91.18, "elapsed_time": "2:02:42", "remaining_time": "0:11:52", "throughput": "0.00", "total_tokens": 0}
187
+ {"current_steps": 374, "total_steps": 408, "loss": 0.0062, "accuracy": 1.0, "learning_rate": 1.0513991099025872e-07, "epoch": 2.744954128440367, "percentage": 91.67, "elapsed_time": "2:03:26", "remaining_time": "0:11:13", "throughput": "0.00", "total_tokens": 0}
188
+ {"current_steps": 376, "total_steps": 408, "loss": 0.0048, "accuracy": 1.0, "learning_rate": 9.320951671104194e-08, "epoch": 2.759633027522936, "percentage": 92.16, "elapsed_time": "2:04:08", "remaining_time": "0:10:33", "throughput": "0.00", "total_tokens": 0}
189
+ {"current_steps": 378, "total_steps": 408, "loss": 0.0017, "accuracy": 1.0, "learning_rate": 8.198455464212108e-08, "epoch": 2.7743119266055047, "percentage": 92.65, "elapsed_time": "2:04:45", "remaining_time": "0:09:54", "throughput": "0.00", "total_tokens": 0}
190
+ {"current_steps": 380, "total_steps": 408, "loss": 0.0027, "accuracy": 1.0, "learning_rate": 7.146831482883115e-08, "epoch": 2.7889908256880735, "percentage": 93.14, "elapsed_time": "2:05:17", "remaining_time": "0:09:13", "throughput": "0.00", "total_tokens": 0}
191
+ {"current_steps": 382, "total_steps": 408, "loss": 0.0049, "accuracy": 1.0, "learning_rate": 6.16638795894492e-08, "epoch": 2.8036697247706424, "percentage": 93.63, "elapsed_time": "2:05:50", "remaining_time": "0:08:33", "throughput": "0.00", "total_tokens": 0}
192
+ {"current_steps": 384, "total_steps": 408, "loss": 0.008, "accuracy": 1.0, "learning_rate": 5.257412261176375e-08, "epoch": 2.818348623853211, "percentage": 94.12, "elapsed_time": "2:06:37", "remaining_time": "0:07:54", "throughput": "0.00", "total_tokens": 0}
193
+ {"current_steps": 386, "total_steps": 408, "loss": 0.0039, "accuracy": 1.0, "learning_rate": 4.4201708110795384e-08, "epoch": 2.83302752293578, "percentage": 94.61, "elapsed_time": "2:07:11", "remaining_time": "0:07:14", "throughput": "0.00", "total_tokens": 0}
194
+ {"current_steps": 388, "total_steps": 408, "loss": 0.0121, "accuracy": 1.0, "learning_rate": 3.654909004791152e-08, "epoch": 2.847706422018349, "percentage": 95.1, "elapsed_time": "2:07:54", "remaining_time": "0:06:35", "throughput": "0.00", "total_tokens": 0}
195
+ {"current_steps": 390, "total_steps": 408, "loss": 0.0069, "accuracy": 1.0, "learning_rate": 2.9618511411570462e-08, "epoch": 2.8623853211009176, "percentage": 95.59, "elapsed_time": "2:08:25", "remaining_time": "0:05:55", "throughput": "0.00", "total_tokens": 0}
196
+ {"current_steps": 392, "total_steps": 408, "loss": 0.0125, "accuracy": 1.0, "learning_rate": 2.3412003559898088e-08, "epoch": 2.8770642201834864, "percentage": 96.08, "elapsed_time": "2:09:01", "remaining_time": "0:05:15", "throughput": "0.00", "total_tokens": 0}
197
+ {"current_steps": 394, "total_steps": 408, "loss": 0.0027, "accuracy": 1.0, "learning_rate": 1.793138562529634e-08, "epoch": 2.891743119266055, "percentage": 96.57, "elapsed_time": "2:09:46", "remaining_time": "0:04:36", "throughput": "0.00", "total_tokens": 0}
198
+ {"current_steps": 396, "total_steps": 408, "loss": 0.0135, "accuracy": 1.0, "learning_rate": 1.317826398125277e-08, "epoch": 2.906422018348624, "percentage": 97.06, "elapsed_time": "2:10:44", "remaining_time": "0:03:57", "throughput": "0.00", "total_tokens": 0}
199
+ {"current_steps": 398, "total_steps": 408, "loss": 0.0064, "accuracy": 1.0, "learning_rate": 9.15403177151275e-09, "epoch": 2.921100917431193, "percentage": 97.55, "elapsed_time": "2:11:25", "remaining_time": "0:03:18", "throughput": "0.00", "total_tokens": 0}
200
+ {"current_steps": 400, "total_steps": 408, "loss": 0.0027, "accuracy": 1.0, "learning_rate": 5.85986850174608e-09, "epoch": 2.9357798165137616, "percentage": 98.04, "elapsed_time": "2:11:59", "remaining_time": "0:02:38", "throughput": "0.00", "total_tokens": 0}
201
+ {"current_steps": 402, "total_steps": 408, "loss": 0.0032, "accuracy": 1.0, "learning_rate": 3.296739693834927e-09, "epoch": 2.9504587155963304, "percentage": 98.53, "elapsed_time": "2:12:35", "remaining_time": "0:01:58", "throughput": "0.00", "total_tokens": 0}
202
+ {"current_steps": 404, "total_steps": 408, "loss": 0.0034, "accuracy": 1.0, "learning_rate": 1.4653966028774225e-09, "epoch": 2.9651376146788992, "percentage": 99.02, "elapsed_time": "2:13:07", "remaining_time": "0:01:19", "throughput": "0.00", "total_tokens": 0}
203
+ {"current_steps": 406, "total_steps": 408, "loss": 0.0028, "accuracy": 1.0, "learning_rate": 3.6637599699351766e-10, "epoch": 2.979816513761468, "percentage": 99.51, "elapsed_time": "2:13:48", "remaining_time": "0:00:39", "throughput": "0.00", "total_tokens": 0}
204
+ {"current_steps": 408, "total_steps": 408, "loss": 0.0316, "accuracy": 0.984375, "learning_rate": 0.0, "epoch": 2.994495412844037, "percentage": 100.0, "elapsed_time": "2:14:29", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
205
+ {"current_steps": 408, "total_steps": 408, "epoch": 2.994495412844037, "percentage": 100.0, "elapsed_time": "2:14:29", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:530f40812c81c9e4d103015fa8c5d5e3b76eb82a429351470c903458b5348c14
3
+ size 5304
training_loss.png ADDED
training_rewards_accuracies.png ADDED
vocab.json ADDED
The diff for this file is too large to render. See raw diff