bhaviktheslider committed on
Commit 1c9851f · verified · 1 Parent(s): 33605fb

Model save
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,68 @@
+ ---
+ base_model: Qwen/Qwen2.5-1.5B-Instruct
+ library_name: transformers
+ model_name: qwen-2.5-7b-r1-countdown
+ tags:
+ - generated_from_trainer
+ - trl
+ - grpo
+ licence: license
+ ---
+ 
+ # Model Card for qwen-2.5-7b-r1-countdown
+ 
+ This model is a fine-tuned version of [Qwen/Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct).
+ It has been trained using [TRL](https://github.com/huggingface/trl).
+ 
+ ## Quick start
+ 
+ ```python
+ from transformers import pipeline
+ 
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+ generator = pipeline("text-generation", model="bhaviktheslider/qwen-2.5-7b-r1-countdown", device="cuda")
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+ print(output["generated_text"])
+ ```
+ 
+ ## Training procedure
+ 
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/bhavik18385-mastercontrol/grpo_training/runs/cnqeubat)
+ 
+ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
+ 
+ ### Framework versions
+ 
+ - TRL: 0.14.0
+ - Transformers: 4.48.1
+ - Pytorch: 2.5.1
+ - Datasets: 3.1.0
+ - Tokenizers: 0.21.0
+ 
+ ## Citations
+ 
+ Cite GRPO as:
+ 
+ ```bibtex
+ @article{zhihong2024deepseekmath,
+     title        = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
+     author       = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
+     year         = 2024,
+     eprint       = {arXiv:2402.03300},
+ }
+ ```
+ 
+ Cite TRL as:
+ 
+ ```bibtex
+ @misc{vonwerra2022trl,
+     title        = {{TRL: Transformer Reinforcement Learning}},
+     author       = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
+     year         = 2020,
+     journal      = {GitHub repository},
+     publisher    = {GitHub},
+     howpublished = {\url{https://github.com/huggingface/trl}}
+ }
+ ```
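The card names GRPO as the training method. For reference, a minimal sketch of a GRPO run with TRL 0.14's `GRPOTrainer` follows; the dataset and length-based reward are placeholders adapted from the TRL quickstart, not the Countdown task and reward actually used for this checkpoint.

```python
# Minimal GRPO sketch (TRL 0.14). Placeholder dataset and reward; the real
# run used a Countdown-task reward that is not part of this commit.
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

def reward_len(completions, **kwargs):
    # Toy reward: prefer completions about 50 characters long.
    return [-abs(50 - len(completion)) for completion in completions]

dataset = load_dataset("trl-lib/tldr", split="train")

trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-1.5B-Instruct",  # the base model from the card
    reward_funcs=reward_len,
    args=GRPOConfig(output_dir="qwen-grpo-sketch", num_generations=4),
    train_dataset=dataset,
)
trainer.train()
```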
added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
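These IDs should line up with the tokenizer files shipped in this commit. A quick sanity check (not part of the commit), assuming the repo id from the model card:

```python
# Verify the added-token IDs above against the repo's tokenizer.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bhaviktheslider/qwen-2.5-7b-r1-countdown")
assert tok.convert_tokens_to_ids("<|im_end|>") == 151645
assert tok.convert_tokens_to_ids("<|endoftext|>") == 151643
print(tok.eos_token, tok.eos_token_id)  # Qwen2.5-Instruct uses <|im_end|> as EOS
```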
all_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "total_flos": 0.0,
+   "train_loss": 0.0,
+   "train_runtime": 0.0058,
+   "train_samples_per_second": 3851297.791,
+   "train_steps_per_second": 17193.294
+ }
config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "_name_or_path": "Qwen/Qwen2.5-1.5B-Instruct",
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151645,
+   "hidden_act": "silu",
+   "hidden_size": 1536,
+   "initializer_range": 0.02,
+   "intermediate_size": 8960,
+   "max_position_embeddings": 32768,
+   "max_window_layers": 21,
+   "model_type": "qwen2",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 2,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.48.1",
+   "use_cache": false,
+   "use_sliding_window": false,
+   "vocab_size": 151936
+ }
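These values pin down the model size. A back-of-envelope count from the config (Q/K/V biases included; `tie_word_embeddings` means the LM head shares the embedding matrix) reproduces the 6,174,857,216-byte `total_size` reported in the shard indexes below at 4 bytes per float32 parameter:

```python
# Parameter count derived from config.json above (a sketch; shapes follow
# the standard Qwen2 layout with head_dim = hidden_size // num_heads = 128).
hidden, inter, layers, vocab = 1536, 8960, 28, 151936
heads, kv_heads, head_dim = 12, 2, 128

attn = hidden * heads * head_dim            # q_proj weight
attn += 2 * hidden * kv_heads * head_dim    # k_proj + v_proj weights (GQA)
attn += heads * head_dim * hidden           # o_proj weight
attn += (heads + 2 * kv_heads) * head_dim   # q/k/v biases
mlp = 3 * hidden * inter                    # gate, up, down projections
norms = 2 * hidden                          # the two RMSNorms per layer

params = layers * (attn + mlp + norms) + vocab * hidden + hidden  # + final norm
print(params)      # 1543714304  (~1.54B)
print(params * 4)  # 6174857216 bytes in float32 -- matches the index metadata
```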
generation_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "bos_token_id": 151643,
+   "do_sample": true,
+   "eos_token_id": [
+     151645,
+     151643
+   ],
+   "pad_token_id": 151643,
+   "repetition_penalty": 1.1,
+   "temperature": 0.7,
+   "top_k": 20,
+   "top_p": 0.8,
+   "transformers_version": "4.48.1"
+ }
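These sampling defaults travel with the checkpoint: `generate()` reads them automatically, and any per-call keyword argument overrides them. A sketch, assuming a CUDA device and the repo id from the model card:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "bhaviktheslider/qwen-2.5-7b-r1-countdown"
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(
    repo, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [{"role": "user", "content": "Combine 3, 5 and 8 to make 24."}]
input_ids = tok.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Uses do_sample=True, top_k=20, top_p=0.8, repetition_penalty=1.1 from
# generation_config.json; temperature is overridden for this call only.
out = model.generate(input_ids, max_new_tokens=256, temperature=0.9)
print(tok.decode(out[0, input_ids.shape[-1]:], skip_special_tokens=True))
```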
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:382b4b911cbd5f030d212ec595af082e5f83775a5fdb47f9f4daeb004c8e8abb
+ size 4996670464
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c0d42d0ef3ef62eee7784a04b7885a7c3a3b69fb7045f8a3c4f0d93744d01c51
+ size 1178224960
model.safetensors.index.json ADDED
@@ -0,0 +1,345 @@
+ {
+   "metadata": {
+     "total_size": 6174857216
+   },
+   "weight_map": {
+     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.norm.weight": "model-00002-of-00002.safetensors"
+   }
+ }
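The `weight_map` is what loaders consult to find each tensor's shard, so only the needed file has to be read. A minimal sketch, assuming the two shards have been downloaded next to the index:

```python
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Layer 21's down_proj is one of the tensors that spilled into shard 2.
name = "model.layers.21.mlp.down_proj.weight"
shard = index["weight_map"][name]        # "model-00002-of-00002.safetensors"
with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)
print(tensor.shape)                      # torch.Size([1536, 8960])
```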
pytorch_model-00001-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b9feda8f2156fef3fa2cc01f7d75a15d0aefce0ad09cca2debbf3bc31c2ab548
+ size 4996733620
pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:22ed3fa7fc59c039ada3ffc9c43460959bbeafbe4cbfc5f2fa1139927e24291e
+ size 1178243330
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,346 @@
+ {
+   "metadata": {
+     "total_size": 6174857216
+   },
+   "weight_map": {
+     "lm_head.weight": "pytorch_model-00001-of-00002.bin",
+     "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.21.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.k_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.q_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.v_proj.bias": "pytorch_model-00002-of-00002.bin",
+     "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+     "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
+     "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+     "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
297
+ "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
298
+ "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
299
+ "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
300
+ "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
301
+ "model.layers.6.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
302
+ "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
303
+ "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
304
+ "model.layers.6.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
305
+ "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
306
+ "model.layers.6.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
307
+ "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
308
+ "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
309
+ "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
310
+ "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
311
+ "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
312
+ "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
313
+ "model.layers.7.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
314
+ "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
315
+ "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
316
+ "model.layers.7.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
317
+ "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
318
+ "model.layers.7.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
319
+ "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
320
+ "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
321
+ "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
322
+ "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
323
+ "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
324
+ "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
325
+ "model.layers.8.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
326
+ "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
327
+ "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
328
+ "model.layers.8.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
329
+ "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
330
+ "model.layers.8.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
331
+ "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
332
+ "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
333
+ "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
334
+ "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
335
+ "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
336
+ "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
337
+ "model.layers.9.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin",
338
+ "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
339
+ "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
340
+ "model.layers.9.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin",
341
+ "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
342
+ "model.layers.9.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin",
343
+ "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
344
+ "model.norm.weight": "pytorch_model-00002-of-00002.bin"
345
+ }
346
+ }
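
For orientation (not part of the commit): the index above follows the standard Transformers sharded-checkpoint convention, where `weight_map` pairs each parameter name with the shard file that stores it. A minimal sketch of inspecting such an index, assuming a local copy of `pytorch_model.bin.index.json`:

```python
import json
from collections import Counter

# Load the shard index committed above and count tensors per shard.
# The path is an assumption; point it at wherever the checkpoint lives locally.
with open("pytorch_model.bin.index.json") as f:
    index = json.load(f)

weight_map = index["weight_map"]  # parameter name -> shard file
for shard, n_tensors in sorted(Counter(weight_map.values()).items()):
    print(f"{shard}: {n_tensors} tensors")

# Look up which shard holds a specific tensor, e.g. the final norm.
print(weight_map.get("model.norm.weight"))  # -> pytorch_model-00002-of-00002.bin
```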
runs/Feb01_03-52-21_129c3b823eb8/events.out.tfevents.1738382009.129c3b823eb8.57352.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5291b8f1a30d95e9962cc8882b339753b930af3a03860abe24346d7417b7360
+ size 5446
runs/Feb01_03-58-24_129c3b823eb8/events.out.tfevents.1738382368.129c3b823eb8.60932.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d56955271f454a5edff912e1067717eb7543d74c2d4bdc6828bddb04a18b680
+ size 5446
runs/Feb01_04-01-00_129c3b823eb8/events.out.tfevents.1738382525.129c3b823eb8.64334.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be19d658343c6f7e10fa64d5df632dfbee1c6739351a19931382498a882fd36f
+ size 5800
runs/Feb01_04-01-00_129c3b823eb8/events.out.tfevents.1738382538.129c3b823eb8.64334.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:692137af554b71fde253fd37809446da49b1825936b38772b31a3f928104166d
+ size 5800
runs/Jan30_23-42-18_129c3b823eb8/events.out.tfevents.1738280589.129c3b823eb8.33136.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f15a80399b4b3317986959585f7df1db311d419fe5598ff0f2e6a1c13de5a87e
+ size 5445
runs/Jan30_23-48-03_129c3b823eb8/events.out.tfevents.1738280933.129c3b823eb8.36530.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2df4891e2953fc31e34df9c0239a88430b1c88fafcb71fad0ea43ffb36372749
+ size 12165
runs/Jan31_07-16-36_129c3b823eb8/events.out.tfevents.1738307857.129c3b823eb8.42757.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2a93df86d0e7d998480a1f79e211c6fd290187a5bad843bfe721c0d9c1108c32
+ size 5445
runs/Jan31_07-23-28_129c3b823eb8/events.out.tfevents.1738308269.129c3b823eb8.46389.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db0ccc7c191d20469a4afd361b9334747baa690ec42354c9af506186f69e74c5
+ size 72766
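
Aside (not part of the commit): the `runs/*` event files above are committed as Git LFS pointers, three-line stubs recording the spec version, the SHA-256 of the actual blob, and its size in bytes. A minimal sketch of parsing one, assuming a local checkout where the pointers are present as plain text:

```python
# Parse a Git LFS pointer file into its key/value fields.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = read_lfs_pointer(
    "runs/Feb01_03-52-21_129c3b823eb8/"
    "events.out.tfevents.1738382009.129c3b823eb8.57352.0"
)
print(ptr["oid"], ptr["size"])  # sha256:d5291b8f... 5446
```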
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+ size 11421896
tokenizer_config.json ADDED
@@ -0,0 +1,209 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "model_max_length": 131072,
+ "pad_token": "<|endoftext|>",
+ "padding_side": "left",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+ }
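
Usage note (a sketch, not part of the commit): the `chat_template` and token settings above are what Transformers' `apply_chat_template` uses to render conversations for this checkpoint, assuming these files are pushed to the repo named below:

```python
from transformers import AutoTokenizer

# Assumes the files in this commit are available on the hub repo below.
tokenizer = AutoTokenizer.from_pretrained("bhaviktheslider/qwen-2.5-7b-r1-countdown")

messages = [{"role": "user", "content": "Combine 3, 5 and 7 to reach 22."}]
# Renders the Jinja chat_template above: default system prompt,
# <|im_start|>/<|im_end|> markers, and a trailing assistant header
# when add_generation_prompt=True.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
```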
train_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "total_flos": 0.0,
+ "train_loss": 0.0,
+ "train_runtime": 0.0058,
+ "train_samples_per_second": 3851297.791,
+ "train_steps_per_second": 17193.294
+ }
trainer_state.json ADDED
@@ -0,0 +1,1667 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 6.1244167962674965,
+ "eval_steps": 500,
+ "global_step": 250,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "completion_length": 736.4702529907227,
+ "epoch": 0.049766718506998445,
+ "grad_norm": 0.2507069706916809,
+ "kl": 0.0,
+ "learning_rate": 7.142857142857142e-08,
+ "loss": 0.0,
+ "reward": 0.04415178840281442,
+ "reward_std": 0.07034091584500857,
+ "rewards/equation_reward_func": 0.04415178793715313,
+ "rewards/format_reward_func": 0.0,
+ "step": 2
+ },
+ {
+ "completion_length": 723.1704015731812,
+ "epoch": 0.09953343701399689,
+ "grad_norm": 0.19884330034255981,
+ "kl": 2.0936699339557663e-05,
+ "learning_rate": 1.4285714285714285e-07,
+ "loss": 0.0,
+ "reward": 0.040647323767188936,
+ "reward_std": 0.0637543131451821,
+ "rewards/equation_reward_func": 0.04064732347615063,
+ "rewards/format_reward_func": 0.0,
+ "step": 4
+ },
+ {
+ "completion_length": 726.5163822174072,
+ "epoch": 0.14930015552099535,
+ "grad_norm": 0.21145105361938477,
+ "kl": 0.00019492170304147294,
+ "learning_rate": 2.1428571428571426e-07,
+ "loss": 0.0,
+ "reward": 0.04095238326408435,
+ "reward_std": 0.06441530691517983,
+ "rewards/equation_reward_func": 0.040952383089461364,
+ "rewards/format_reward_func": 0.0,
+ "step": 6
+ },
+ {
+ "completion_length": 737.8207015991211,
+ "epoch": 0.19906687402799378,
+ "grad_norm": 0.2020396590232849,
+ "kl": 0.020334478189397487,
+ "learning_rate": 2.857142857142857e-07,
+ "loss": 0.0,
+ "reward": 0.03635416827455629,
+ "reward_std": 0.05670656039728783,
+ "rewards/equation_reward_func": 0.03635416833276395,
+ "rewards/format_reward_func": 0.0,
+ "step": 8
+ },
+ {
+ "completion_length": 718.5461435317993,
+ "epoch": 0.24883359253499224,
+ "grad_norm": 27.479997634887695,
+ "kl": 9.990212610488015,
+ "learning_rate": 3.5714285714285716e-07,
+ "loss": 0.01,
+ "reward": 0.04543898967676796,
+ "reward_std": 0.07194261607946828,
+ "rewards/equation_reward_func": 0.04543899020063691,
+ "rewards/format_reward_func": 0.0,
+ "step": 10
+ },
+ {
+ "completion_length": 721.6964378356934,
+ "epoch": 0.2986003110419907,
+ "grad_norm": 0.19479116797447205,
+ "kl": 0.005109551766508957,
+ "learning_rate": 4.285714285714285e-07,
+ "loss": 0.0,
+ "reward": 0.04148065741173923,
+ "reward_std": 0.0677548690000549,
+ "rewards/equation_reward_func": 0.04148065741173923,
+ "rewards/format_reward_func": 0.0,
+ "step": 12
+ },
+ {
+ "completion_length": 725.9003086090088,
+ "epoch": 0.3483670295489891,
+ "grad_norm": 0.24158786237239838,
+ "kl": 0.502096803898894,
+ "learning_rate": 5e-07,
+ "loss": 0.0005,
+ "reward": 0.04821428797731642,
+ "reward_std": 0.07803500922454987,
+ "rewards/equation_reward_func": 0.048214288559393026,
+ "rewards/format_reward_func": 0.0,
+ "step": 14
+ },
+ {
+ "completion_length": 722.5781373977661,
+ "epoch": 0.39813374805598756,
+ "grad_norm": 0.290544331073761,
+ "kl": 0.23321715661586495,
+ "learning_rate": 4.999740409224932e-07,
+ "loss": 0.0002,
+ "reward": 0.05134672833082732,
+ "reward_std": 0.07946415679180063,
+ "rewards/equation_reward_func": 0.05134672856365796,
+ "rewards/format_reward_func": 0.0,
+ "step": 16
+ },
+ {
+ "completion_length": 723.7433156967163,
+ "epoch": 0.447900466562986,
+ "grad_norm": 4.716856479644775,
+ "kl": 0.8582769820350222,
+ "learning_rate": 4.998961690809627e-07,
+ "loss": 0.0009,
+ "reward": 0.050446430934243836,
+ "reward_std": 0.07721506280358881,
+ "rewards/equation_reward_func": 0.050446430992451496,
+ "rewards/format_reward_func": 0.0,
+ "step": 18
+ },
+ {
+ "completion_length": 729.6198072433472,
+ "epoch": 0.4976671850699845,
+ "grad_norm": 0.23771615326404572,
+ "kl": 0.3220994914881885,
+ "learning_rate": 4.997664006472578e-07,
+ "loss": 0.0003,
+ "reward": 0.045706847246037796,
+ "reward_std": 0.07288302374945488,
+ "rewards/equation_reward_func": 0.045706847246037796,
+ "rewards/format_reward_func": 0.0,
+ "step": 20
+ },
+ {
+ "completion_length": 712.8631057739258,
+ "epoch": 0.5474339035769828,
+ "grad_norm": 0.3750320374965668,
+ "kl": 0.27479040302569047,
+ "learning_rate": 4.995847625707292e-07,
+ "loss": 0.0003,
+ "reward": 0.05489583619055338,
+ "reward_std": 0.0833382241835352,
+ "rewards/equation_reward_func": 0.054895836423384026,
+ "rewards/format_reward_func": 0.0,
+ "step": 22
+ },
+ {
+ "completion_length": 731.4576015472412,
+ "epoch": 0.5972006220839814,
+ "grad_norm": 0.2089901864528656,
+ "kl": 0.12606932656490244,
+ "learning_rate": 4.993512925726318e-07,
+ "loss": 0.0001,
+ "reward": 0.0600520860607503,
+ "reward_std": 0.0899482914537657,
+ "rewards/equation_reward_func": 0.06005208553688135,
+ "rewards/format_reward_func": 0.0,
+ "step": 24
+ },
+ {
+ "completion_length": 706.6056671142578,
+ "epoch": 0.6469673405909798,
+ "grad_norm": 0.17620234191417694,
+ "kl": 0.1556346261058934,
+ "learning_rate": 4.990660391382923e-07,
+ "loss": 0.0002,
+ "reward": 0.05229166932986118,
+ "reward_std": 0.07350753628998064,
+ "rewards/equation_reward_func": 0.05229166956269182,
+ "rewards/format_reward_func": 0.0,
+ "step": 26
+ },
+ {
+ "completion_length": 727.1808137893677,
+ "epoch": 0.6967340590979783,
+ "grad_norm": 0.19479602575302124,
+ "kl": 0.12168441573157907,
+ "learning_rate": 4.987290615070384e-07,
+ "loss": 0.0001,
+ "reward": 0.053683038655435666,
+ "reward_std": 0.08042177859169897,
+ "rewards/equation_reward_func": 0.053683039121096954,
+ "rewards/format_reward_func": 0.0,
+ "step": 28
+ },
+ {
+ "completion_length": 720.8891496658325,
+ "epoch": 0.7465007776049767,
+ "grad_norm": 0.1857473999261856,
+ "kl": 0.1632600230514072,
+ "learning_rate": 4.983404296598978e-07,
+ "loss": 0.0002,
+ "reward": 0.05391369271092117,
+ "reward_std": 0.08413292915793136,
+ "rewards/equation_reward_func": 0.053913692419882864,
+ "rewards/format_reward_func": 0.0,
+ "step": 30
+ },
+ {
+ "completion_length": 720.4337940216064,
+ "epoch": 0.7962674961119751,
+ "grad_norm": 0.23092247545719147,
+ "kl": 0.15535293571883813,
+ "learning_rate": 4.979002243050646e-07,
+ "loss": 0.0002,
+ "reward": 0.05988095561042428,
+ "reward_std": 0.09168167802272364,
+ "rewards/equation_reward_func": 0.05988095601787791,
+ "rewards/format_reward_func": 0.0,
+ "step": 32
+ },
+ {
+ "completion_length": 718.6845378875732,
+ "epoch": 0.8460342146189735,
+ "grad_norm": 0.23407958447933197,
+ "kl": 0.25782948260894045,
+ "learning_rate": 4.974085368611381e-07,
+ "loss": 0.0003,
+ "reward": 0.06691220620996319,
+ "reward_std": 0.09768064138188493,
+ "rewards/equation_reward_func": 0.06691220562788658,
+ "rewards/format_reward_func": 0.0,
+ "step": 34
+ },
+ {
+ "completion_length": 718.0454006195068,
+ "epoch": 0.895800933125972,
+ "grad_norm": 0.3076172471046448,
+ "kl": 0.2404527408652939,
+ "learning_rate": 4.968654694381379e-07,
+ "loss": 0.0002,
+ "reward": 0.07349702704232186,
+ "reward_std": 0.10955648736853618,
+ "rewards/equation_reward_func": 0.07349702733336017,
+ "rewards/format_reward_func": 0.0,
+ "step": 36
+ },
+ {
+ "completion_length": 704.1488237380981,
+ "epoch": 0.9455676516329704,
+ "grad_norm": 0.2561110258102417,
+ "kl": 0.43795167771168053,
+ "learning_rate": 4.962711348162987e-07,
+ "loss": 0.0004,
+ "reward": 0.06241815793327987,
+ "reward_std": 0.09217380215704907,
+ "rewards/equation_reward_func": 0.0624181583407335,
+ "rewards/format_reward_func": 0.0,
+ "step": 38
+ },
+ {
+ "completion_length": 707.3921279907227,
+ "epoch": 0.995334370139969,
+ "grad_norm": 0.3400561511516571,
+ "kl": 0.5494289128109813,
+ "learning_rate": 4.956256564226487e-07,
+ "loss": 0.0005,
+ "reward": 0.0764508958091028,
+ "reward_std": 0.11110821401234716,
+ "rewards/equation_reward_func": 0.07645089708967134,
+ "rewards/format_reward_func": 0.0,
+ "step": 40
+ },
+ {
+ "completion_length": 715.0272221156529,
+ "epoch": 1.0497667185069985,
+ "grad_norm": 0.26081565022468567,
+ "kl": 0.4236157455614635,
+ "learning_rate": 4.949291683053768e-07,
+ "loss": 0.0005,
+ "reward": 0.07186394860701902,
+ "reward_std": 0.10362207902861494,
+ "rewards/equation_reward_func": 0.07186394876667432,
+ "rewards/format_reward_func": 0.0,
+ "step": 42
+ },
+ {
+ "completion_length": 714.9486722946167,
+ "epoch": 1.0995334370139969,
+ "grad_norm": 0.29378727078437805,
+ "kl": 0.3755593653768301,
+ "learning_rate": 4.941818151059955e-07,
+ "loss": 0.0004,
+ "reward": 0.0799404798890464,
+ "reward_std": 0.11443577655882109,
+ "rewards/equation_reward_func": 0.07994047965621576,
+ "rewards/format_reward_func": 0.0,
+ "step": 44
+ },
+ {
+ "completion_length": 727.829628944397,
+ "epoch": 1.1493001555209954,
+ "grad_norm": 2045.599365234375,
+ "kl": 128.7541933595203,
+ "learning_rate": 4.933837520293017e-07,
+ "loss": 0.1288,
+ "reward": 0.06808780113351531,
+ "reward_std": 0.09949399236938916,
+ "rewards/equation_reward_func": 0.06808780090068467,
+ "rewards/format_reward_func": 0.0,
+ "step": 46
+ },
+ {
+ "completion_length": 709.632453918457,
+ "epoch": 1.1990668740279937,
+ "grad_norm": 0.2698291838169098,
+ "kl": 0.4989726666826755,
+ "learning_rate": 4.925351448111454e-07,
+ "loss": 0.0005,
+ "reward": 0.09389881315291859,
+ "reward_std": 0.13221543522377033,
+ "rewards/equation_reward_func": 0.09389881303650327,
+ "rewards/format_reward_func": 0.0,
+ "step": 48
+ },
+ {
+ "completion_length": 719.485878944397,
+ "epoch": 1.2488335925349923,
+ "grad_norm": 0.36381521821022034,
+ "kl": 0.550471473718062,
+ "learning_rate": 4.91636169684011e-07,
+ "loss": 0.0006,
+ "reward": 0.08360863462439738,
+ "reward_std": 0.11854775344545487,
+ "rewards/equation_reward_func": 0.08360863421694376,
+ "rewards/format_reward_func": 0.0,
+ "step": 50
+ },
+ {
+ "completion_length": 725.6599855422974,
+ "epoch": 1.2986003110419908,
+ "grad_norm": 0.3374347686767578,
+ "kl": 0.663099701050669,
+ "learning_rate": 4.906870133404186e-07,
+ "loss": 0.0007,
+ "reward": 0.08503720644512214,
+ "reward_std": 0.12180299674218986,
+ "rewards/equation_reward_func": 0.0850372067943681,
+ "rewards/format_reward_func": 0.0,
+ "step": 52
+ },
+ {
+ "completion_length": 723.972484588623,
+ "epoch": 1.3483670295489891,
+ "grad_norm": 1.0345810651779175,
+ "kl": 0.9573397457133979,
+ "learning_rate": 4.896878728941531e-07,
+ "loss": 0.001,
+ "reward": 0.09177827867097221,
+ "reward_std": 0.12253864679951221,
+ "rewards/equation_reward_func": 0.09177827744861133,
+ "rewards/format_reward_func": 0.0,
+ "step": 54
+ },
+ {
+ "completion_length": 712.2269496917725,
+ "epoch": 1.3981337480559874,
+ "grad_norm": 0.27968963980674744,
+ "kl": 0.8391579431481659,
+ "learning_rate": 4.886389558393284e-07,
+ "loss": 0.0008,
+ "reward": 0.08570684934966266,
+ "reward_std": 0.1181660912843654,
+ "rewards/equation_reward_func": 0.08570684841834009,
+ "rewards/format_reward_func": 0.0,
+ "step": 56
+ },
+ {
+ "completion_length": 730.5327529907227,
+ "epoch": 1.447900466562986,
+ "grad_norm": 0.28138798475265503,
+ "kl": 0.9094656470697373,
+ "learning_rate": 4.875404800072976e-07,
+ "loss": 0.0009,
+ "reward": 0.08794643338478636,
+ "reward_std": 0.12104765651747584,
+ "rewards/equation_reward_func": 0.08794643309374806,
+ "rewards/format_reward_func": 0.0,
+ "step": 58
+ },
+ {
+ "completion_length": 732.3861742019653,
+ "epoch": 1.4976671850699845,
+ "grad_norm": 0.34412360191345215,
+ "kl": 1.009782899171114,
+ "learning_rate": 4.86392673521415e-07,
+ "loss": 0.001,
+ "reward": 0.10000744601711631,
+ "reward_std": 0.13957228315121029,
+ "rewards/equation_reward_func": 0.10000744566787034,
+ "rewards/format_reward_func": 0.0,
+ "step": 60
+ },
+ {
+ "completion_length": 725.0677175521851,
+ "epoch": 1.5474339035769828,
+ "grad_norm": 0.3454972207546234,
+ "kl": 1.0763904643245041,
+ "learning_rate": 4.851957747496606e-07,
+ "loss": 0.0011,
+ "reward": 0.10212798128486611,
+ "reward_std": 0.13816983328433707,
+ "rewards/equation_reward_func": 0.10212798012071289,
+ "rewards/format_reward_func": 0.0,
+ "step": 62
+ },
+ {
+ "completion_length": 730.5171251296997,
+ "epoch": 1.5972006220839814,
+ "grad_norm": 0.3473067581653595,
+ "kl": 1.4565551071427763,
+ "learning_rate": 4.839500322551386e-07,
+ "loss": 0.0015,
+ "reward": 0.10485119439545088,
+ "reward_std": 0.14129075466189533,
+ "rewards/equation_reward_func": 0.10485119334771298,
+ "rewards/format_reward_func": 0.0,
+ "step": 64
+ },
+ {
+ "completion_length": 735.0320043563843,
+ "epoch": 1.64696734059098,
+ "grad_norm": 0.3159619867801666,
+ "kl": 1.5041364189237356,
+ "learning_rate": 4.826557047444563e-07,
+ "loss": 0.0015,
+ "reward": 0.10093006424722262,
+ "reward_std": 0.13811934839759488,
+ "rewards/equation_reward_func": 0.1009300641308073,
+ "rewards/format_reward_func": 0.0,
+ "step": 66
+ },
+ {
+ "completion_length": 730.7455463409424,
+ "epoch": 1.6967340590979783,
+ "grad_norm": 1.146909236907959,
+ "kl": 2.238507369533181,
+ "learning_rate": 4.813130610139993e-07,
+ "loss": 0.0022,
+ "reward": 0.10973958898102865,
+ "reward_std": 0.13851106038782746,
+ "rewards/equation_reward_func": 0.10973958781687543,
+ "rewards/format_reward_func": 0.0,
+ "step": 68
+ },
+ {
+ "completion_length": 712.6971893310547,
+ "epoch": 1.7465007776049766,
+ "grad_norm": 7.27742338180542,
+ "kl": 3.2542791040614247,
+ "learning_rate": 4.799223798941089e-07,
+ "loss": 0.0033,
+ "reward": 0.12900298138265498,
+ "reward_std": 0.15667404458508827,
+ "rewards/equation_reward_func": 0.1290029831288848,
+ "rewards/format_reward_func": 0.0,
+ "step": 70
+ },
+ {
+ "completion_length": 729.6331987380981,
+ "epoch": 1.7962674961119751,
+ "grad_norm": 10.986953735351562,
+ "kl": 4.106183127500117,
+ "learning_rate": 4.78483950191177e-07,
+ "loss": 0.0041,
+ "reward": 0.12543899397132918,
+ "reward_std": 0.16567694948753342,
+ "rewards/equation_reward_func": 0.12543899344746023,
+ "rewards/format_reward_func": 0.0,
+ "step": 72
+ },
+ {
+ "completion_length": 737.0245656967163,
+ "epoch": 1.8460342146189737,
+ "grad_norm": 1.6122727394104004,
+ "kl": 3.731540434062481,
+ "learning_rate": 4.769980706276687e-07,
+ "loss": 0.0037,
+ "reward": 0.12507440976332873,
+ "reward_std": 0.159569505834952,
+ "rewards/equation_reward_func": 0.12507440929766744,
+ "rewards/format_reward_func": 0.0,
+ "step": 74
+ },
+ {
+ "completion_length": 729.0632581710815,
+ "epoch": 1.895800933125972,
+ "grad_norm": 0.5852969288825989,
+ "kl": 2.9793617641553283,
+ "learning_rate": 4.7546504978008595e-07,
+ "loss": 0.003,
+ "reward": 0.12817708833608776,
+ "reward_std": 0.1600989469443448,
+ "rewards/equation_reward_func": 0.1281770879868418,
+ "rewards/format_reward_func": 0.0,
+ "step": 76
+ },
+ {
+ "completion_length": 734.6302223205566,
+ "epoch": 1.9455676516329703,
+ "grad_norm": 0.9090600609779358,
+ "kl": 3.139740688726306,
+ "learning_rate": 4.738852060148848e-07,
+ "loss": 0.0031,
+ "reward": 0.13495536311529577,
+ "reward_std": 0.1720278718858026,
+ "rewards/equation_reward_func": 0.13495536299888045,
+ "rewards/format_reward_func": 0.0,
+ "step": 78
+ },
+ {
+ "completion_length": 742.833345413208,
+ "epoch": 1.995334370139969,
+ "grad_norm": 0.5681818723678589,
+ "kl": 3.712686972692609,
+ "learning_rate": 4.722588674223593e-07,
+ "loss": 0.0037,
+ "reward": 0.13085565919755027,
+ "reward_std": 0.15991040458902717,
+ "rewards/equation_reward_func": 0.1308556593139656,
+ "rewards/format_reward_func": 0.0,
+ "step": 80
+ },
+ {
+ "completion_length": 717.2042718184622,
+ "epoch": 2.0248833592534994,
+ "grad_norm": 1.5164953470230103,
+ "kl": 5.466580171334116,
+ "learning_rate": 4.70586371748506e-07,
+ "loss": 0.0032,
+ "reward": 0.14641604347056464,
+ "reward_std": 0.18159407436063416,
+ "rewards/equation_reward_func": 0.1464160444509042,
+ "rewards/format_reward_func": 0.0,
+ "step": 82
+ },
+ {
+ "completion_length": 730.2589464187622,
+ "epoch": 2.0746500777604977,
+ "grad_norm": 0.6375504732131958,
+ "kl": 4.280845553614199,
+ "learning_rate": 4.6886806632488363e-07,
+ "loss": 0.0043,
+ "reward": 0.14213542238576338,
+ "reward_std": 0.1740714008337818,
+ "rewards/equation_reward_func": 0.14213542168727145,
+ "rewards/format_reward_func": 0.0,
+ "step": 84
+ },
+ {
+ "completion_length": 744.4538831710815,
+ "epoch": 2.124416796267496,
+ "grad_norm": 0.9480769038200378,
+ "kl": 7.16812994517386,
+ "learning_rate": 4.6710430799648143e-07,
+ "loss": 0.0072,
+ "reward": 0.12831845637992956,
+ "reward_std": 0.1582361755426973,
+ "rewards/equation_reward_func": 0.12831845649634488,
+ "rewards/format_reward_func": 0.0,
+ "step": 86
+ },
+ {
+ "completion_length": 732.5520973205566,
+ "epoch": 2.1741835147744943,
+ "grad_norm": 16.496623992919922,
+ "kl": 10.49539315700531,
+ "learning_rate": 4.652954630476127e-07,
+ "loss": 0.0105,
+ "reward": 0.14677828032290563,
+ "reward_std": 0.1764058277476579,
+ "rewards/equation_reward_func": 0.1467782796244137,
+ "rewards/format_reward_func": 0.0,
+ "step": 88
+ },
+ {
+ "completion_length": 736.1361722946167,
+ "epoch": 2.223950233281493,
+ "grad_norm": 2.352017879486084,
+ "kl": 10.109702784568071,
+ "learning_rate": 4.6344190712584713e-07,
+ "loss": 0.0101,
+ "reward": 0.13781250565079972,
+ "reward_std": 0.1627702646655962,
+ "rewards/equation_reward_func": 0.13781250413740054,
+ "rewards/format_reward_func": 0.0,
+ "step": 90
+ },
+ {
+ "completion_length": 749.1317129135132,
+ "epoch": 2.2737169517884914,
+ "grad_norm": 3.804121255874634,
+ "kl": 15.052036292850971,
+ "learning_rate": 4.615440251639995e-07,
+ "loss": 0.0151,
+ "reward": 0.14105655340244994,
+ "reward_std": 0.17247924709226936,
+ "rewards/equation_reward_func": 0.14105655369348824,
+ "rewards/format_reward_func": 0.0,
+ "step": 92
+ },
+ {
+ "completion_length": 717.3884019851685,
+ "epoch": 2.3234836702954897,
+ "grad_norm": 2.226238489151001,
+ "kl": 12.018643591552973,
+ "learning_rate": 4.596022113001894e-07,
+ "loss": 0.012,
+ "reward": 0.15741816238733009,
+ "reward_std": 0.17923290858743712,
+ "rewards/equation_reward_func": 0.15741816128138453,
+ "rewards/format_reward_func": 0.0,
+ "step": 94
+ },
+ {
+ "completion_length": 726.2500143051147,
+ "epoch": 2.3732503888024885,
+ "grad_norm": 2.1459925174713135,
+ "kl": 12.27118530496955,
+ "learning_rate": 4.576168687959895e-07,
+ "loss": 0.0123,
+ "reward": 0.16154762578662485,
+ "reward_std": 0.18940409342758358,
+ "rewards/equation_reward_func": 0.16154762508813292,
+ "rewards/format_reward_func": 0.0,
+ "step": 96
+ },
+ {
+ "completion_length": 711.6696538925171,
+ "epoch": 2.423017107309487,
+ "grad_norm": 1.4883497953414917,
+ "kl": 15.596692271530628,
+ "learning_rate": 4.555884099526793e-07,
+ "loss": 0.0156,
+ "reward": 0.15925595845328644,
+ "reward_std": 0.1815938005456701,
+ "rewards/equation_reward_func": 0.1592559577547945,
+ "rewards/format_reward_func": 0.0,
+ "step": 98
+ },
+ {
+ "completion_length": 719.6242723464966,
+ "epoch": 2.472783825816485,
+ "grad_norm": 4.10906982421875,
+ "kl": 17.258602559566498,
+ "learning_rate": 4.5351725602562174e-07,
+ "loss": 0.0173,
+ "reward": 0.17212054354604334,
+ "reward_std": 0.18435519566992298,
+ "rewards/equation_reward_func": 0.17212054308038205,
+ "rewards/format_reward_func": 0.0,
+ "step": 100
+ },
+ {
+ "completion_length": 697.6637020111084,
+ "epoch": 2.522550544323484,
+ "grad_norm": 1.1079808473587036,
+ "kl": 14.344636462628841,
+ "learning_rate": 4.514038371367791e-07,
+ "loss": 0.0143,
+ "reward": 0.17430060362676159,
+ "reward_std": 0.19522728596348315,
+ "rewards/equation_reward_func": 0.17430060246260837,
+ "rewards/format_reward_func": 0.0,
+ "step": 102
+ },
+ {
+ "completion_length": 695.2105755805969,
+ "epoch": 2.5723172628304822,
+ "grad_norm": 1.298901081085205,
+ "kl": 15.563006613403559,
+ "learning_rate": 4.4924859218538936e-07,
+ "loss": 0.0156,
+ "reward": 0.17871280398685485,
+ "reward_std": 0.19645729020703584,
+ "rewards/equation_reward_func": 0.17871280352119356,
+ "rewards/format_reward_func": 0.0,
+ "step": 104
+ },
+ {
+ "completion_length": 687.2507581710815,
+ "epoch": 2.6220839813374806,
+ "grad_norm": 1.333657145500183,
+ "kl": 14.787582196295261,
+ "learning_rate": 4.470519687568185e-07,
+ "loss": 0.0148,
+ "reward": 0.19031250709667802,
+ "reward_std": 0.2006249635014683,
+ "rewards/equation_reward_func": 0.19031250721309334,
+ "rewards/format_reward_func": 0.0,
+ "step": 106
+ },
+ {
+ "completion_length": 672.3839402198792,
+ "epoch": 2.671850699844479,
+ "grad_norm": 1.4585353136062622,
+ "kl": 20.08526621758938,
+ "learning_rate": 4.4481442302960923e-07,
+ "loss": 0.0201,
+ "reward": 0.18158482806757092,
+ "reward_std": 0.1955818484420888,
+ "rewards/equation_reward_func": 0.18158482783474028,
+ "rewards/format_reward_func": 0.0,
+ "step": 108
+ },
+ {
+ "completion_length": 651.4077491760254,
+ "epoch": 2.721617418351477,
+ "grad_norm": 1.516221523284912,
+ "kl": 17.027776926755905,
+ "learning_rate": 4.4253641968074505e-07,
+ "loss": 0.017,
+ "reward": 0.1995759003330022,
+ "reward_std": 0.21349556557834148,
+ "rewards/equation_reward_func": 0.19957590056583285,
+ "rewards/format_reward_func": 0.0,
+ "step": 110
+ },
+ {
+ "completion_length": 672.9442043304443,
+ "epoch": 2.771384136858476,
+ "grad_norm": 2.0658159255981445,
+ "kl": 20.176754418760538,
+ "learning_rate": 4.402184317891501e-07,
+ "loss": 0.0202,
+ "reward": 0.20375744753982872,
+ "reward_std": 0.18776777852326632,
+ "rewards/equation_reward_func": 0.2037574463756755,
+ "rewards/format_reward_func": 0.0,
+ "step": 112
+ },
+ {
+ "completion_length": 665.7247114181519,
+ "epoch": 2.8211508553654743,
+ "grad_norm": 2.339445114135742,
+ "kl": 22.64492540061474,
+ "learning_rate": 4.37860940737443e-07,
+ "loss": 0.0226,
+ "reward": 0.1926413766341284,
+ "reward_std": 0.2001927924575284,
+ "rewards/equation_reward_func": 0.19264137593563646,
+ "rewards/format_reward_func": 0.0,
+ "step": 114
+ },
+ {
+ "completion_length": 669.665937423706,
+ "epoch": 2.8709175738724726,
+ "grad_norm": 2.852607011795044,
+ "kl": 32.22943264245987,
+ "learning_rate": 4.354644361119671e-07,
+ "loss": 0.0322,
+ "reward": 0.19950893591158092,
+ "reward_std": 0.1933421454159543,
+ "rewards/equation_reward_func": 0.19950893614441156,
+ "rewards/format_reward_func": 0.0,
+ "step": 116
+ },
+ {
+ "completion_length": 670.7053713798523,
+ "epoch": 2.9206842923794714,
+ "grad_norm": 2.6619129180908203,
+ "kl": 27.73328886926174,
+ "learning_rate": 4.3302941560111716e-07,
+ "loss": 0.0277,
+ "reward": 0.19388393545523286,
+ "reward_std": 0.19777346146292984,
+ "rewards/equation_reward_func": 0.1938839361537248,
+ "rewards/format_reward_func": 0.0,
+ "step": 118
+ },
+ {
+ "completion_length": 676.3571548461914,
+ "epoch": 2.9704510108864697,
+ "grad_norm": 3.816153049468994,
+ "kl": 27.2223904132843,
+ "learning_rate": 4.3055638489198236e-07,
+ "loss": 0.0272,
+ "reward": 0.20729167491663247,
+ "reward_std": 0.20934273721650243,
+ "rewards/equation_reward_func": 0.20729167328681797,
+ "rewards/format_reward_func": 0.0,
+ "step": 120
+ },
+ {
+ "completion_length": 659.7907361482319,
+ "epoch": 3.0,
+ "grad_norm": 0.624527633190155,
+ "kl": 27.528421577654388,
+ "learning_rate": 4.280458575653296e-07,
+ "loss": 0.0163,
+ "reward": 0.20659148869545838,
+ "reward_std": 0.19081004316869535,
+ "rewards/equation_reward_func": 0.20659148947973,
+ "rewards/format_reward_func": 0.0,
+ "step": 122
+ },
+ {
+ "completion_length": 659.4025421142578,
+ "epoch": 3.0497667185069983,
+ "grad_norm": 3.345853567123413,
+ "kl": 21.34368522465229,
+ "learning_rate": 4.2549835498894665e-07,
+ "loss": 0.0213,
+ "reward": 0.22118304355535656,
+ "reward_std": 0.21869899448938668,
+ "rewards/equation_reward_func": 0.22118304437026381,
+ "rewards/format_reward_func": 0.0,
+ "step": 124
+ },
+ {
+ "completion_length": 672.1183128356934,
+ "epoch": 3.099533437013997,
+ "grad_norm": 6.106723785400391,
+ "kl": 23.556977652013302,
+ "learning_rate": 4.229144062093679e-07,
+ "loss": 0.0236,
+ "reward": 0.21467262762598693,
+ "reward_std": 0.2053254572674632,
+ "rewards/equation_reward_func": 0.21467262762598693,
+ "rewards/format_reward_func": 0.0,
+ "step": 126
+ },
+ {
+ "completion_length": 653.0297751426697,
+ "epoch": 3.1493001555209954,
+ "grad_norm": 5.746135234832764,
+ "kl": 26.1618300229311,
+ "learning_rate": 4.2029454784200675e-07,
+ "loss": 0.0262,
+ "reward": 0.21742560202255845,
+ "reward_std": 0.2172505116323009,
+ "rewards/equation_reward_func": 0.217425603303127,
+ "rewards/format_reward_func": 0.0,
+ "step": 128
+ },
+ {
+ "completion_length": 645.058048248291,
+ "epoch": 3.1990668740279937,
+ "grad_norm": 60.6376953125,
+ "kl": 53.1397475451231,
+ "learning_rate": 4.1763932395971433e-07,
+ "loss": 0.0531,
+ "reward": 0.2241517937509343,
+ "reward_std": 0.20952896296512336,
+ "rewards/equation_reward_func": 0.22415179491508752,
+ "rewards/format_reward_func": 0.0,
+ "step": 130
+ },
+ {
+ "completion_length": 632.6659345626831,
+ "epoch": 3.248833592534992,
+ "grad_norm": 5.82427978515625,
+ "kl": 41.686398059129715,
+ "learning_rate": 4.1494928597979117e-07,
+ "loss": 0.0417,
+ "reward": 0.22440477029886097,
+ "reward_std": 0.2128691952675581,
+ "rewards/equation_reward_func": 0.22440477076452225,
+ "rewards/format_reward_func": 0.0,
+ "step": 132
+ },
+ {
+ "completion_length": 639.6711411476135,
+ "epoch": 3.298600311041991,
+ "grad_norm": 3.375183343887329,
+ "kl": 36.797510489821434,
+ "learning_rate": 4.122249925494726e-07,
+ "loss": 0.0368,
+ "reward": 0.2161235201638192,
+ "reward_std": 0.20362528192345053,
+ "rewards/equation_reward_func": 0.21612352062948048,
+ "rewards/format_reward_func": 0.0,
+ "step": 134
+ },
+ {
+ "completion_length": 651.2276935577393,
+ "epoch": 3.348367029548989,
+ "grad_norm": 5.04212760925293,
+ "kl": 37.60325849056244,
+ "learning_rate": 4.094670094299131e-07,
+ "loss": 0.0376,
+ "reward": 0.22996280749794096,
+ "reward_std": 0.214357816032134,
+ "rewards/equation_reward_func": 0.22996280703227967,
+ "rewards/format_reward_func": 0.0,
+ "step": 136
+ },
+ {
+ "completion_length": 631.5751585960388,
+ "epoch": 3.3981337480559874,
+ "grad_norm": 4.119243144989014,
+ "kl": 43.57139265537262,
+ "learning_rate": 4.066759093786931e-07,
+ "loss": 0.0436,
+ "reward": 0.2285714359022677,
+ "reward_std": 0.21766341011971235,
+ "rewards/equation_reward_func": 0.22857143532019109,
+ "rewards/format_reward_func": 0.0,
+ "step": 138
+ },
+ {
+ "completion_length": 647.8214359283447,
+ "epoch": 3.447900466562986,
+ "grad_norm": 7.117722988128662,
+ "kl": 60.4551947414875,
+ "learning_rate": 4.038522720308732e-07,
+ "loss": 0.0605,
+ "reward": 0.21806548640597612,
+ "reward_std": 0.20702184177935123,
+ "rewards/equation_reward_func": 0.2180654831463471,
+ "rewards/format_reward_func": 0.0,
+ "step": 140
+ },
+ {
+ "completion_length": 609.9583463668823,
+ "epoch": 3.4976671850699845,
+ "grad_norm": 4.748437881469727,
+ "kl": 58.59304141998291,
+ "learning_rate": 4.009966837786194e-07,
+ "loss": 0.0586,
+ "reward": 0.2300297737820074,
+ "reward_std": 0.20853826915845275,
+ "rewards/equation_reward_func": 0.23002976982388645,
+ "rewards/format_reward_func": 0.0,
+ "step": 142
+ },
+ {
+ "completion_length": 631.8430180549622,
+ "epoch": 3.547433903576983,
+ "grad_norm": 8.042330741882324,
+ "kl": 82.30807757377625,
+ "learning_rate": 3.981097376494259e-07,
+ "loss": 0.0823,
+ "reward": 0.21836310264188796,
+ "reward_std": 0.20933940180111676,
+ "rewards/equation_reward_func": 0.21836310101207346,
+ "rewards/format_reward_func": 0.0,
+ "step": 144
+ },
+ {
+ "completion_length": 624.0669736862183,
+ "epoch": 3.5972006220839816,
+ "grad_norm": 7.811219692230225,
+ "kl": 77.89375275373459,
+ "learning_rate": 3.951920331829592e-07,
+ "loss": 0.0779,
+ "reward": 0.2207961401436478,
+ "reward_std": 0.21105306909885257,
+ "rewards/equation_reward_func": 0.22079613932874054,
+ "rewards/format_reward_func": 0.0,
+ "step": 146
+ },
+ {
+ "completion_length": 623.5215888023376,
+ "epoch": 3.64696734059098,
+ "grad_norm": 8.836230278015137,
+ "kl": 65.97143815457821,
+ "learning_rate": 3.922441763065506e-07,
+ "loss": 0.066,
+ "reward": 0.2193824496353045,
+ "reward_std": 0.20604081987403333,
+ "rewards/equation_reward_func": 0.21938244777265936,
+ "rewards/format_reward_func": 0.0,
+ "step": 148
+ },
+ {
+ "completion_length": 634.7611751556396,
+ "epoch": 3.6967340590979783,
+ "grad_norm": 5.354574680328369,
+ "kl": 56.36278319358826,
+ "learning_rate": 3.8926677920936093e-07,
+ "loss": 0.0564,
+ "reward": 0.2112648879410699,
+ "reward_std": 0.2029515573522076,
+ "rewards/equation_reward_func": 0.21126488805748522,
+ "rewards/format_reward_func": 0.0,
+ "step": 150
+ },
+ {
+ "completion_length": 636.0297775268555,
+ "epoch": 3.7465007776049766,
+ "grad_norm": 5.276882648468018,
+ "kl": 65.72037261724472,
+ "learning_rate": 3.862604602152464e-07,
+ "loss": 0.0657,
+ "reward": 0.20753721124492586,
+ "reward_std": 0.20195745571982116,
+ "rewards/equation_reward_func": 0.20753721171058714,
+ "rewards/format_reward_func": 0.0,
+ "step": 152
+ },
+ {
+ "completion_length": 634.954626083374,
+ "epoch": 3.796267496111975,
+ "grad_norm": 8.027347564697266,
+ "kl": 77.93326985836029,
+ "learning_rate": 3.8322584365434934e-07,
+ "loss": 0.0779,
+ "reward": 0.2165699511533603,
+ "reward_std": 0.2101849897298962,
+ "rewards/equation_reward_func": 0.2165699495235458,
+ "rewards/format_reward_func": 0.0,
+ "step": 154
+ },
+ {
+ "completion_length": 638.3660817146301,
+ "epoch": 3.8460342146189737,
+ "grad_norm": 4.954690456390381,
+ "kl": 83.4894488453865,
+ "learning_rate": 3.8016355973344173e-07,
+ "loss": 0.0835,
+ "reward": 0.21200893796049058,
+ "reward_std": 0.21022081119008362,
+ "rewards/equation_reward_func": 0.21200893679633737,
+ "rewards/format_reward_func": 0.0,
+ "step": 156
+ },
+ {
+ "completion_length": 620.3281378746033,
+ "epoch": 3.895800933125972,
+ "grad_norm": 4.270212650299072,
+ "kl": 82.2349089384079,
+ "learning_rate": 3.7707424440504863e-07,
+ "loss": 0.0822,
+ "reward": 0.211755960714072,
+ "reward_std": 0.20715959300287068,
+ "rewards/equation_reward_func": 0.21175595885142684,
+ "rewards/format_reward_func": 0.0,
+ "step": 158
+ },
+ {
+ "completion_length": 632.0409350395203,
+ "epoch": 3.9455676516329703,
+ "grad_norm": 4.687271595001221,
+ "kl": 90.35439342260361,
+ "learning_rate": 3.739585392353787e-07,
+ "loss": 0.0904,
+ "reward": 0.21921131818089634,
+ "reward_std": 0.20252067118417472,
+ "rewards/equation_reward_func": 0.21921131608542055,
+ "rewards/format_reward_func": 0.0,
+ "step": 160
+ },
+ {
+ "completion_length": 630.2678661346436,
+ "epoch": 3.995334370139969,
+ "grad_norm": 5.595997333526611,
+ "kl": 95.46352458000183,
+ "learning_rate": 3.7081709127108767e-07,
+ "loss": 0.0955,
+ "reward": 0.22013393603265285,
+ "reward_std": 0.2177246706560254,
+ "rewards/equation_reward_func": 0.2201339368475601,
+ "rewards/format_reward_func": 0.0,
+ "step": 162
+ },
+ {
+ "completion_length": 632.1065288342928,
+ "epoch": 4.024883359253499,
+ "grad_norm": 8.787236213684082,
+ "kl": 144.07192611694336,
+ "learning_rate": 3.6765055290490513e-07,
+ "loss": 0.0855,
+ "reward": 0.20649123721216855,
+ "reward_std": 0.21240881752026708,
+ "rewards/equation_reward_func": 0.2064912359377271,
+ "rewards/format_reward_func": 0.0,
+ "step": 164
+ },
+ {
+ "completion_length": 619.5156345367432,
+ "epoch": 4.074650077760498,
+ "grad_norm": 7.552036762237549,
+ "kl": 137.199125289917,
+ "learning_rate": 3.644595817401501e-07,
+ "loss": 0.1372,
+ "reward": 0.2162797685014084,
+ "reward_std": 0.21547920361626893,
+ "rewards/equation_reward_func": 0.2162797685014084,
+ "rewards/format_reward_func": 0.0,
+ "step": 166
+ },
+ {
+ "completion_length": 618.7634057998657,
+ "epoch": 4.1244167962674965,
+ "grad_norm": 6.8007354736328125,
+ "kl": 103.6235063970089,
+ "learning_rate": 3.6124484045416483e-07,
+ "loss": 0.1036,
+ "reward": 0.23168899782467633,
+ "reward_std": 0.21457487577572465,
+ "rewards/equation_reward_func": 0.23168899829033762,
+ "rewards/format_reward_func": 0.0,
+ "step": 168
+ },
+ {
+ "completion_length": 637.4136991500854,
+ "epoch": 4.174183514774494,
+ "grad_norm": 8.004964828491211,
+ "kl": 113.37393373250961,
+ "learning_rate": 3.580069966606949e-07,
+ "loss": 0.1134,
+ "reward": 0.21156250836793333,
+ "reward_std": 0.2123116059228778,
+ "rewards/equation_reward_func": 0.21156250790227205,
+ "rewards/format_reward_func": 0.0,
+ "step": 170
+ },
+ {
+ "completion_length": 634.7485208511353,
+ "epoch": 4.223950233281493,
+ "grad_norm": 7.898318290710449,
+ "kl": 109.72896337509155,
+ "learning_rate": 3.547467227712444e-07,
+ "loss": 0.1097,
+ "reward": 0.2029910811688751,
+ "reward_std": 0.20662414643447846,
+ "rewards/equation_reward_func": 0.20299108081962913,
+ "rewards/format_reward_func": 0.0,
+ "step": 172
+ },
+ {
+ "completion_length": 621.2730751037598,
+ "epoch": 4.273716951788492,
+ "grad_norm": 7.211435317993164,
+ "kl": 99.61057341098785,
+ "learning_rate": 3.5146469585543386e-07,
+ "loss": 0.0996,
+ "reward": 0.22819941327907145,
+ "reward_std": 0.2186455992050469,
+ "rewards/equation_reward_func": 0.22819941234774888,
+ "rewards/format_reward_func": 0.0,
+ "step": 174
+ },
+ {
+ "completion_length": 640.9628086090088,
+ "epoch": 4.32348367029549,
+ "grad_norm": 7.790672302246094,
+ "kl": 93.87813127040863,
+ "learning_rate": 3.481615975003922e-07,
+ "loss": 0.0939,
+ "reward": 0.2149925670819357,
+ "reward_std": 0.20749260939192027,
+ "rewards/equation_reward_func": 0.2149925702251494,
+ "rewards/format_reward_func": 0.0,
+ "step": 176
+ },
+ {
+ "completion_length": 615.1093888282776,
+ "epoch": 4.3732503888024885,
+ "grad_norm": 22.329519271850586,
+ "kl": 87.78260296583176,
+ "learning_rate": 3.448381136692089e-07,
+ "loss": 0.0878,
+ "reward": 0.21617560542654246,
+ "reward_std": 0.20247984025627375,
+ "rewards/equation_reward_func": 0.2161756035638973,
+ "rewards/format_reward_func": 0.0,
+ "step": 178
+ },
+ {
+ "completion_length": 629.4829001426697,
1170
+ "epoch": 4.423017107309486,
1171
+ "grad_norm": 13.893996238708496,
1172
+ "kl": 98.21013808250427,
1173
+ "learning_rate": 3.4149493455847897e-07,
1174
+ "loss": 0.0982,
1175
+ "reward": 0.21152530901599675,
1176
+ "reward_std": 0.2093647257424891,
1177
+ "rewards/equation_reward_func": 0.21152530668769032,
1178
+ "rewards/format_reward_func": 0.0,
1179
+ "step": 180
1180
+ },
1181
+ {
1182
+ "completion_length": 623.7224802970886,
1183
+ "epoch": 4.472783825816485,
1184
+ "grad_norm": 7.4938130378723145,
1185
+ "kl": 149.59339570999146,
1186
+ "learning_rate": 3.3813275445496766e-07,
1187
+ "loss": 0.1496,
1188
+ "reward": 0.2145535812014714,
1189
+ "reward_std": 0.2063142586266622,
1190
+ "rewards/equation_reward_func": 0.214553578523919,
1191
+ "rewards/format_reward_func": 0.0,
1192
+ "step": 182
1193
+ },
1194
+ {
1195
+ "completion_length": 639.263400554657,
1196
+ "epoch": 4.522550544323484,
1197
+ "grad_norm": 6.325891494750977,
1198
+ "kl": 147.64970636367798,
1199
+ "learning_rate": 3.347522715914262e-07,
1200
+ "loss": 0.1476,
1201
+ "reward": 0.20923363824840635,
1202
+ "reward_std": 0.20685563085135072,
1203
+ "rewards/equation_reward_func": 0.20923363824840635,
1204
+ "rewards/format_reward_func": 0.0,
1205
+ "step": 184
1206
+ },
1207
+ {
1208
+ "completion_length": 636.6897439956665,
1209
+ "epoch": 4.572317262830482,
1210
+ "grad_norm": 4.635812759399414,
1211
+ "kl": 130.48132091760635,
1212
+ "learning_rate": 3.313541880015877e-07,
1213
+ "loss": 0.1305,
1214
+ "reward": 0.21598215226549655,
1215
+ "reward_std": 0.2006415540818125,
1216
+ "rewards/equation_reward_func": 0.21598214923869818,
1217
+ "rewards/format_reward_func": 0.0,
1218
+ "step": 186
1219
+ },
1220
+ {
1221
+ "completion_length": 631.9933152198792,
1222
+ "epoch": 4.6220839813374806,
1223
+ "grad_norm": 7.933198928833008,
1224
+ "kl": 118.75544810295105,
1225
+ "learning_rate": 3.279392093743747e-07,
1226
+ "loss": 0.1188,
1227
+ "reward": 0.22688244911842048,
1228
+ "reward_std": 0.22052743670064956,
1229
+ "rewards/equation_reward_func": 0.22688244772143662,
1230
+ "rewards/format_reward_func": 0.0,
1231
+ "step": 188
1232
+ },
1233
+ {
1234
+ "completion_length": 632.7038769721985,
1235
+ "epoch": 4.671850699844479,
1236
+ "grad_norm": 6.763364791870117,
1237
+ "kl": 112.75827008485794,
1238
+ "learning_rate": 3.245080449073459e-07,
1239
+ "loss": 0.1128,
1240
+ "reward": 0.2060937569476664,
1241
+ "reward_std": 0.20044768252409995,
1242
+ "rewards/equation_reward_func": 0.2060937574133277,
1243
+ "rewards/format_reward_func": 0.0,
1244
+ "step": 190
1245
+ },
1246
+ {
1247
+ "completion_length": 632.4464421272278,
1248
+ "epoch": 4.721617418351477,
1249
+ "grad_norm": 4.295353412628174,
1250
+ "kl": 108.82453501224518,
1251
+ "learning_rate": 3.210614071594162e-07,
1252
+ "loss": 0.1088,
1253
+ "reward": 0.20745536405593157,
1254
+ "reward_std": 0.21275918127503246,
1255
+ "rewards/equation_reward_func": 0.2074553637066856,
1256
+ "rewards/format_reward_func": 0.0,
1257
+ "step": 192
1258
+ },
1259
+ {
1260
+ "completion_length": 634.1763515472412,
1261
+ "epoch": 4.771384136858476,
1262
+ "grad_norm": 4.46217679977417,
1263
+ "kl": 118.317107796669,
1264
+ "learning_rate": 3.1760001190287695e-07,
1265
+ "loss": 0.1183,
1266
+ "reward": 0.20520090113859624,
1267
+ "reward_std": 0.2021206704666838,
1268
+ "rewards/equation_reward_func": 0.20520090113859624,
1269
+ "rewards/format_reward_func": 0.0,
1270
+ "step": 194
1271
+ },
1272
+ {
1273
+ "completion_length": 620.2395968437195,
1274
+ "epoch": 4.821150855365475,
1275
+ "grad_norm": 4.841196060180664,
1276
+ "kl": 119.24478554725647,
1277
+ "learning_rate": 3.141245779747502e-07,
1278
+ "loss": 0.1192,
1279
+ "reward": 0.21259673358872533,
1280
+ "reward_std": 0.21422103908844292,
1281
+ "rewards/equation_reward_func": 0.21259673358872533,
1282
+ "rewards/format_reward_func": 0.0,
1283
+ "step": 196
1284
+ },
1285
+ {
1286
+ "completion_length": 609.0446557998657,
1287
+ "epoch": 4.870917573872473,
1288
+ "grad_norm": 4.3330559730529785,
1289
+ "kl": 119.67610502243042,
1290
+ "learning_rate": 3.106358271275056e-07,
1291
+ "loss": 0.1197,
1292
+ "reward": 0.22683036630041897,
1293
+ "reward_std": 0.20717181416694075,
1294
+ "rewards/equation_reward_func": 0.22683036653324962,
1295
+ "rewards/format_reward_func": 0.0,
1296
+ "step": 198
1297
+ },
1298
+ {
1299
+ "completion_length": 614.8869152069092,
1300
+ "epoch": 4.920684292379471,
1301
+ "grad_norm": 92.09661102294922,
1302
+ "kl": 144.53644692897797,
1303
+ "learning_rate": 3.0713448387917227e-07,
1304
+ "loss": 0.1445,
1305
+ "reward": 0.21901042643003166,
1306
+ "reward_std": 0.20682094641961157,
1307
+ "rewards/equation_reward_func": 0.2190104245673865,
1308
+ "rewards/format_reward_func": 0.0,
1309
+ "step": 200
1310
+ },
1311
+ {
1312
+ "completion_length": 631.4241156578064,
1313
+ "epoch": 4.970451010886469,
1314
+ "grad_norm": 6.355322360992432,
1315
+ "kl": 154.4233751296997,
1316
+ "learning_rate": 3.0362127536287636e-07,
1317
+ "loss": 0.1544,
1318
+ "reward": 0.21773066406603903,
1319
+ "reward_std": 0.21250074298586696,
1320
+ "rewards/equation_reward_func": 0.2177306618541479,
1321
+ "rewards/format_reward_func": 0.0,
1322
+ "step": 202
1323
+ },
1324
+ {
1325
+ "completion_length": 624.7180488987973,
1326
+ "epoch": 5.0,
1327
+ "grad_norm": 5.770173072814941,
1328
+ "kl": 161.87928571199117,
1329
+ "learning_rate": 3.0009693117583523e-07,
1330
+ "loss": 0.0961,
1331
+ "reward": 0.21541354177813782,
1332
+ "reward_std": 0.20374000229333578,
1333
+ "rewards/equation_reward_func": 0.215413541386002,
1334
+ "rewards/format_reward_func": 0.0,
1335
+ "step": 204
1336
+ },
1337
+ {
1338
+ "completion_length": 624.5647420883179,
1339
+ "epoch": 5.049766718506999,
1340
+ "grad_norm": 6.884070873260498,
1341
+ "kl": 157.92570447921753,
1342
+ "learning_rate": 2.965621832278401e-07,
1343
+ "loss": 0.1579,
1344
+ "reward": 0.22669643780682236,
1345
+ "reward_std": 0.20801680884324014,
1346
+ "rewards/equation_reward_func": 0.22669643454719335,
1347
+ "rewards/format_reward_func": 0.0,
1348
+ "step": 206
1349
+ },
1350
+ {
1351
+ "completion_length": 614.1570081710815,
1352
+ "epoch": 5.099533437013997,
1353
+ "grad_norm": 4.670907497406006,
1354
+ "kl": 134.14546036720276,
1355
+ "learning_rate": 2.9301776558925875e-07,
1356
+ "loss": 0.1341,
1357
+ "reward": 0.2188244123244658,
1358
+ "reward_std": 0.20453347032889724,
1359
+ "rewards/equation_reward_func": 0.21882441325578839,
1360
+ "rewards/format_reward_func": 0.0,
1361
+ "step": 208
1362
+ },
1363
+ {
1364
+ "completion_length": 614.4702506065369,
1365
+ "epoch": 5.149300155520995,
1366
+ "grad_norm": 14.716873168945312,
1367
+ "kl": 109.80421262979507,
1368
+ "learning_rate": 2.894644143385885e-07,
1369
+ "loss": 0.1098,
1370
+ "reward": 0.21839286445174366,
1371
+ "reward_std": 0.20062782417517155,
1372
+ "rewards/equation_reward_func": 0.21839286398608238,
1373
+ "rewards/format_reward_func": 0.0,
1374
+ "step": 210
1375
+ },
1376
+ {
1377
+ "completion_length": 622.4672718048096,
1378
+ "epoch": 5.199066874027994,
1379
+ "grad_norm": 10.858051300048828,
1380
+ "kl": 114.28983092308044,
1381
+ "learning_rate": 2.859028674095937e-07,
1382
+ "loss": 0.1143,
1383
+ "reward": 0.2192782819038257,
1384
+ "reward_std": 0.2128367607947439,
1385
+ "rewards/equation_reward_func": 0.21927828167099506,
1386
+ "rewards/format_reward_func": 0.0,
1387
+ "step": 212
1388
+ },
1389
+ {
1390
+ "completion_length": 612.6160840988159,
1391
+ "epoch": 5.248833592534992,
1392
+ "grad_norm": 3.8785901069641113,
1393
+ "kl": 125.06462055444717,
1394
+ "learning_rate": 2.823338644380566e-07,
1395
+ "loss": 0.1251,
1396
+ "reward": 0.23020090232603252,
1397
+ "reward_std": 0.2176531965378672,
1398
+ "rewards/equation_reward_func": 0.23020089999772608,
1399
+ "rewards/format_reward_func": 0.0,
1400
+ "step": 214
1401
+ },
1402
+ {
1403
+ "completion_length": 635.8995633125305,
1404
+ "epoch": 5.298600311041991,
1405
+ "grad_norm": 5.062567234039307,
1406
+ "kl": 148.21274209022522,
1407
+ "learning_rate": 2.7875814660817504e-07,
1408
+ "loss": 0.1482,
1409
+ "reward": 0.2193973324028775,
1410
+ "reward_std": 0.22195886494591832,
1411
+ "rewards/equation_reward_func": 0.21939733054023236,
1412
+ "rewards/format_reward_func": 0.0,
1413
+ "step": 216
1414
+ },
1415
+ {
1416
+ "completion_length": 630.8229269981384,
1417
+ "epoch": 5.348367029548989,
1418
+ "grad_norm": 5.181402206420898,
1419
+ "kl": 165.8618984222412,
1420
+ "learning_rate": 2.751764564986396e-07,
1421
+ "loss": 0.1659,
1422
+ "reward": 0.2077009006170556,
1423
+ "reward_std": 0.2193935844115913,
1424
+ "rewards/equation_reward_func": 0.2077009001513943,
1425
+ "rewards/format_reward_func": 0.0,
1426
+ "step": 218
1427
+ },
1428
+ {
1429
+ "completion_length": 628.6517939567566,
1430
+ "epoch": 5.3981337480559874,
1431
+ "grad_norm": 4.105767726898193,
1432
+ "kl": 148.7712802886963,
1433
+ "learning_rate": 2.715895379284194e-07,
1434
+ "loss": 0.1488,
1435
+ "reward": 0.2191815583501011,
1436
+ "reward_std": 0.20989621221087873,
1437
+ "rewards/equation_reward_func": 0.21918155602179468,
1438
+ "rewards/format_reward_func": 0.0,
1439
+ "step": 220
1440
+ },
1441
+ {
1442
+ "completion_length": 629.8006067276001,
1443
+ "epoch": 5.447900466562986,
1444
+ "grad_norm": 3.895611524581909,
1445
+ "kl": 142.22095596790314,
1446
+ "learning_rate": 2.6799813580229174e-07,
1447
+ "loss": 0.1422,
1448
+ "reward": 0.22290923492982984,
1449
+ "reward_std": 0.21323461562860757,
1450
+ "rewards/equation_reward_func": 0.2229092346969992,
1451
+ "rewards/format_reward_func": 0.0,
1452
+ "step": 222
1453
+ },
1454
+ {
1455
+ "completion_length": 608.6183171272278,
1456
+ "epoch": 5.497667185069984,
1457
+ "grad_norm": 6.331876277923584,
1458
+ "kl": 135.1478552222252,
1459
+ "learning_rate": 2.6440299595614606e-07,
1460
+ "loss": 0.1351,
1461
+ "reward": 0.21991072362288833,
1462
+ "reward_std": 0.22133340197615325,
1463
+ "rewards/equation_reward_func": 0.21991072269156575,
1464
+ "rewards/format_reward_func": 0.0,
1465
+ "step": 224
1466
+ },
1467
+ {
1468
+ "completion_length": 611.6756086349487,
1469
+ "epoch": 5.547433903576983,
1470
+ "grad_norm": 3.41554594039917,
1471
+ "kl": 135.47022581100464,
1472
+ "learning_rate": 2.6080486500209347e-07,
1473
+ "loss": 0.1355,
1474
+ "reward": 0.21784971025772393,
1475
+ "reward_std": 0.21086209290660918,
1476
+ "rewards/equation_reward_func": 0.2178497090935707,
1477
+ "rewards/format_reward_func": 0.0,
1478
+ "step": 226
1479
+ },
1480
+ {
1481
+ "completion_length": 609.0922722816467,
1482
+ "epoch": 5.597200622083982,
1483
+ "grad_norm": 4.638352870941162,
1484
+ "kl": 149.68241280317307,
1485
+ "learning_rate": 2.572044901734166e-07,
1486
+ "loss": 0.1497,
1487
+ "reward": 0.22438989242073148,
1488
+ "reward_std": 0.2241612394573167,
1489
+ "rewards/equation_reward_func": 0.2243898919550702,
1490
+ "rewards/format_reward_func": 0.0,
1491
+ "step": 228
1492
+ },
1493
+ {
1494
+ "completion_length": 629.8534321784973,
1495
+ "epoch": 5.6469673405909795,
1496
+ "grad_norm": 4.474099159240723,
1497
+ "kl": 164.97060561180115,
1498
+ "learning_rate": 2.536026191693893e-07,
1499
+ "loss": 0.165,
1500
+ "reward": 0.2060565553838387,
1501
+ "reward_std": 0.21067888580728322,
1502
+ "rewards/equation_reward_func": 0.20605655445251614,
1503
+ "rewards/format_reward_func": 0.0,
1504
+ "step": 230
1505
+ },
1506
+ {
1507
+ "completion_length": 626.8482217788696,
1508
+ "epoch": 5.696734059097978,
1509
+ "grad_norm": 9.778329849243164,
1510
+ "kl": 169.21773087978363,
1511
+ "learning_rate": 2.5e-07,
1512
+ "loss": 0.1692,
1513
+ "reward": 0.20911459170747548,
1514
+ "reward_std": 0.21599237713962793,
1515
+ "rewards/equation_reward_func": 0.2091145912418142,
1516
+ "rewards/format_reward_func": 0.0,
1517
+ "step": 232
1518
+ },
1519
+ {
1520
+ "completion_length": 629.8660821914673,
1521
+ "epoch": 5.746500777604977,
1522
+ "grad_norm": 5.210114479064941,
1523
+ "kl": 171.0250325202942,
1524
+ "learning_rate": 2.4639738083061073e-07,
1525
+ "loss": 0.171,
1526
+ "reward": 0.2135788791347295,
1527
+ "reward_std": 0.20587447995785624,
1528
+ "rewards/equation_reward_func": 0.21357887890189886,
1529
+ "rewards/format_reward_func": 0.0,
1530
+ "step": 234
1531
+ },
1532
+ {
1533
+ "completion_length": 628.7165260314941,
1534
+ "epoch": 5.796267496111975,
1535
+ "grad_norm": 4.644392490386963,
1536
+ "kl": 149.7915449142456,
1537
+ "learning_rate": 2.4279550982658345e-07,
1538
+ "loss": 0.1498,
1539
+ "reward": 0.20833334070630372,
1540
+ "reward_std": 0.21195052459370345,
1541
+ "rewards/equation_reward_func": 0.20833334047347307,
1542
+ "rewards/format_reward_func": 0.0,
1543
+ "step": 236
1544
+ },
1545
+ {
1546
+ "completion_length": 628.755964756012,
1547
+ "epoch": 5.846034214618974,
1548
+ "grad_norm": 6.456798076629639,
1549
+ "kl": 442.08424025774,
1550
+ "learning_rate": 2.3919513499790646e-07,
1551
+ "loss": 0.4421,
1552
+ "reward": 0.22005209047347307,
1553
+ "reward_std": 0.21488765871617943,
1554
+ "rewards/equation_reward_func": 0.22005209024064243,
1555
+ "rewards/format_reward_func": 0.0,
1556
+ "step": 238
1557
+ },
1558
+ {
1559
+ "completion_length": 612.3988199234009,
1560
+ "epoch": 5.895800933125972,
1561
+ "grad_norm": 9.304161071777344,
1562
+ "kl": 118.21684062480927,
1563
+ "learning_rate": 2.3559700404385394e-07,
1564
+ "loss": 0.1182,
1565
+ "reward": 0.22447917505633086,
1566
+ "reward_std": 0.211615604814142,
1567
+ "rewards/equation_reward_func": 0.22447917482350022,
1568
+ "rewards/format_reward_func": 0.0,
1569
+ "step": 240
1570
+ },
1571
+ {
1572
+ "completion_length": 633.3660821914673,
1573
+ "epoch": 5.94556765163297,
1574
+ "grad_norm": 5.745642185211182,
1575
+ "kl": 133.20424818992615,
1576
+ "learning_rate": 2.3200186419770823e-07,
1577
+ "loss": 0.1332,
1578
+ "reward": 0.2242708442499861,
1579
+ "reward_std": 0.2152464333921671,
1580
+ "rewards/equation_reward_func": 0.22427084331866354,
1581
+ "rewards/format_reward_func": 0.0,
1582
+ "step": 242
1583
+ },
1584
+ {
1585
+ "completion_length": 618.1235270500183,
1586
+ "epoch": 5.995334370139969,
1587
+ "grad_norm": 4.167017936706543,
1588
+ "kl": 143.97905486822128,
1589
+ "learning_rate": 2.284104620715807e-07,
1590
+ "loss": 0.144,
1591
+ "reward": 0.22046875627711415,
1592
+ "reward_std": 0.21442426112480462,
1593
+ "rewards/equation_reward_func": 0.22046875732485205,
1594
+ "rewards/format_reward_func": 0.0,
1595
+ "step": 244
1596
+ },
1597
+ {
1598
+ "completion_length": 634.5175580476459,
1599
+ "epoch": 6.024883359253499,
1600
+ "grad_norm": 3.44785213470459,
1601
+ "kl": 167.55113441065737,
1602
+ "learning_rate": 2.2482354350136043e-07,
1603
+ "loss": 0.0995,
1604
+ "reward": 0.21961153769179395,
1605
+ "reward_std": 0.2146961924276854,
1606
+ "rewards/equation_reward_func": 0.21961153769179395,
1607
+ "rewards/format_reward_func": 0.0,
1608
+ "step": 246
1609
+ },
1610
+ {
1611
+ "completion_length": 634.5863180160522,
1612
+ "epoch": 6.074650077760498,
1613
+ "grad_norm": 7.954348564147949,
1614
+ "kl": 163.61565399169922,
1615
+ "learning_rate": 2.2124185339182496e-07,
1616
+ "loss": 0.1636,
1617
+ "reward": 0.23546131700277328,
1618
+ "reward_std": 0.2178129724925384,
1619
+ "rewards/equation_reward_func": 0.23546131781768054,
1620
+ "rewards/format_reward_func": 0.0,
1621
+ "step": 248
1622
+ },
1623
+ {
1624
+ "completion_length": 610.0825996398926,
1625
+ "epoch": 6.1244167962674965,
1626
+ "grad_norm": 4.648006439208984,
1627
+ "kl": 167.8152883052826,
1628
+ "learning_rate": 2.1766613556194344e-07,
1629
+ "loss": 0.1678,
1630
+ "reward": 0.22144346224376932,
1631
+ "reward_std": 0.21030379901640117,
1632
+ "rewards/equation_reward_func": 0.22144346177810803,
1633
+ "rewards/format_reward_func": 0.0,
1634
+ "step": 250
1635
+ },
1636
+ {
1637
+ "epoch": 6.1244167962674965,
1638
+ "step": 250,
1639
+ "total_flos": 0.0,
1640
+ "train_loss": 0.0,
1641
+ "train_runtime": 0.0058,
1642
+ "train_samples_per_second": 3851297.791,
1643
+ "train_steps_per_second": 17193.294
1644
+ }
1645
+ ],
1646
+ "logging_steps": 2,
1647
+ "max_steps": 100,
1648
+ "num_input_tokens_seen": 0,
1649
+ "num_train_epochs": 3,
1650
+ "save_steps": 25,
1651
+ "stateful_callbacks": {
1652
+ "TrainerControl": {
1653
+ "args": {
1654
+ "should_epoch_stop": false,
1655
+ "should_evaluate": false,
1656
+ "should_log": false,
1657
+ "should_save": true,
1658
+ "should_training_stop": false
1659
+ },
1660
+ "attributes": {}
1661
+ }
1662
+ },
1663
+ "total_flos": 0.0,
1664
+ "train_batch_size": 1,
1665
+ "trial_name": null,
1666
+ "trial_params": null
1667
+ }
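
The log records above live under the `log_history` key of `trainer_state.json`, so they can be inspected programmatically rather than read out of the diff. Below is a minimal sketch, assuming the file has been downloaded from this repo to a local path (the path is illustrative):

```python
import json

# Load the trainer state saved with the checkpoint (illustrative local path).
with open("trainer_state.json") as f:
    state = json.load(f)

# Each record mirrors one "+"-prefixed block in the diff above; the final
# summary record carries no per-step reward, so guard on the key.
for record in state["log_history"]:
    if "reward" in record:
        print(f"step {record['step']:>3}: reward={record['reward']:.4f}, "
              f"kl={record['kl']:.2f}, loss={record['loss']:.4f}")
```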
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90a6effc585bdefe330f2be7562ec2dff7b9a51e810846afc3eaea737d2854ee
+ size 5624
vocab.json ADDED
The diff for this file is too large to render. See raw diff
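
The three lines added to `training_args.bin` are a Git LFS pointer (spec version, SHA-256 object id, and size in bytes), not the serialized training arguments themselves; `git lfs pull` fetches the real binary after cloning. As a hedged sketch, assuming the repo was cloned without LFS smudging so the pointer text is what sits on disk, such a pointer can be parsed like this:

```python
# Parse a Git LFS pointer file into its key/value fields.
# Assumes the clone skipped LFS smudge (e.g. GIT_LFS_SKIP_SMUDGE=1),
# so the file on disk is the pointer text, not the binary payload.
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = read_lfs_pointer("training_args.bin")
print(pointer["oid"], pointer["size"])  # e.g. sha256:90a6... 5624
```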