chflame163 committed on
Commit
d4d901b
1 Parent(s): f47a236

Upload 74 files

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full set.
Files changed (50)
  1. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/.gitattributes +37 -0
  2. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/README.md +305 -0
  3. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/all_results.json +22 -0
  4. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/config.json +35 -0
  5. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/eval_results.json +16 -0
  6. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/generation_config.json +7 -0
  7. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/instructions_function_calling.md +183 -0
  8. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/merges.txt +0 -0
  9. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/model.safetensors +3 -0
  10. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/model.safetensors.baiduyun.uploading.cfg +0 -0
  11. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/runs/Oct31_06-24-59_ip-26-0-174-36/events.out.tfevents.1730356365.ip-26-0-174-36.3169719.0 +3 -0
  12. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/runs/Oct31_06-24-59_ip-26-0-174-36/events.out.tfevents.1730363825.ip-26-0-174-36.3169719.1 +3 -0
  13. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/special_tokens_map.json +34 -0
  14. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/tokenizer.json +0 -0
  15. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/tokenizer_config.json +154 -0
  16. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/train_results.json +9 -0
  17. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/trainer_state.json +2426 -0
  18. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/training_args.bin +3 -0
  19. ComfyUI/models/smol/SmolLM2-1.7B-Instruct/vocab.json +0 -0
  20. ComfyUI/models/smol/SmolLM2-135M-Instruct/.gitattributes +35 -0
  21. ComfyUI/models/smol/SmolLM2-135M-Instruct/README.md +133 -0
  22. ComfyUI/models/smol/SmolLM2-135M-Instruct/all_results.json +22 -0
  23. ComfyUI/models/smol/SmolLM2-135M-Instruct/config.json +37 -0
  24. ComfyUI/models/smol/SmolLM2-135M-Instruct/eval_results.json +16 -0
  25. ComfyUI/models/smol/SmolLM2-135M-Instruct/generation_config.json +7 -0
  26. ComfyUI/models/smol/SmolLM2-135M-Instruct/merges.txt +0 -0
  27. ComfyUI/models/smol/SmolLM2-135M-Instruct/model.safetensors +3 -0
  28. ComfyUI/models/smol/SmolLM2-135M-Instruct/runs/Oct31_10-14-22_ip-26-0-174-36/events.out.tfevents.1730370128.ip-26-0-174-36.3239327.0 +3 -0
  29. ComfyUI/models/smol/SmolLM2-135M-Instruct/runs/Oct31_10-14-22_ip-26-0-174-36/events.out.tfevents.1730376056.ip-26-0-174-36.3239327.1 +3 -0
  30. ComfyUI/models/smol/SmolLM2-135M-Instruct/special_tokens_map.json +34 -0
  31. ComfyUI/models/smol/SmolLM2-135M-Instruct/tokenizer.json +0 -0
  32. ComfyUI/models/smol/SmolLM2-135M-Instruct/tokenizer_config.json +154 -0
  33. ComfyUI/models/smol/SmolLM2-135M-Instruct/train_results.json +9 -0
  34. ComfyUI/models/smol/SmolLM2-135M-Instruct/trainer_state.json +1626 -0
  35. ComfyUI/models/smol/SmolLM2-135M-Instruct/training_args.bin +3 -0
  36. ComfyUI/models/smol/SmolLM2-135M-Instruct/vocab.json +0 -0
  37. ComfyUI/models/smol/SmolLM2-360M-Instruct/.gitattributes +35 -0
  38. ComfyUI/models/smol/SmolLM2-360M-Instruct/README.md +133 -0
  39. ComfyUI/models/smol/SmolLM2-360M-Instruct/all_results.json +22 -0
  40. ComfyUI/models/smol/SmolLM2-360M-Instruct/config.json +37 -0
  41. ComfyUI/models/smol/SmolLM2-360M-Instruct/eval_results.json +16 -0
  42. ComfyUI/models/smol/SmolLM2-360M-Instruct/generation_config.json +7 -0
  43. ComfyUI/models/smol/SmolLM2-360M-Instruct/merges.txt +0 -0
  44. ComfyUI/models/smol/SmolLM2-360M-Instruct/model.safetensors +3 -0
  45. ComfyUI/models/smol/SmolLM2-360M-Instruct/model.safetensors.baiduyun.uploading.cfg +0 -0
  46. ComfyUI/models/smol/SmolLM2-360M-Instruct/runs/Oct31_09-01-58_ip-26-0-172-142/events.out.tfevents.1730365788.ip-26-0-172-142.451351.0 +3 -0
  47. ComfyUI/models/smol/SmolLM2-360M-Instruct/runs/Oct31_09-01-58_ip-26-0-172-142/events.out.tfevents.1730371773.ip-26-0-172-142.451351.1 +3 -0
  48. ComfyUI/models/smol/SmolLM2-360M-Instruct/runs/Oct31_09-19-57_ip-26-0-174-36/events.out.tfevents.1730366818.ip-26-0-174-36.3233632.0 +3 -0
  49. ComfyUI/models/smol/SmolLM2-360M-Instruct/runs/Oct31_09-20-43_ip-26-0-161-142/events.out.tfevents.1730366856.ip-26-0-161-142.1301887.0 +3 -0
  50. ComfyUI/models/smol/SmolLM2-360M-Instruct/special_tokens_map.json +34 -0
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/.gitattributes ADDED
@@ -0,0 +1,37 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
37
+ onnx/model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
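Each entry above uses Git LFS attribute syntax: files matching the pattern are stored through the LFS filter and treated as binary (`-text`). As a minimal illustrative sketch (assuming a local checkout of this folder), the tracked patterns can be listed programmatically:

```python
from pathlib import Path

# Minimal sketch: list which patterns a .gitattributes file routes through Git LFS.
# Assumes a local checkout containing the .gitattributes shown above.
def lfs_patterns(path: str = ".gitattributes") -> list[str]:
    patterns = []
    for line in Path(path).read_text().splitlines():
        parts = line.split()
        if parts and "filter=lfs" in parts[1:]:
            patterns.append(parts[0])
    return patterns

if __name__ == "__main__":
    print(lfs_patterns())  # e.g. ['*.7z', '*.arrow', ..., '*.safetensors', ...]
```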
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/README.md ADDED
@@ -0,0 +1,305 @@
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ language:
5
+ - en
6
+ pipeline_tag: text-generation
7
+ tags:
8
+ - safetensors
9
+ - onnx
10
+ - transformers.js
11
+ base_model:
12
+ - HuggingFaceTB/SmolLM2-1.7B
13
+ ---
14
+
15
+
16
+ # SmolLM2
17
+
18
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/y45hIMNREW7w_XpHYB_0q.png)
19
+
20
+ ## Table of Contents
21
+
22
+ 1. [Model Summary](#model-summary)
23
+ 2. [Evaluation](#evaluation)
24
+ 3. [Examples](#examples)
25
+ 4. [Limitations](#limitations)
26
+ 5. [Training](#training)
27
+ 6. [License](#license)
28
+ 7. [Citation](#citation)
29
+
30
+ ## Model Summary
31
+
32
+ SmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. They are capable of solving a wide range of tasks while being lightweight enough to run on-device.
33
+
34
+ The 1.7B variant demonstrates significant advances over its predecessor SmolLM1-1.7B, particularly in instruction following, knowledge, reasoning, and mathematics. It was trained on 11 trillion tokens using a diverse dataset combination: FineWeb-Edu, DCLM, The Stack, along with new mathematics and coding datasets that we curated and will release soon. We developed the instruct version through supervised fine-tuning (SFT) using a combination of public datasets and our own curated datasets. We then applied Direct Preference Optimization (DPO) using [UltraFeedback](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized).
35
+
36
+ The instruct model additionally supports tasks such as text rewriting, summarization and function calling thanks to datasets developed by [Argilla](https://huggingface.co/argilla) such as [Synth-APIGen-v0.1](https://huggingface.co/datasets/argilla/Synth-APIGen-v0.1).
37
+ You can find the SFT dataset here: https://huggingface.co/datasets/HuggingFaceTB/smoltalk.
38
+
39
+ For more details refer to: https://github.com/huggingface/smollm. You will find pre-training, post-training, evaluation and local inference code.
40
+
41
+ ### How to use
42
+
43
+ ### Transformers
44
+ ```bash
45
+ pip install transformers
46
+ ```
47
+
48
+ ```python
49
+ from transformers import AutoModelForCausalLM, AutoTokenizer
50
+ checkpoint = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
51
+
52
+ device = "cuda" # for GPU usage or "cpu" for CPU usage
53
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
54
+ # for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
55
+ model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
56
+
57
+ messages = [{"role": "user", "content": "What is the capital of France?"}]
58
+ input_text=tokenizer.apply_chat_template(messages, tokenize=False)
59
+ inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
60
+ outputs = model.generate(inputs, max_new_tokens=50, temperature=0.2, top_p=0.9, do_sample=True)
61
+ print(tokenizer.decode(outputs[0]))
62
+ ```
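The snippet above applies the chat template without `add_generation_prompt`. A variant that lets the tokenizer append the assistant turn and tokenize in one call (the same pattern used in the function-calling example further down) is sketched below; it reuses `model`, `tokenizer`, `device`, and `messages` from the code above:

```python
# Sketch: apply the chat template with the generation prompt and tokenize in one call.
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(device)
outputs = model.generate(inputs, max_new_tokens=50, temperature=0.2, top_p=0.9, do_sample=True)
# Decode only the newly generated tokens.
print(tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True))
```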
63
+
64
+
65
+ ### Chat in TRL
66
+ You can also use the TRL CLI to chat with the model from the terminal:
67
+ ```bash
68
+ pip install trl
69
+ trl chat --model_name_or_path HuggingFaceTB/SmolLM2-1.7B-Instruct --device cpu
70
+ ```
71
+
72
+ ## Evaluation
73
+
74
+ In this section, we report the evaluation results of SmolLM2. All evaluations are zero-shot unless stated otherwise, and we use [lighteval](https://github.com/huggingface/lighteval) to run them.
75
+
76
+ ## Base Pre-Trained Model
77
+
78
+ | Metric | SmolLM2-1.7B | Llama-1B | Qwen2.5-1.5B | SmolLM1-1.7B |
79
+ |------------------|--------------|-------------|---------------|--------------|
80
+ | HellaSwag | **68.7** | 61.2 | 66.4 | 62.9 |
81
+ | ARC (Average) | **60.5** | 49.2 | 58.5 | 59.9 |
82
+ | PIQA | **77.6** | 74.8 | 76.1 | 76.0 |
83
+ | MMLU-Pro (MCF) | **19.4** | 11.7 | 13.7 | 10.8 |
84
+ | CommonsenseQA | **43.6** | 41.2 | 34.1 | 38.0 |
85
+ | TriviaQA | **36.7** | 28.1 | 20.9 | 22.5 |
86
+ | Winogrande | **59.4** | 57.8 | 59.3 | 54.7 |
87
+ | OpenBookQA | 42.2 | 38.4 | 40.0 | **42.4** |
88
+ | GSM8K (5-shot) | 31.0 | 7.2 | **61.3** | 5.5 |
89
+
90
+ ## Instruction Model
91
+
92
+ | Metric | SmolLM2-1.7B-Instruct | Llama-1B-Instruct | Qwen2.5-1.5B-Instruct | SmolLM1-1.7B-Instruct |
93
+ |:-----------------------------|:---------------------:|:-----------------:|:----------------------:|:----------------------:|
94
+ | IFEval (Average prompt/inst) | **56.7** | 53.5 | 47.4 | 23.1 |
95
+ | MT-Bench | 6.13 | 5.48 | **6.52** | 4.33 |
96
+ | OpenRewrite-Eval (micro_avg RougeL) | 44.9 | 39.2 | **46.9** | NaN |
97
+ | HellaSwag | **66.1** | 56.1 | 60.9 | 55.5 |
98
+ | ARC (Average) | **51.7** | 41.6 | 46.2 | 43.7 |
99
+ | PIQA | **74.4** | 72.3 | 73.2 | 71.6 |
100
+ | MMLU-Pro (MCF) | 19.3 | 12.7 | **24.2** | 11.7 |
101
+ | BBH (3-shot) | 32.2 | 27.6 | **35.3** | 25.7 |
102
+ | GSM8K (5-shot) | **48.2** | 26.8 | 42.8 | 4.62 |
103
+
104
+
105
+ ## Examples
106
+ Below are some system and instruction prompts that work well for specific tasks.
107
+
108
+ ### Text rewriting
109
+
110
+ ```python
111
+ system_prompt_rewrite = "You are an AI writing assistant. Your task is to rewrite the user's email to make it more professional and approachable while maintaining its main points and key message. Do not return any text other than the rewritten message."
112
+ user_prompt_rewrite = "Rewrite the message below to make it more friendly and approachable while maintaining its main points and key message. Do not add any new information or return any text other than the rewritten message\nThe message:"
113
+ messages = [{"role": "system", "content": system_prompt_rewrite}, {"role": "user", "content":f"{user_prompt_rewrite} The CI is failing after your last commit!"}]
114
+ input_text=tokenizer.apply_chat_template(messages, tokenize=False)
115
+ inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
116
+ outputs = model.generate(inputs, max_new_tokens=50, temperature=0.2, top_p=0.9, do_sample=True)
117
+ print(tokenizer.decode(outputs[0]))
118
+ ```
119
+ ```
120
+ Hey there! I noticed that the CI isn't passing after your latest commit. Could you take a look and let me know what's going on? Thanks so much for your help!
121
+ ```
122
+
123
+ ### Summarization
124
+
125
+ ```python
126
+ system_prompt_summarize = "Provide a concise, objective summary of the input text in up to three sentences, focusing on key actions and intentions without using second or third person pronouns."
127
+ messages = [{"role": "system", "content": system_prompt_summarize}, {"role": "user", "content": INSERT_LONG_EMAIL}]
128
+ input_text=tokenizer.apply_chat_template(messages, tokenize=False)
129
+ inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
130
+ outputs = model.generate(inputs, max_new_tokens=50, temperature=0.2, top_p=0.9, do_sample=True)
131
+ print(tokenizer.decode(outputs[0]))
132
+ ```
133
+
134
+ ### Function calling
135
+
136
+ SmolLM2-1.7B-Instruct can handle function calling; it scores 27% on the [BFCL Leaderboard](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html). Here's how you can leverage it:
137
+
138
+ ```python
139
+ import json
140
+ import re
141
+ from typing import Optional
142
+
143
+ from jinja2 import Template
144
+ import torch
145
+ from transformers import AutoModelForCausalLM, AutoTokenizer
146
+ from transformers.utils import get_json_schema
147
+
148
+
149
+ system_prompt = Template("""You are an expert in composing functions. You are given a question and a set of possible functions.
150
+ Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
151
+ If none of the functions can be used, point it out and refuse to answer.
152
+ If the given question lacks the parameters required by the function, also point it out.
153
+
154
+ You have access to the following tools:
155
+ <tools>{{ tools }}</tools>
156
+
157
+ The output MUST strictly adhere to the following format, and NO other text MUST be included.
158
+ The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make the tool calls an empty list '[]'.
159
+ <tool_call>[
160
+ {"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
161
+ ... (more tool calls as required)
162
+ ]</tool_call>""")
163
+
164
+
165
+ def prepare_messages(
166
+ query: str,
167
+ tools: Optional[dict[str, any]] = None,
168
+ history: Optional[list[dict[str, str]]] = None
169
+ ) -> list[dict[str, str]]:
170
+ """Prepare the system and user messages for the given query and tools.
171
+
172
+ Args:
173
+ query: The query to be answered.
174
+ tools: The tools available to the user. Defaults to None, in which case an
175
+ empty list will be passed to the model.
176
+ history: Previous exchange of messages, including the system_prompt from
177
+ the first query. Defaults to None for the first message in a conversation.
178
+ """
179
+ if tools is None:
180
+ tools = []
181
+ if history:
182
+ messages = history.copy()
183
+ messages.append({"role": "user", "content": query})
184
+ else:
185
+ messages = [
186
+ {"role": "system", "content": system_prompt.render(tools=json.dumps(tools))},
187
+ {"role": "user", "content": query}
188
+ ]
189
+ return messages
190
+
191
+
192
+ def parse_response(text: str) -> str | dict[str, any]:
193
+ """Parses a response from the model, returning either the
194
+ parsed list of tool calls, or the raw model
195
+ response if no tool call could be parsed.
196
+
197
+ Args:
198
+ text: Response from the model.
199
+ """
200
+ pattern = r"<tool_call>(.*?)</tool_call>"
201
+ matches = re.findall(pattern, text, re.DOTALL)
202
+ if matches:
203
+ return json.loads(matches[0])
204
+ return text
205
+
206
+
207
+ model_name_smollm = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
208
+ model = AutoModelForCausalLM.from_pretrained(model_name_smollm, device_map="auto", torch_dtype="auto", trust_remote_code=True)
209
+ tokenizer = AutoTokenizer.from_pretrained(model_name_smollm)
210
+
211
+ from datetime import datetime
212
+ import random
213
+
214
+ def get_current_time() -> str:
215
+ """Returns the current time in 24-hour format.
216
+
217
+ Returns:
218
+ str: Current time in HH:MM:SS format.
219
+ """
220
+ return datetime.now().strftime("%H:%M:%S")
221
+
222
+
223
+ def get_random_number_between(min: int, max: int) -> int:
224
+ """
225
+ Gets a random number between min and max.
226
+
227
+ Args:
228
+ min: The minimum number.
229
+ max: The maximum number.
230
+
231
+ Returns:
232
+ A random number between min and max.
233
+ """
234
+ return random.randint(min, max)
235
+
236
+
237
+ tools = [get_json_schema(get_random_number_between), get_json_schema(get_current_time)]
238
+
239
+ toolbox = {"get_random_number_between": get_random_number_between, "get_current_time": get_current_time}
240
+
241
+ query = "Give me a number between 1 and 300"
242
+
243
+ messages = prepare_messages(query, tools=tools)
244
+
245
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
246
+ outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
247
+ result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
248
+
249
+ tool_calls = parse_response(result)
250
+ # [{'name': 'get_random_number_between', 'arguments': {'min': 1, 'max': 300}}]
251
+
252
+ # Get tool responses
253
+ tool_responses = [toolbox.get(tc["name"])(*tc["arguments"].values()) for tc in tool_calls]
254
+ # [63]
255
+
256
+ # For the second turn, rebuild the history of messages:
257
+ history = messages.copy()
258
+ # Add the "parsed response"
259
+ history.append({"role": "assistant", "content": result})
260
+ query = "Can you give me the hour?"
261
+ history.append({"role": "user", "content": query})
262
+
263
+ inputs = tokenizer.apply_chat_template(history, add_generation_prompt=True, return_tensors="pt").to(model.device)
264
+ outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
265
+ result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
266
+
267
+ tool_calls = parse_response(result)
268
+ tool_responses = [toolbox.get(tc["name"])(*tc["arguments"].values()) for tc in tool_calls]
269
+ # ['07:57:25']
270
+ ```
271
+ More details, such as parallel function calls and handling queries for which no tool is available, can be found [here](https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct/blob/main/instructions_function_calling.md).
272
+
273
+ ## Limitations
274
+
275
+ SmolLM2 models primarily understand and generate content in English. They can produce text on a variety of topics, but the generated content may not always be factually accurate, logically consistent, or free from biases present in the training data. These models should be used as assistive tools rather than definitive sources of information. Users should always verify important information and critically evaluate any generated content.
276
+
277
+ ## Training
278
+
279
+ ### Model
280
+
281
+ - **Architecture:** Transformer decoder
282
+ - **Pretraining tokens:** 11T
283
+ - **Precision:** bfloat16
284
+
285
+ ### Hardware
286
+
287
+ - **GPUs:** 256 H100
288
+
289
+ ### Software
290
+
291
+ - **Training Framework:** [nanotron](https://github.com/huggingface/nanotron/tree/main)
292
+ - **Alignment Handbook:** [alignment-handbook](https://github.com/huggingface/alignment-handbook/)
293
+
294
+ ## License
295
+
296
+ [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
297
+
298
+ ## Citation
299
+ ```bibtex
300
+ @misc{allal2024SmolLM2,
301
+ title={SmolLM2 - with great data, comes great performance},
302
+ author={Loubna Ben Allal and Anton Lozhkov and Elie Bakouch and Gabriel Martín Blázquez and Lewis Tunstall and Agustín Piqueres and Andres Marafioti and Cyril Zakka and Leandro von Werra and Thomas Wolf},
303
+ year={2024},
304
+ }
305
+ ```
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/all_results.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "epoch": 2.996074326092646,
3
+ "eval_logits/chosen": -0.34099623560905457,
4
+ "eval_logits/rejected": -0.3685227334499359,
5
+ "eval_logps/chosen": -310.2510070800781,
6
+ "eval_logps/rejected": -275.43145751953125,
7
+ "eval_loss": 0.587827205657959,
8
+ "eval_rewards/accuracies": 0.6746031641960144,
9
+ "eval_rewards/chosen": 0.01673175022006035,
10
+ "eval_rewards/margins": 0.5906793475151062,
11
+ "eval_rewards/rejected": -0.573947548866272,
12
+ "eval_runtime": 18.8462,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 106.122,
15
+ "eval_steps_per_second": 3.343,
16
+ "total_flos": 0.0,
17
+ "train_loss": 0.5334697115221363,
18
+ "train_runtime": 7355.3343,
19
+ "train_samples": 61134,
20
+ "train_samples_per_second": 24.935,
21
+ "train_steps_per_second": 0.195
22
+ }
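The DPO metrics above are internally consistent: `eval_rewards/margins` is `eval_rewards/chosen - eval_rewards/rejected`, and `eval_samples_per_second` is `eval_samples / eval_runtime`. A small sketch to check this, assuming a local copy of `all_results.json`:

```python
import json
import math

# Sketch: sanity-check the DPO eval metrics in all_results.json.
with open("all_results.json") as f:
    r = json.load(f)

margin = r["eval_rewards/chosen"] - r["eval_rewards/rejected"]
assert math.isclose(margin, r["eval_rewards/margins"], rel_tol=1e-6)

throughput = r["eval_samples"] / r["eval_runtime"]
assert math.isclose(throughput, r["eval_samples_per_second"], rel_tol=1e-3)
```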
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/config.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 2048,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 8192,
13
+ "max_position_embeddings": 8192,
14
+ "mlp_bias": false,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 32,
17
+ "num_hidden_layers": 24,
18
+ "num_key_value_heads": 32,
19
+ "pad_token_id": 2,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 130000,
24
+ "tie_word_embeddings": true,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.42.3",
27
+ "transformers.js_config": {
28
+ "kv_cache_dtype": {
29
+ "q4f16": "float16",
30
+ "fp16": "float16"
31
+ }
32
+ },
33
+ "use_cache": true,
34
+ "vocab_size": 49152
35
+ }
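These hyperparameters account for the 1.7B parameter figure: a shared input/output embedding plus 24 Llama blocks of width 2048 with an 8192-wide SwiGLU MLP. A rough back-of-the-envelope sketch (bias-free linear layers only; norms and rotary embeddings ignored):

```python
# Rough parameter count from the config above (tie_word_embeddings=True,
# no attention/MLP biases; layer norms and rotary embeddings ignored).
hidden, inter, layers, vocab = 2048, 8192, 24, 49152

embed = vocab * hidden                  # shared input/output embedding
attn = 4 * hidden * hidden              # q, k, v, o projections
mlp = 3 * hidden * inter                # gate, up, down projections
total = embed + layers * (attn + mlp)

print(f"{total / 1e9:.2f}B parameters") # ~1.71B, i.e. ~3.4 GB in bfloat16
```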
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/eval_results.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "epoch": 2.996074326092646,
3
+ "eval_logits/chosen": -0.34099623560905457,
4
+ "eval_logits/rejected": -0.3685227334499359,
5
+ "eval_logps/chosen": -310.2510070800781,
6
+ "eval_logps/rejected": -275.43145751953125,
7
+ "eval_loss": 0.587827205657959,
8
+ "eval_rewards/accuracies": 0.6746031641960144,
9
+ "eval_rewards/chosen": 0.01673175022006035,
10
+ "eval_rewards/margins": 0.5906793475151062,
11
+ "eval_rewards/rejected": -0.573947548866272,
12
+ "eval_runtime": 18.8462,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 106.122,
15
+ "eval_steps_per_second": 3.343
16
+ }
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/generation_config.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.42.3"
7
+ }
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/instructions_function_calling.md ADDED
@@ -0,0 +1,183 @@
1
+ ## Quick start
2
+ Instructions for function calling:
3
+
4
+ ```python
5
+ import json
6
+ import re
7
+ from typing import Optional
8
+
9
+ from jinja2 import Template
10
+ import torch
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer
12
+ from transformers.utils import get_json_schema
13
+
14
+
15
+ system_prompt = Template("""You are an expert in composing functions. You are given a question and a set of possible functions.
16
+ Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
17
+ If none of the functions can be used, point it out and refuse to answer.
18
+ If the given question lacks the parameters required by the function, also point it out.
19
+
20
+ You have access to the following tools:
21
+ <tools>{{ tools }}</tools>
22
+
23
+ The output MUST strictly adhere to the following format, and NO other text MUST be included.
24
+ The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make the tool calls an empty list '[]'.
25
+ <tool_call>[
26
+ {"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},
27
+ ... (more tool calls as required)
28
+ ]</tool_call>""")
29
+
30
+
31
+ def prepare_messages(
32
+ query: str,
33
+ tools: Optional[dict[str, any]] = None,
34
+ history: Optional[list[dict[str, str]]] = None
35
+ ) -> list[dict[str, str]]:
36
+ """Prepare the system and user messages for the given query and tools.
37
+
38
+ Args:
39
+ query: The query to be answered.
40
+ tools: The tools available to the user. Defaults to None, in which case an
41
+ empty list will be passed to the model.
42
+ history: Previous exchange of messages, including the system_prompt from
43
+ the first query. Defaults to None for the first message in a conversation.
44
+ """
45
+ if tools is None:
46
+ tools = []
47
+ if history:
48
+ messages = history.copy()
49
+ messages.append({"role": "user", "content": query})
50
+ else:
51
+ messages = [
52
+ {"role": "system", "content": system_prompt.render(tools=json.dumps(tools))},
53
+ {"role": "user", "content": query}
54
+ ]
55
+ return messages
56
+
57
+
58
+ def parse_response(text: str) -> str | dict[str, any]:
59
+ """Parses a response from the model, returning either the
60
+ parsed list of tool calls, or the raw model
61
+ response if no tool call could be parsed.
62
+
63
+ Args:
64
+ text: Response from the model.
65
+ """
66
+ pattern = r"<tool_call>(.*?)</tool_call>"
67
+ matches = re.findall(pattern, text, re.DOTALL)
68
+ if matches:
69
+ return json.loads(matches[0])
70
+ return text
71
+
72
+ model_name_smollm = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
73
+ model = AutoModelForCausalLM.from_pretrained(model_name_smollm, device_map="auto", torch_dtype="auto", trust_remote_code=True)
74
+ tokenizer = AutoTokenizer.from_pretrained(model_name_smollm)
75
+
76
+ from datetime import datetime
77
+ import random
78
+
79
+ def get_current_time() -> str:
80
+ """Returns the current time in 24-hour format.
81
+
82
+ Returns:
83
+ str: Current time in HH:MM:SS format.
84
+ """
85
+ return datetime.now().strftime("%H:%M:%S")
86
+
87
+
88
+ def get_random_number_between(min: int, max: int) -> int:
89
+ """
90
+ Gets a random number between min and max.
91
+
92
+ Args:
93
+ min: The minimum number.
94
+ max: The maximum number.
95
+
96
+ Returns:
97
+ A random number between min and max.
98
+ """
99
+ return random.randint(min, max)
100
+
101
+
102
+ tools = [get_json_schema(get_random_number_between), get_json_schema(get_current_time)]
103
+
104
+ toolbox = {"get_random_number_between": get_random_number_between, "get_current_time": get_current_time}
105
+
106
+ query = "Give me a number between 1 and 300"
107
+
108
+ messages = prepare_messages(query, tools=tools)
109
+
110
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
111
+ outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
112
+ result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
113
+
114
+ tool_calls = parse_response(result)
115
+ # [{'name': 'get_random_number_between', 'arguments': {'min': 1, 'max': 300}}]
116
+
117
+ # Get tool responses
118
+ tool_responses = [toolbox.get(tc["name"])(*tc["arguments"].values()) for tc in tool_calls]
119
+ # [63]
120
+
121
+ # For the second turn, rebuild the history of messages:
122
+ history = messages.copy()
123
+ # Add the "parsed response"
124
+ history.append({"role": "assistant", "content": result})
125
+ query = "Can you give me the hour?"
126
+ history.append({"role": "user", "content": query})
127
+
128
+ inputs = tokenizer.apply_chat_template(history, add_generation_prompt=True, return_tensors="pt").to(model.device)
129
+ outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
130
+ result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
131
+
132
+ tool_calls = parse_response(result)
133
+ tool_responses = [toolbox.get(tc["name"])(*tc["arguments"].values()) for tc in tool_calls]
134
+ # ['07:57:25']
135
+ ```
136
+
137
+ #### Parallel function calls
138
+
139
+ Multiple function calls can be required by the same query.
140
+
141
+ ```python
142
+ query = "Can you give me the hour and a random number between 1 and 50?"
143
+
144
+ messages = prepare_messages(query, tools=tools)
145
+
146
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
147
+ outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
148
+ result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
149
+
150
+ tool_calls = parse_response(result)
151
+ tool_responses = [toolbox.get(tc["name"])(*tc["arguments"].values()) for tc in tool_calls]
152
+ # ['09:24:52', 50]
153
+
154
+ query = "Can you give me a random number between 1 and 10, other between 200 and 210 and another one between 55 and 60?"
155
+
156
+ messages = prepare_messages(query, tools=tools)
157
+
158
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
159
+ outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
160
+ result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
161
+
162
+ tool_calls = parse_response(result)
163
+ tool_responses = [toolbox.get(tc["name"])(*tc["arguments"].values()) for tc in tool_calls]
164
+ # [7, 202, 60]
165
+ ```
166
+
167
+ #### Tools not available
168
+
169
+ ```python
170
+ query = "Can you open a new page with youtube?"
171
+
172
+ messages = prepare_messages(query, tools=tools)
173
+
174
+ inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
175
+ outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
176
+ result = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
177
+
178
+ tool_calls = parse_response(result)
179
+ # []
180
+
181
+ # The response will be something similar to the following:
182
+ # "The query cannot be answered with the provided tools. Please make sure the tools are correctly installed and imported. If the tools are not installed, install them using pip: 'pip install -r tools.txt'. If the tools are already installed, ensure they are correctly configured. If the tools are not correctly configured, please contact the support team. The output MUST strictly adhere to the following format, and NO other text MUST be included.\n\n<tool_call>[]</tool_call>"
183
+ ```
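The examples above stop once a tool has been executed. One way to obtain a natural-language answer is to feed the tool output back to the model as another turn. The sketch below appends it as a user message, which is an assumption rather than a convention documented for this model, and reuses `model`, `tokenizer`, `messages`, `result`, and `tool_responses` from the quick-start example:

```python
# Sketch: feed a tool result back to the model so it can phrase a natural-language answer.
# Reuses `model`, `tokenizer`, `messages`, `result`, and `tool_responses` from the
# quick-start example; appending the tool output as a user turn is an assumption.
history = messages.copy()
history.append({"role": "assistant", "content": result})
history.append({"role": "user", "content": f"Tool response: {tool_responses[0]}. Answer the original question with it."})

inputs = tokenizer.apply_chat_template(history, add_generation_prompt=True, return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=128, do_sample=False)
print(tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True))
```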
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f55217be716b6a997b97b9d8d7eb6fad02e00858f5010ec24f64603c3a98a0e8
3
+ size 3422777952
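The three lines above are a Git LFS pointer rather than the weights themselves; the actual file is fetched by LFS and should match the recorded size and SHA-256. A sketch for verifying a downloaded copy (assuming the file sits next to the script):

```python
import hashlib
from pathlib import Path

# Sketch: verify a downloaded model.safetensors against the LFS pointer above.
EXPECTED_SHA256 = "f55217be716b6a997b97b9d8d7eb6fad02e00858f5010ec24f64603c3a98a0e8"
EXPECTED_SIZE = 3422777952

path = Path("model.safetensors")
assert path.stat().st_size == EXPECTED_SIZE

h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == EXPECTED_SHA256
print("model.safetensors matches the LFS pointer")
```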
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/model.safetensors.baiduyun.uploading.cfg ADDED
File without changes
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/runs/Oct31_06-24-59_ip-26-0-174-36/events.out.tfevents.1730356365.ip-26-0-174-36.3169719.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6bfce1916438dd2e6553aa0a62d418087b3ae04f8af75e714ad1f01b7663db6
3
+ size 114828
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/runs/Oct31_06-24-59_ip-26-0-174-36/events.out.tfevents.1730363825.ip-26-0-174-36.3169719.1 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3d7723fd0715ce6dcbccf7bb2097f59490b0ac670f798f5378ef5abb7d1301d
3
+ size 828
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/special_tokens_map.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|im_start|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|im_end|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "unk_token": {
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/tokenizer_config.json ADDED
@@ -0,0 +1,154 @@
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<repo_name>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<reponame>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": "<file_sep>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "6": {
53
+ "content": "<filename>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "7": {
61
+ "content": "<gh_stars>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
+ "content": "<issue_start>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "9": {
77
+ "content": "<issue_comment>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "10": {
85
+ "content": "<issue_closed>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "11": {
93
+ "content": "<jupyter_start>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "12": {
101
+ "content": "<jupyter_text>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "13": {
109
+ "content": "<jupyter_code>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "14": {
117
+ "content": "<jupyter_output>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "15": {
125
+ "content": "<jupyter_script>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "16": {
133
+ "content": "<empty_output>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ }
140
+ },
141
+ "additional_special_tokens": [
142
+ "<|im_start|>",
143
+ "<|im_end|>"
144
+ ],
145
+ "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
+ "clean_up_tokenization_spaces": false,
148
+ "eos_token": "<|im_end|>",
149
+ "model_max_length": 2048,
150
+ "pad_token": "<|im_end|>",
151
+ "tokenizer_class": "GPT2Tokenizer",
152
+ "unk_token": "<|endoftext|>",
153
+ "vocab_size": 49152
154
+ }
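The `chat_template` above implements ChatML and injects a default SmolLM system prompt whenever the first message is not a system message. A sketch of what `apply_chat_template` renders for a single user turn (the expected output follows directly from the Jinja template):

```python
from transformers import AutoTokenizer

# Sketch: render the chat template from this repo for one user message.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct")
messages = [{"role": "user", "content": "What is the capital of France?"}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
# <|im_start|>system
# You are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>
# <|im_start|>user
# What is the capital of France?<|im_end|>
# <|im_start|>assistant
```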
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/train_results.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "epoch": 2.996074326092646,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.5334697115221363,
5
+ "train_runtime": 7355.3343,
6
+ "train_samples": 61134,
7
+ "train_samples_per_second": 24.935,
8
+ "train_steps_per_second": 0.195
9
+ }
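These throughput figures are mutually consistent: `train_samples_per_second` is roughly `train_samples * num_epochs / train_runtime`, and `train_steps_per_second * train_runtime` lands near the 1431 optimizer steps recorded in `trainer_state.json`. A short check; the epoch count of 3 is an assumption read off the logged `epoch` value of about 2.996:

```python
import json

# Sketch: sanity-check the throughput figures in train_results.json.
# Assumes 3 training epochs (the logged epoch value is ~2.996).
with open("train_results.json") as f:
    r = json.load(f)

samples_per_sec = r["train_samples"] * 3 / r["train_runtime"]
print(samples_per_sec)   # ~24.93, close to the reported 24.935

steps = r["train_steps_per_second"] * r["train_runtime"]
print(round(steps))      # ~1434, close to global_step 1431 in trainer_state.json
```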
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/trainer_state.json ADDED
@@ -0,0 +1,2426 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.996074326092646,
5
+ "eval_steps": 100,
6
+ "global_step": 1431,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.002093692750588851,
13
+ "grad_norm": 53.581159797745435,
14
+ "learning_rate": 6.9444444444444435e-09,
15
+ "logits/chosen": -0.48425233364105225,
16
+ "logits/rejected": -0.32109448313713074,
17
+ "logps/chosen": -276.5158996582031,
18
+ "logps/rejected": -302.22406005859375,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.02093692750588851,
28
+ "grad_norm": 47.92244929429545,
29
+ "learning_rate": 6.944444444444444e-08,
30
+ "logits/chosen": -0.20008452236652374,
31
+ "logits/rejected": -0.1900922954082489,
32
+ "logps/chosen": -337.452392578125,
33
+ "logps/rejected": -293.0785217285156,
34
+ "loss": 0.7231,
35
+ "rewards/accuracies": 0.3541666567325592,
36
+ "rewards/chosen": -0.022724969312548637,
37
+ "rewards/margins": -0.036751341074705124,
38
+ "rewards/rejected": 0.014026367105543613,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.04187385501177702,
43
+ "grad_norm": 41.37562973132045,
44
+ "learning_rate": 1.3888888888888888e-07,
45
+ "logits/chosen": -0.042457859963178635,
46
+ "logits/rejected": -0.12332990020513535,
47
+ "logps/chosen": -298.8910217285156,
48
+ "logps/rejected": -271.74114990234375,
49
+ "loss": 0.7114,
50
+ "rewards/accuracies": 0.4375,
51
+ "rewards/chosen": 0.016111990436911583,
52
+ "rewards/margins": -0.0034494102001190186,
53
+ "rewards/rejected": 0.0195614043623209,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.06281078251766553,
58
+ "grad_norm": 50.166233912169,
59
+ "learning_rate": 2.0833333333333333e-07,
60
+ "logits/chosen": -0.0645759329199791,
61
+ "logits/rejected": -0.09565907716751099,
62
+ "logps/chosen": -323.93743896484375,
63
+ "logps/rejected": -261.7337341308594,
64
+ "loss": 0.7165,
65
+ "rewards/accuracies": 0.4625000059604645,
66
+ "rewards/chosen": -0.056218355894088745,
67
+ "rewards/margins": 0.005095779895782471,
68
+ "rewards/rejected": -0.061314135789871216,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.08374771002355404,
73
+ "grad_norm": 45.45611748658909,
74
+ "learning_rate": 2.7777777777777776e-07,
75
+ "logits/chosen": -0.13370418548583984,
76
+ "logits/rejected": -0.16684015095233917,
77
+ "logps/chosen": -272.07928466796875,
78
+ "logps/rejected": -251.01589965820312,
79
+ "loss": 0.7166,
80
+ "rewards/accuracies": 0.6000000238418579,
81
+ "rewards/chosen": 0.006498994771391153,
82
+ "rewards/margins": 0.06555742770433426,
83
+ "rewards/rejected": -0.05905843526124954,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.10468463752944256,
88
+ "grad_norm": 55.30410487113166,
89
+ "learning_rate": 3.472222222222222e-07,
90
+ "logits/chosen": -0.0792478546500206,
91
+ "logits/rejected": -0.0551227442920208,
92
+ "logps/chosen": -292.2696838378906,
93
+ "logps/rejected": -266.82965087890625,
94
+ "loss": 0.7243,
95
+ "rewards/accuracies": 0.5,
96
+ "rewards/chosen": -0.08804600685834885,
97
+ "rewards/margins": -0.04349173232913017,
98
+ "rewards/rejected": -0.044554274529218674,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.12562156503533106,
103
+ "grad_norm": 56.63523816899612,
104
+ "learning_rate": 4.1666666666666667e-07,
105
+ "logits/chosen": -0.1644289791584015,
106
+ "logits/rejected": -0.19546563923358917,
107
+ "logps/chosen": -269.5990295410156,
108
+ "logps/rejected": -258.08721923828125,
109
+ "loss": 0.7419,
110
+ "rewards/accuracies": 0.518750011920929,
111
+ "rewards/chosen": 0.04003779590129852,
112
+ "rewards/margins": 0.05106702446937561,
113
+ "rewards/rejected": -0.011029230430722237,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.14655849254121958,
118
+ "grad_norm": 50.48428456914726,
119
+ "learning_rate": 4.861111111111111e-07,
120
+ "logits/chosen": -0.1528700441122055,
121
+ "logits/rejected": -0.12956030666828156,
122
+ "logps/chosen": -334.1960144042969,
123
+ "logps/rejected": -295.05865478515625,
124
+ "loss": 0.7096,
125
+ "rewards/accuracies": 0.5375000238418579,
126
+ "rewards/chosen": -0.012429716996848583,
127
+ "rewards/margins": 0.03767075017094612,
128
+ "rewards/rejected": -0.05010046809911728,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.16749542004710807,
133
+ "grad_norm": 49.10207941889299,
134
+ "learning_rate": 5.555555555555555e-07,
135
+ "logits/chosen": 0.019698064774274826,
136
+ "logits/rejected": -0.05375131219625473,
137
+ "logps/chosen": -324.42889404296875,
138
+ "logps/rejected": -256.3900451660156,
139
+ "loss": 0.7125,
140
+ "rewards/accuracies": 0.574999988079071,
141
+ "rewards/chosen": 0.0010342865716665983,
142
+ "rewards/margins": 0.04288307949900627,
143
+ "rewards/rejected": -0.0418488010764122,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.1884323475529966,
148
+ "grad_norm": 48.35701594054669,
149
+ "learning_rate": 6.249999999999999e-07,
150
+ "logits/chosen": -0.18192127346992493,
151
+ "logits/rejected": -0.15880808234214783,
152
+ "logps/chosen": -282.22161865234375,
153
+ "logps/rejected": -251.54296875,
154
+ "loss": 0.6976,
155
+ "rewards/accuracies": 0.4937500059604645,
156
+ "rewards/chosen": -0.012317812070250511,
157
+ "rewards/margins": -0.011760599911212921,
158
+ "rewards/rejected": -0.0005572110530920327,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.2093692750588851,
163
+ "grad_norm": 44.92592146146067,
164
+ "learning_rate": 6.944444444444444e-07,
165
+ "logits/chosen": -0.11696960031986237,
166
+ "logits/rejected": -0.13915769755840302,
167
+ "logps/chosen": -316.96527099609375,
168
+ "logps/rejected": -277.3512268066406,
169
+ "loss": 0.6787,
170
+ "rewards/accuracies": 0.5562499761581421,
171
+ "rewards/chosen": 0.03938128799200058,
172
+ "rewards/margins": 0.08465500175952911,
173
+ "rewards/rejected": -0.045273713767528534,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.2093692750588851,
178
+ "eval_logits/chosen": -0.3141140341758728,
179
+ "eval_logits/rejected": -0.3377441465854645,
180
+ "eval_logps/chosen": -310.252685546875,
181
+ "eval_logps/rejected": -274.4240417480469,
182
+ "eval_loss": 0.6966869235038757,
183
+ "eval_rewards/accuracies": 0.5515872836112976,
184
+ "eval_rewards/chosen": 0.01587979681789875,
185
+ "eval_rewards/margins": 0.08612197637557983,
186
+ "eval_rewards/rejected": -0.07024218887090683,
187
+ "eval_runtime": 19.212,
188
+ "eval_samples_per_second": 104.102,
189
+ "eval_steps_per_second": 3.279,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 0.23030620256477363,
194
+ "grad_norm": 42.34774239140926,
195
+ "learning_rate": 7.638888888888888e-07,
196
+ "logits/chosen": -0.15527260303497314,
197
+ "logits/rejected": -0.1998877376317978,
198
+ "logps/chosen": -317.2166748046875,
199
+ "logps/rejected": -267.8033447265625,
200
+ "loss": 0.6869,
201
+ "rewards/accuracies": 0.5562499761581421,
202
+ "rewards/chosen": -0.0031774670351296663,
203
+ "rewards/margins": 0.06427686661481857,
204
+ "rewards/rejected": -0.06745433807373047,
205
+ "step": 110
206
+ },
207
+ {
208
+ "epoch": 0.2512431300706621,
209
+ "grad_norm": 42.69543501672853,
210
+ "learning_rate": 8.333333333333333e-07,
211
+ "logits/chosen": -0.08257915079593658,
212
+ "logits/rejected": -0.12443940341472626,
213
+ "logps/chosen": -275.12579345703125,
214
+ "logps/rejected": -245.96347045898438,
215
+ "loss": 0.6706,
216
+ "rewards/accuracies": 0.5562499761581421,
217
+ "rewards/chosen": 0.050213612616062164,
218
+ "rewards/margins": 0.10280221700668335,
219
+ "rewards/rejected": -0.052588604390621185,
220
+ "step": 120
221
+ },
222
+ {
223
+ "epoch": 0.2721800575765506,
224
+ "grad_norm": 50.775507526530916,
225
+ "learning_rate": 9.027777777777778e-07,
226
+ "logits/chosen": -0.14645054936408997,
227
+ "logits/rejected": -0.2005746066570282,
228
+ "logps/chosen": -313.00238037109375,
229
+ "logps/rejected": -257.37945556640625,
230
+ "loss": 0.6662,
231
+ "rewards/accuracies": 0.6187499761581421,
232
+ "rewards/chosen": 0.12051638215780258,
233
+ "rewards/margins": 0.18545812368392944,
234
+ "rewards/rejected": -0.06494174152612686,
235
+ "step": 130
236
+ },
237
+ {
238
+ "epoch": 0.29311698508243916,
239
+ "grad_norm": 45.04510687958917,
240
+ "learning_rate": 9.722222222222222e-07,
241
+ "logits/chosen": -0.18603107333183289,
242
+ "logits/rejected": -0.14804694056510925,
243
+ "logps/chosen": -336.45330810546875,
244
+ "logps/rejected": -270.71844482421875,
245
+ "loss": 0.6837,
246
+ "rewards/accuracies": 0.574999988079071,
247
+ "rewards/chosen": 0.1061311587691307,
248
+ "rewards/margins": 0.20069436728954315,
249
+ "rewards/rejected": -0.09456320852041245,
250
+ "step": 140
251
+ },
252
+ {
253
+ "epoch": 0.31405391258832765,
254
+ "grad_norm": 47.07473020466515,
255
+ "learning_rate": 9.999463737538052e-07,
256
+ "logits/chosen": -0.09479381889104843,
257
+ "logits/rejected": -0.18823939561843872,
258
+ "logps/chosen": -300.7477111816406,
259
+ "logps/rejected": -261.5872497558594,
260
+ "loss": 0.6701,
261
+ "rewards/accuracies": 0.543749988079071,
262
+ "rewards/chosen": 0.014584893360733986,
263
+ "rewards/margins": 0.1319422572851181,
264
+ "rewards/rejected": -0.11735737323760986,
265
+ "step": 150
266
+ },
267
+ {
268
+ "epoch": 0.33499084009421615,
269
+ "grad_norm": 40.24946321953983,
270
+ "learning_rate": 9.996186994612174e-07,
271
+ "logits/chosen": -0.07166972011327744,
272
+ "logits/rejected": -0.05619664117693901,
273
+ "logps/chosen": -277.3063049316406,
274
+ "logps/rejected": -264.21929931640625,
275
+ "loss": 0.6571,
276
+ "rewards/accuracies": 0.6499999761581421,
277
+ "rewards/chosen": -0.002465812023729086,
278
+ "rewards/margins": 0.21548476815223694,
279
+ "rewards/rejected": -0.21795055270195007,
280
+ "step": 160
281
+ },
282
+ {
283
+ "epoch": 0.3559277676001047,
284
+ "grad_norm": 43.25422484420196,
285
+ "learning_rate": 9.989933382359422e-07,
286
+ "logits/chosen": -0.22567839920520782,
287
+ "logits/rejected": -0.2581842541694641,
288
+ "logps/chosen": -270.6056213378906,
289
+ "logps/rejected": -234.0198211669922,
290
+ "loss": 0.6614,
291
+ "rewards/accuracies": 0.6499999761581421,
292
+ "rewards/chosen": -0.0034525603987276554,
293
+ "rewards/margins": 0.24006681144237518,
294
+ "rewards/rejected": -0.2435193508863449,
295
+ "step": 170
296
+ },
297
+ {
298
+ "epoch": 0.3768646951059932,
299
+ "grad_norm": 39.47362541006732,
300
+ "learning_rate": 9.980706626858607e-07,
301
+ "logits/chosen": -0.15520626306533813,
302
+ "logits/rejected": -0.13547591865062714,
303
+ "logps/chosen": -275.5711364746094,
304
+ "logps/rejected": -280.95904541015625,
305
+ "loss": 0.6323,
306
+ "rewards/accuracies": 0.699999988079071,
307
+ "rewards/chosen": 0.01858050376176834,
308
+ "rewards/margins": 0.3474423885345459,
309
+ "rewards/rejected": -0.32886189222335815,
310
+ "step": 180
311
+ },
312
+ {
313
+ "epoch": 0.39780162261188173,
314
+ "grad_norm": 45.3164057858113,
315
+ "learning_rate": 9.968512225671258e-07,
316
+ "logits/chosen": -0.07492430508136749,
317
+ "logits/rejected": -0.07894851267337799,
318
+ "logps/chosen": -287.3063659667969,
319
+ "logps/rejected": -286.6043395996094,
320
+ "loss": 0.649,
321
+ "rewards/accuracies": 0.612500011920929,
322
+ "rewards/chosen": -0.07111965119838715,
323
+ "rewards/margins": 0.22652164101600647,
324
+ "rewards/rejected": -0.2976413071155548,
325
+ "step": 190
326
+ },
327
+ {
328
+ "epoch": 0.4187385501177702,
329
+ "grad_norm": 51.9773301288437,
330
+ "learning_rate": 9.953357444566038e-07,
331
+ "logits/chosen": -0.09818016737699509,
332
+ "logits/rejected": -0.11751595884561539,
333
+ "logps/chosen": -284.3682861328125,
334
+ "logps/rejected": -264.2423095703125,
335
+ "loss": 0.645,
336
+ "rewards/accuracies": 0.5687500238418579,
337
+ "rewards/chosen": -0.06741360574960709,
338
+ "rewards/margins": 0.13478204607963562,
339
+ "rewards/rejected": -0.2021956443786621,
340
+ "step": 200
341
+ },
342
+ {
343
+ "epoch": 0.4187385501177702,
344
+ "eval_logits/chosen": -0.3229367136955261,
345
+ "eval_logits/rejected": -0.34633249044418335,
346
+ "eval_logps/chosen": -310.3840026855469,
347
+ "eval_logps/rejected": -274.8875732421875,
348
+ "eval_loss": 0.6491106748580933,
349
+ "eval_rewards/accuracies": 0.60317462682724,
350
+ "eval_rewards/chosen": -0.04975215345621109,
351
+ "eval_rewards/margins": 0.25227656960487366,
352
+ "eval_rewards/rejected": -0.30202868580818176,
353
+ "eval_runtime": 19.4848,
354
+ "eval_samples_per_second": 102.644,
355
+ "eval_steps_per_second": 3.233,
356
+ "step": 200
357
+ },
358
+ {
359
+ "epoch": 0.4396754776236587,
360
+ "grad_norm": 42.51852593559685,
361
+ "learning_rate": 9.935251313189563e-07,
362
+ "logits/chosen": 0.02242279052734375,
363
+ "logits/rejected": -0.06930123269557953,
364
+ "logps/chosen": -321.75689697265625,
365
+ "logps/rejected": -269.69781494140625,
366
+ "loss": 0.6528,
367
+ "rewards/accuracies": 0.612500011920929,
368
+ "rewards/chosen": -0.06879940629005432,
369
+ "rewards/margins": 0.2159956693649292,
370
+ "rewards/rejected": -0.2847950756549835,
371
+ "step": 210
372
+ },
373
+ {
374
+ "epoch": 0.46061240512954726,
375
+ "grad_norm": 39.56904653144407,
376
+ "learning_rate": 9.914204619686312e-07,
377
+ "logits/chosen": -0.21375274658203125,
378
+ "logits/rejected": -0.06523901224136353,
379
+ "logps/chosen": -263.26251220703125,
380
+ "logps/rejected": -254.17361450195312,
381
+ "loss": 0.6473,
382
+ "rewards/accuracies": 0.574999988079071,
383
+ "rewards/chosen": -0.09464406222105026,
384
+ "rewards/margins": 0.11034605652093887,
385
+ "rewards/rejected": -0.20499010384082794,
386
+ "step": 220
387
+ },
388
+ {
389
+ "epoch": 0.48154933263543576,
390
+ "grad_norm": 49.49502162405857,
391
+ "learning_rate": 9.89022990427073e-07,
392
+ "logits/chosen": -0.1386515200138092,
393
+ "logits/rejected": -0.027841920033097267,
394
+ "logps/chosen": -275.6785583496094,
395
+ "logps/rejected": -282.2340393066406,
396
+ "loss": 0.651,
397
+ "rewards/accuracies": 0.6875,
398
+ "rewards/chosen": -0.0026134043000638485,
399
+ "rewards/margins": 0.34593018889427185,
400
+ "rewards/rejected": -0.34854358434677124,
401
+ "step": 230
402
+ },
403
+ {
404
+ "epoch": 0.5024862601413242,
405
+ "grad_norm": 42.40141104653558,
406
+ "learning_rate": 9.86334145175542e-07,
407
+ "logits/chosen": -0.16665732860565186,
408
+ "logits/rejected": -0.13909348845481873,
409
+ "logps/chosen": -282.2431640625,
410
+ "logps/rejected": -265.6712341308594,
411
+ "loss": 0.6269,
412
+ "rewards/accuracies": 0.5562499761581421,
413
+ "rewards/chosen": -0.03417225927114487,
414
+ "rewards/margins": 0.2094908207654953,
415
+ "rewards/rejected": -0.24366307258605957,
416
+ "step": 240
417
+ },
418
+ {
419
+ "epoch": 0.5234231876472127,
420
+ "grad_norm": 44.711963555505086,
421
+ "learning_rate": 9.83355528303984e-07,
422
+ "logits/chosen": -0.21623122692108154,
423
+ "logits/rejected": -0.25620579719543457,
424
+ "logps/chosen": -321.9479064941406,
425
+ "logps/rejected": -276.6851501464844,
426
+ "loss": 0.6483,
427
+ "rewards/accuracies": 0.59375,
428
+ "rewards/chosen": -0.033338677138090134,
429
+ "rewards/margins": 0.21741199493408203,
430
+ "rewards/rejected": -0.25075066089630127,
431
+ "step": 250
432
+ },
433
+ {
434
+ "epoch": 0.5443601151531012,
435
+ "grad_norm": 44.08715279117413,
436
+ "learning_rate": 9.800889145564616e-07,
437
+ "logits/chosen": -0.023601394146680832,
438
+ "logits/rejected": -0.08905109018087387,
439
+ "logps/chosen": -307.87286376953125,
440
+ "logps/rejected": -257.384765625,
441
+ "loss": 0.6489,
442
+ "rewards/accuracies": 0.6000000238418579,
443
+ "rewards/chosen": -0.03719106689095497,
444
+ "rewards/margins": 0.2362823486328125,
445
+ "rewards/rejected": -0.2734734117984772,
446
+ "step": 260
447
+ },
448
+ {
449
+ "epoch": 0.5652970426589898,
450
+ "grad_norm": 39.7549215734126,
451
+ "learning_rate": 9.765362502737097e-07,
452
+ "logits/chosen": -0.15661786496639252,
453
+ "logits/rejected": -0.1753096580505371,
454
+ "logps/chosen": -313.56787109375,
455
+ "logps/rejected": -277.8585510253906,
456
+ "loss": 0.6233,
457
+ "rewards/accuracies": 0.637499988079071,
458
+ "rewards/chosen": 0.07719334214925766,
459
+ "rewards/margins": 0.3890419602394104,
460
+ "rewards/rejected": -0.31184864044189453,
461
+ "step": 270
462
+ },
463
+ {
464
+ "epoch": 0.5862339701648783,
465
+ "grad_norm": 46.953763281596736,
466
+ "learning_rate": 9.726996522334514e-07,
467
+ "logits/chosen": -0.22773197293281555,
468
+ "logits/rejected": -0.25082847476005554,
469
+ "logps/chosen": -299.99127197265625,
470
+ "logps/rejected": -249.6244659423828,
471
+ "loss": 0.6409,
472
+ "rewards/accuracies": 0.6312500238418579,
473
+ "rewards/chosen": -0.044135063886642456,
474
+ "rewards/margins": 0.2800499200820923,
475
+ "rewards/rejected": -0.32418501377105713,
476
+ "step": 280
477
+ },
478
+ {
479
+ "epoch": 0.6071708976707668,
480
+ "grad_norm": 38.382090654521065,
481
+ "learning_rate": 9.68581406389163e-07,
482
+ "logits/chosen": -0.14438043534755707,
483
+ "logits/rejected": -0.17269106209278107,
484
+ "logps/chosen": -282.90325927734375,
485
+ "logps/rejected": -274.38079833984375,
486
+ "loss": 0.6217,
487
+ "rewards/accuracies": 0.643750011920929,
488
+ "rewards/chosen": -0.009734408929944038,
489
+ "rewards/margins": 0.3537730574607849,
490
+ "rewards/rejected": -0.363507479429245,
491
+ "step": 290
492
+ },
493
+ {
494
+ "epoch": 0.6281078251766553,
495
+ "grad_norm": 41.7647459812509,
496
+ "learning_rate": 9.641839665080363e-07,
497
+ "logits/chosen": -0.1018938273191452,
498
+ "logits/rejected": -0.08353041112422943,
499
+ "logps/chosen": -316.49810791015625,
500
+ "logps/rejected": -277.4931640625,
501
+ "loss": 0.6161,
502
+ "rewards/accuracies": 0.6937500238418579,
503
+ "rewards/chosen": 0.03871753811836243,
504
+ "rewards/margins": 0.39340126514434814,
505
+ "rewards/rejected": -0.3546837270259857,
506
+ "step": 300
507
+ },
508
+ {
509
+ "epoch": 0.6281078251766553,
510
+ "eval_logits/chosen": -0.33166003227233887,
511
+ "eval_logits/rejected": -0.3552355170249939,
512
+ "eval_logps/chosen": -310.4118957519531,
513
+ "eval_logps/rejected": -275.1272277832031,
514
+ "eval_loss": 0.6316225528717041,
515
+ "eval_rewards/accuracies": 0.682539701461792,
516
+ "eval_rewards/chosen": -0.06370855122804642,
517
+ "eval_rewards/margins": 0.3581322431564331,
518
+ "eval_rewards/rejected": -0.4218408465385437,
519
+ "eval_runtime": 19.0833,
520
+ "eval_samples_per_second": 104.804,
521
+ "eval_steps_per_second": 3.301,
522
+ "step": 300
523
+ },
524
+ {
525
+ "epoch": 0.6490447526825438,
526
+ "grad_norm": 40.9716290999305,
527
+ "learning_rate": 9.595099527089568e-07,
528
+ "logits/chosen": -0.11042879521846771,
529
+ "logits/rejected": -0.18008281290531158,
530
+ "logps/chosen": -323.7516174316406,
531
+ "logps/rejected": -247.0631561279297,
532
+ "loss": 0.6007,
533
+ "rewards/accuracies": 0.7124999761581421,
534
+ "rewards/chosen": 0.10534234344959259,
535
+ "rewards/margins": 0.5895902514457703,
536
+ "rewards/rejected": -0.48424798250198364,
537
+ "step": 310
538
+ },
539
+ {
540
+ "epoch": 0.6699816801884323,
541
+ "grad_norm": 45.14020951986602,
542
+ "learning_rate": 9.545621499013618e-07,
543
+ "logits/chosen": -0.024479269981384277,
544
+ "logits/rejected": -0.038955364376306534,
545
+ "logps/chosen": -294.4266662597656,
546
+ "logps/rejected": -252.0673828125,
547
+ "loss": 0.5995,
548
+ "rewards/accuracies": 0.668749988079071,
549
+ "rewards/chosen": 0.06134527176618576,
550
+ "rewards/margins": 0.4816582202911377,
551
+ "rewards/rejected": -0.4203129708766937,
552
+ "step": 320
553
+ },
554
+ {
555
+ "epoch": 0.6909186076943209,
556
+ "grad_norm": 38.030813091938484,
557
+ "learning_rate": 9.493435061259129e-07,
558
+ "logits/chosen": -0.012772688642144203,
559
+ "logits/rejected": -0.030413877218961716,
560
+ "logps/chosen": -291.36761474609375,
561
+ "logps/rejected": -265.4248046875,
562
+ "loss": 0.6232,
563
+ "rewards/accuracies": 0.6187499761581421,
564
+ "rewards/chosen": 0.001638062298297882,
565
+ "rewards/margins": 0.40076717734336853,
566
+ "rewards/rejected": -0.39912909269332886,
567
+ "step": 330
568
+ },
569
+ {
570
+ "epoch": 0.7118555352002094,
571
+ "grad_norm": 45.93570520280886,
572
+ "learning_rate": 9.438571307979704e-07,
573
+ "logits/chosen": -0.04617694020271301,
574
+ "logits/rejected": -0.07658630609512329,
575
+ "logps/chosen": -296.7699890136719,
576
+ "logps/rejected": -270.9905090332031,
577
+ "loss": 0.6096,
578
+ "rewards/accuracies": 0.6812499761581421,
579
+ "rewards/chosen": -0.05764853209257126,
580
+ "rewards/margins": 0.28755250573158264,
581
+ "rewards/rejected": -0.34520095586776733,
582
+ "step": 340
583
+ },
584
+ {
585
+ "epoch": 0.7327924627060979,
586
+ "grad_norm": 44.09128545700975,
587
+ "learning_rate": 9.381062928549151e-07,
588
+ "logits/chosen": -0.12188152968883514,
589
+ "logits/rejected": -0.13376249372959137,
590
+ "logps/chosen": -282.15179443359375,
591
+ "logps/rejected": -247.45834350585938,
592
+ "loss": 0.5964,
593
+ "rewards/accuracies": 0.6187499761581421,
594
+ "rewards/chosen": -0.09005177021026611,
595
+ "rewards/margins": 0.41418519616127014,
596
+ "rewards/rejected": -0.5042369961738586,
597
+ "step": 350
598
+ },
599
+ {
600
+ "epoch": 0.7537293902119864,
601
+ "grad_norm": 40.36784058818516,
602
+ "learning_rate": 9.320944188084241e-07,
603
+ "logits/chosen": -0.07130910456180573,
604
+ "logits/rejected": -0.18685731291770935,
605
+ "logps/chosen": -340.55487060546875,
606
+ "logps/rejected": -278.08551025390625,
607
+ "loss": 0.6256,
608
+ "rewards/accuracies": 0.65625,
609
+ "rewards/chosen": 0.1223798543214798,
610
+ "rewards/margins": 0.4791669249534607,
611
+ "rewards/rejected": -0.3567870557308197,
612
+ "step": 360
613
+ },
614
+ {
615
+ "epoch": 0.7746663177178749,
616
+ "grad_norm": 37.74697617657793,
617
+ "learning_rate": 9.258250907028572e-07,
618
+ "logits/chosen": -0.2023000717163086,
619
+ "logits/rejected": -0.25707709789276123,
620
+ "logps/chosen": -306.526611328125,
621
+ "logps/rejected": -248.40023803710938,
622
+ "loss": 0.603,
623
+ "rewards/accuracies": 0.6625000238418579,
624
+ "rewards/chosen": 0.05354640632867813,
625
+ "rewards/margins": 0.42885661125183105,
626
+ "rewards/rejected": -0.3753102421760559,
627
+ "step": 370
628
+ },
629
+ {
630
+ "epoch": 0.7956032452237635,
631
+ "grad_norm": 36.818935480552355,
632
+ "learning_rate": 9.193020439809746e-07,
633
+ "logits/chosen": -0.25006169080734253,
634
+ "logits/rejected": -0.27239733934402466,
635
+ "logps/chosen": -306.9652099609375,
636
+ "logps/rejected": -274.15850830078125,
637
+ "loss": 0.6108,
638
+ "rewards/accuracies": 0.6937500238418579,
639
+ "rewards/chosen": -0.005802587606012821,
640
+ "rewards/margins": 0.5873938798904419,
641
+ "rewards/rejected": -0.5931965112686157,
642
+ "step": 380
643
+ },
644
+ {
645
+ "epoch": 0.816540172729652,
646
+ "grad_norm": 40.218709775991016,
647
+ "learning_rate": 9.125291652582547e-07,
648
+ "logits/chosen": -0.19798080623149872,
649
+ "logits/rejected": -0.16009968519210815,
650
+ "logps/chosen": -308.05975341796875,
651
+ "logps/rejected": -289.6929626464844,
652
+ "loss": 0.6163,
653
+ "rewards/accuracies": 0.6937500238418579,
654
+ "rewards/chosen": 0.09920313209295273,
655
+ "rewards/margins": 0.47261109948158264,
656
+ "rewards/rejected": -0.3734079897403717,
657
+ "step": 390
658
+ },
659
+ {
660
+ "epoch": 0.8374771002355405,
661
+ "grad_norm": 39.13573330990029,
662
+ "learning_rate": 9.055104900071375e-07,
663
+ "logits/chosen": -0.3008892834186554,
664
+ "logits/rejected": -0.1968032568693161,
665
+ "logps/chosen": -253.58798217773438,
666
+ "logps/rejected": -239.539794921875,
667
+ "loss": 0.5964,
668
+ "rewards/accuracies": 0.78125,
669
+ "rewards/chosen": 0.14220505952835083,
670
+ "rewards/margins": 0.6365345120429993,
671
+ "rewards/rejected": -0.49432945251464844,
672
+ "step": 400
673
+ },
674
+ {
675
+ "epoch": 0.8374771002355405,
676
+ "eval_logits/chosen": -0.3291381299495697,
677
+ "eval_logits/rejected": -0.35453417897224426,
678
+ "eval_logps/chosen": -310.317626953125,
679
+ "eval_logps/rejected": -275.15972900390625,
680
+ "eval_loss": 0.6100037693977356,
681
+ "eval_rewards/accuracies": 0.658730149269104,
682
+ "eval_rewards/chosen": -0.016582150012254715,
683
+ "eval_rewards/margins": 0.42150571942329407,
684
+ "eval_rewards/rejected": -0.43808794021606445,
685
+ "eval_runtime": 19.1374,
686
+ "eval_samples_per_second": 104.507,
687
+ "eval_steps_per_second": 3.292,
688
+ "step": 400
689
+ },
690
+ {
691
+ "epoch": 0.8584140277414289,
692
+ "grad_norm": 41.59836395581152,
693
+ "learning_rate": 8.982502001525777e-07,
694
+ "logits/chosen": -0.05211949348449707,
695
+ "logits/rejected": -0.16379515826702118,
696
+ "logps/chosen": -334.3597717285156,
697
+ "logps/rejected": -280.1282043457031,
698
+ "loss": 0.6085,
699
+ "rewards/accuracies": 0.706250011920929,
700
+ "rewards/chosen": 0.10034932941198349,
701
+ "rewards/margins": 0.5413358211517334,
702
+ "rewards/rejected": -0.4409864842891693,
703
+ "step": 410
704
+ },
705
+ {
706
+ "epoch": 0.8793509552473174,
707
+ "grad_norm": 43.48433949710367,
708
+ "learning_rate": 8.90752621580335e-07,
709
+ "logits/chosen": -0.10794836282730103,
710
+ "logits/rejected": -0.10175999253988266,
711
+ "logps/chosen": -289.7253723144531,
712
+ "logps/rejected": -290.4524841308594,
713
+ "loss": 0.5967,
714
+ "rewards/accuracies": 0.737500011920929,
715
+ "rewards/chosen": 0.03317990154027939,
716
+ "rewards/margins": 0.5273382663726807,
717
+ "rewards/rejected": -0.4941583275794983,
718
+ "step": 420
719
+ },
720
+ {
721
+ "epoch": 0.9002878827532059,
722
+ "grad_norm": 38.200809011500716,
723
+ "learning_rate": 8.83022221559489e-07,
724
+ "logits/chosen": -0.08915697038173676,
725
+ "logits/rejected": -0.18852075934410095,
726
+ "logps/chosen": -325.81707763671875,
727
+ "logps/rejected": -257.1981506347656,
728
+ "loss": 0.5916,
729
+ "rewards/accuracies": 0.637499988079071,
730
+ "rewards/chosen": 0.053056418895721436,
731
+ "rewards/margins": 0.5633317232131958,
732
+ "rewards/rejected": -0.5102753043174744,
733
+ "step": 430
734
+ },
735
+ {
736
+ "epoch": 0.9212248102590945,
737
+ "grad_norm": 37.18511688193762,
738
+ "learning_rate": 8.750636060807145e-07,
739
+ "logits/chosen": -0.0535130575299263,
740
+ "logits/rejected": -0.1133667603135109,
741
+ "logps/chosen": -299.4546813964844,
742
+ "logps/rejected": -248.877685546875,
743
+ "loss": 0.6032,
744
+ "rewards/accuracies": 0.6312500238418579,
745
+ "rewards/chosen": 0.14839240908622742,
746
+ "rewards/margins": 0.4819253385066986,
747
+ "rewards/rejected": -0.3335329294204712,
748
+ "step": 440
749
+ },
750
+ {
751
+ "epoch": 0.942161737764983,
752
+ "grad_norm": 35.5219084800687,
753
+ "learning_rate": 8.668815171119019e-07,
754
+ "logits/chosen": -0.16904090344905853,
755
+ "logits/rejected": -0.12047908455133438,
756
+ "logps/chosen": -297.3148498535156,
757
+ "logps/rejected": -285.4595031738281,
758
+ "loss": 0.6148,
759
+ "rewards/accuracies": 0.6937500238418579,
760
+ "rewards/chosen": 0.051097553223371506,
761
+ "rewards/margins": 0.505901038646698,
762
+ "rewards/rejected": -0.4548035264015198,
763
+ "step": 450
764
+ },
765
+ {
766
+ "epoch": 0.9630986652708715,
767
+ "grad_norm": 40.701223424626065,
768
+ "learning_rate": 8.584808297727591e-07,
769
+ "logits/chosen": -0.07098624855279922,
770
+ "logits/rejected": -0.11652670055627823,
771
+ "logps/chosen": -291.76434326171875,
772
+ "logps/rejected": -246.9138641357422,
773
+ "loss": 0.6087,
774
+ "rewards/accuracies": 0.643750011920929,
775
+ "rewards/chosen": 0.10639622062444687,
776
+ "rewards/margins": 0.5133938193321228,
777
+ "rewards/rejected": -0.40699753165245056,
778
+ "step": 460
779
+ },
780
+ {
781
+ "epoch": 0.98403559277676,
782
+ "grad_norm": 41.789096950373676,
783
+ "learning_rate": 8.498665494300771e-07,
784
+ "logits/chosen": -0.19742052257061005,
785
+ "logits/rejected": -0.16556285321712494,
786
+ "logps/chosen": -302.3099060058594,
787
+ "logps/rejected": -277.933837890625,
788
+ "loss": 0.5919,
789
+ "rewards/accuracies": 0.625,
790
+ "rewards/chosen": 0.026506727561354637,
791
+ "rewards/margins": 0.43121033906936646,
792
+ "rewards/rejected": -0.40470361709594727,
793
+ "step": 470
794
+ },
795
+ {
796
+ "epoch": 1.0049725202826485,
797
+ "grad_norm": 34.0036850645019,
798
+ "learning_rate": 8.410438087153911e-07,
799
+ "logits/chosen": -0.05161554738879204,
800
+ "logits/rejected": -0.1036214604973793,
801
+ "logps/chosen": -300.4537658691406,
802
+ "logps/rejected": -268.56048583984375,
803
+ "loss": 0.5867,
804
+ "rewards/accuracies": 0.6625000238418579,
805
+ "rewards/chosen": -0.00218582758679986,
806
+ "rewards/margins": 0.4712887704372406,
807
+ "rewards/rejected": -0.4734745919704437,
808
+ "step": 480
809
+ },
810
+ {
811
+ "epoch": 1.025909447788537,
812
+ "grad_norm": 36.00706898235858,
813
+ "learning_rate": 8.320178644668141e-07,
814
+ "logits/chosen": -0.11621465533971786,
815
+ "logits/rejected": -0.18176773190498352,
816
+ "logps/chosen": -299.8227233886719,
817
+ "logps/rejected": -256.3122253417969,
818
+ "loss": 0.54,
819
+ "rewards/accuracies": 0.7562500238418579,
820
+ "rewards/chosen": 0.12694445252418518,
821
+ "rewards/margins": 0.600742757320404,
822
+ "rewards/rejected": -0.47379836440086365,
823
+ "step": 490
824
+ },
825
+ {
826
+ "epoch": 1.0468463752944255,
827
+ "grad_norm": 36.99527595331141,
828
+ "learning_rate": 8.22794094596864e-07,
829
+ "logits/chosen": -0.1485815942287445,
830
+ "logits/rejected": -0.20506855845451355,
831
+ "logps/chosen": -305.8435363769531,
832
+ "logps/rejected": -270.3700256347656,
833
+ "loss": 0.5394,
834
+ "rewards/accuracies": 0.7124999761581421,
835
+ "rewards/chosen": 0.029846886172890663,
836
+ "rewards/margins": 0.6080519556999207,
837
+ "rewards/rejected": -0.5782050490379333,
838
+ "step": 500
839
+ },
840
+ {
841
+ "epoch": 1.0468463752944255,
842
+ "eval_logits/chosen": -0.33202826976776123,
843
+ "eval_logits/rejected": -0.35762789845466614,
844
+ "eval_logps/chosen": -310.303955078125,
845
+ "eval_logps/rejected": -275.2332458496094,
846
+ "eval_loss": 0.6065964102745056,
847
+ "eval_rewards/accuracies": 0.7103174328804016,
848
+ "eval_rewards/chosen": -0.009755274280905724,
849
+ "eval_rewards/margins": 0.465115487575531,
850
+ "eval_rewards/rejected": -0.4748707413673401,
851
+ "eval_runtime": 18.9724,
852
+ "eval_samples_per_second": 105.416,
853
+ "eval_steps_per_second": 3.321,
854
+ "step": 500
855
+ },
856
+ {
857
+ "epoch": 1.067783302800314,
858
+ "grad_norm": 33.8725028749791,
859
+ "learning_rate": 8.133779948881513e-07,
860
+ "logits/chosen": -0.18220725655555725,
861
+ "logits/rejected": -0.1532759964466095,
862
+ "logps/chosen": -269.54156494140625,
863
+ "logps/rejected": -259.2676086425781,
864
+ "loss": 0.533,
865
+ "rewards/accuracies": 0.78125,
866
+ "rewards/chosen": 0.12358293682336807,
867
+ "rewards/margins": 0.7069646120071411,
868
+ "rewards/rejected": -0.5833816528320312,
869
+ "step": 510
870
+ },
871
+ {
872
+ "epoch": 1.0887202303062025,
873
+ "grad_norm": 32.98315936865049,
874
+ "learning_rate": 8.037751757188367e-07,
875
+ "logits/chosen": -0.2609891891479492,
876
+ "logits/rejected": -0.1464134156703949,
877
+ "logps/chosen": -262.7670593261719,
878
+ "logps/rejected": -260.43121337890625,
879
+ "loss": 0.5216,
880
+ "rewards/accuracies": 0.7437499761581421,
881
+ "rewards/chosen": 0.10809774696826935,
882
+ "rewards/margins": 0.6094505786895752,
883
+ "rewards/rejected": -0.501352846622467,
884
+ "step": 520
885
+ },
886
+ {
887
+ "epoch": 1.109657157812091,
888
+ "grad_norm": 36.61105396566652,
889
+ "learning_rate": 7.939913587198095e-07,
890
+ "logits/chosen": -0.2482234686613083,
891
+ "logits/rejected": -0.3337472081184387,
892
+ "logps/chosen": -288.12847900390625,
893
+ "logps/rejected": -249.4586181640625,
894
+ "loss": 0.5439,
895
+ "rewards/accuracies": 0.71875,
896
+ "rewards/chosen": 0.05145542696118355,
897
+ "rewards/margins": 0.7037261128425598,
898
+ "rewards/rejected": -0.6522706747055054,
899
+ "step": 530
900
+ },
901
+ {
902
+ "epoch": 1.1305940853179797,
903
+ "grad_norm": 38.97199731536176,
904
+ "learning_rate": 7.840323733655778e-07,
905
+ "logits/chosen": -0.19931235909461975,
906
+ "logits/rejected": -0.09966816008090973,
907
+ "logps/chosen": -259.0215759277344,
908
+ "logps/rejected": -237.9501495361328,
909
+ "loss": 0.5534,
910
+ "rewards/accuracies": 0.731249988079071,
911
+ "rewards/chosen": 0.12085232883691788,
912
+ "rewards/margins": 0.6305907964706421,
913
+ "rewards/rejected": -0.5097384452819824,
914
+ "step": 540
915
+ },
916
+ {
917
+ "epoch": 1.151531012823868,
918
+ "grad_norm": 35.71159175636079,
919
+ "learning_rate": 7.739041535009041e-07,
920
+ "logits/chosen": -0.14787928760051727,
921
+ "logits/rejected": -0.24329273402690887,
922
+ "logps/chosen": -305.64697265625,
923
+ "logps/rejected": -244.08755493164062,
924
+ "loss": 0.5301,
925
+ "rewards/accuracies": 0.706250011920929,
926
+ "rewards/chosen": 0.08590878546237946,
927
+ "rewards/margins": 0.6826232075691223,
928
+ "rewards/rejected": -0.596714437007904,
929
+ "step": 550
930
+ },
931
+ {
932
+ "epoch": 1.1724679403297567,
933
+ "grad_norm": 40.90404154922072,
934
+ "learning_rate": 7.636127338052511e-07,
935
+ "logits/chosen": -0.10747908055782318,
936
+ "logits/rejected": -0.11999531090259552,
937
+ "logps/chosen": -294.2823486328125,
938
+ "logps/rejected": -265.80010986328125,
939
+ "loss": 0.5442,
940
+ "rewards/accuracies": 0.7562500238418579,
941
+ "rewards/chosen": 0.1678900569677353,
942
+ "rewards/margins": 0.6707212924957275,
943
+ "rewards/rejected": -0.502831220626831,
944
+ "step": 560
945
+ },
946
+ {
947
+ "epoch": 1.193404867835645,
948
+ "grad_norm": 36.957100723172736,
949
+ "learning_rate": 7.531642461971514e-07,
950
+ "logits/chosen": -0.24748548865318298,
951
+ "logits/rejected": -0.13145090639591217,
952
+ "logps/chosen": -270.5980529785156,
953
+ "logps/rejected": -276.2918395996094,
954
+ "loss": 0.5296,
955
+ "rewards/accuracies": 0.71875,
956
+ "rewards/chosen": 0.1355637013912201,
957
+ "rewards/margins": 0.7545391917228699,
958
+ "rewards/rejected": -0.6189755797386169,
959
+ "step": 570
960
+ },
961
+ {
962
+ "epoch": 1.2143417953415336,
963
+ "grad_norm": 36.431278984426584,
964
+ "learning_rate": 7.425649161806352e-07,
965
+ "logits/chosen": -0.22467997670173645,
966
+ "logits/rejected": -0.20354416966438293,
967
+ "logps/chosen": -284.69439697265625,
968
+ "logps/rejected": -250.2189483642578,
969
+ "loss": 0.5209,
970
+ "rewards/accuracies": 0.7562500238418579,
971
+ "rewards/chosen": 0.03756130859255791,
972
+ "rewards/margins": 0.7149747610092163,
973
+ "rewards/rejected": -0.6774134635925293,
974
+ "step": 580
975
+ },
976
+ {
977
+ "epoch": 1.235278722847422,
978
+ "grad_norm": 34.3971566508403,
979
+ "learning_rate": 7.318210591359008e-07,
980
+ "logits/chosen": -0.02497723139822483,
981
+ "logits/rejected": -0.0003952041151933372,
982
+ "logps/chosen": -316.3713684082031,
983
+ "logps/rejected": -274.1322937011719,
984
+ "loss": 0.5209,
985
+ "rewards/accuracies": 0.768750011920929,
986
+ "rewards/chosen": 0.07891981303691864,
987
+ "rewards/margins": 0.7175663113594055,
988
+ "rewards/rejected": -0.6386464834213257,
989
+ "step": 590
990
+ },
991
+ {
992
+ "epoch": 1.2562156503533106,
993
+ "grad_norm": 37.248359587110805,
994
+ "learning_rate": 7.209390765564318e-07,
995
+ "logits/chosen": -0.15565916895866394,
996
+ "logits/rejected": -0.10866830497980118,
997
+ "logps/chosen": -306.9658203125,
998
+ "logps/rejected": -306.49078369140625,
999
+ "loss": 0.5099,
1000
+ "rewards/accuracies": 0.706250011920929,
1001
+ "rewards/chosen": 0.1327342391014099,
1002
+ "rewards/margins": 0.7248380780220032,
1003
+ "rewards/rejected": -0.5921037197113037,
1004
+ "step": 600
1005
+ },
1006
+ {
1007
+ "epoch": 1.2562156503533106,
1008
+ "eval_logits/chosen": -0.33804643154144287,
1009
+ "eval_logits/rejected": -0.3634737432003021,
1010
+ "eval_logps/chosen": -310.3228759765625,
1011
+ "eval_logps/rejected": -275.3493347167969,
1012
+ "eval_loss": 0.6006649136543274,
1013
+ "eval_rewards/accuracies": 0.6785714030265808,
1014
+ "eval_rewards/chosen": -0.019210144877433777,
1015
+ "eval_rewards/margins": 0.5136736631393433,
1016
+ "eval_rewards/rejected": -0.5328837633132935,
1017
+ "eval_runtime": 19.1325,
1018
+ "eval_samples_per_second": 104.534,
1019
+ "eval_steps_per_second": 3.293,
1020
+ "step": 600
1021
+ },
1022
+ {
1023
+ "epoch": 1.2771525778591992,
1024
+ "grad_norm": 39.48209474577774,
1025
+ "learning_rate": 7.099254522348064e-07,
1026
+ "logits/chosen": -0.09819002449512482,
1027
+ "logits/rejected": -0.1692955642938614,
1028
+ "logps/chosen": -290.13140869140625,
1029
+ "logps/rejected": -237.3570098876953,
1030
+ "loss": 0.5202,
1031
+ "rewards/accuracies": 0.7749999761581421,
1032
+ "rewards/chosen": 0.1899166852235794,
1033
+ "rewards/margins": 0.8487440347671509,
1034
+ "rewards/rejected": -0.6588274240493774,
1035
+ "step": 610
1036
+ },
1037
+ {
1038
+ "epoch": 1.2980895053650876,
1039
+ "grad_norm": 35.38064058006504,
1040
+ "learning_rate": 6.987867483994716e-07,
1041
+ "logits/chosen": 0.08088377118110657,
1042
+ "logits/rejected": -0.028685202822089195,
1043
+ "logps/chosen": -286.01324462890625,
1044
+ "logps/rejected": -242.1278076171875,
1045
+ "loss": 0.5113,
1046
+ "rewards/accuracies": 0.731249988079071,
1047
+ "rewards/chosen": 0.08664991706609726,
1048
+ "rewards/margins": 0.6891879439353943,
1049
+ "rewards/rejected": -0.6025381088256836,
1050
+ "step": 620
1051
+ },
1052
+ {
1053
+ "epoch": 1.3190264328709762,
1054
+ "grad_norm": 36.280579625575726,
1055
+ "learning_rate": 6.875296018047809e-07,
1056
+ "logits/chosen": -0.15825395286083221,
1057
+ "logits/rejected": -0.13115188479423523,
1058
+ "logps/chosen": -291.768798828125,
1059
+ "logps/rejected": -271.0327453613281,
1060
+ "loss": 0.5133,
1061
+ "rewards/accuracies": 0.706250011920929,
1062
+ "rewards/chosen": 0.25919127464294434,
1063
+ "rewards/margins": 0.8277707099914551,
1064
+ "rewards/rejected": -0.5685793161392212,
1065
+ "step": 630
1066
+ },
1067
+ {
1068
+ "epoch": 1.3399633603768648,
1069
+ "grad_norm": 32.57724385812019,
1070
+ "learning_rate": 6.761607197766296e-07,
1071
+ "logits/chosen": -0.21067364513874054,
1072
+ "logits/rejected": -0.15081565082073212,
1073
+ "logps/chosen": -272.00518798828125,
1074
+ "logps/rejected": -279.40478515625,
1075
+ "loss": 0.5105,
1076
+ "rewards/accuracies": 0.762499988079071,
1077
+ "rewards/chosen": 0.14864543080329895,
1078
+ "rewards/margins": 0.7687338590621948,
1079
+ "rewards/rejected": -0.6200884580612183,
1080
+ "step": 640
1081
+ },
1082
+ {
1083
+ "epoch": 1.3609002878827532,
1084
+ "grad_norm": 37.48616481446126,
1085
+ "learning_rate": 6.646868762160398e-07,
1086
+ "logits/chosen": -0.19943957030773163,
1087
+ "logits/rejected": -0.161566823720932,
1088
+ "logps/chosen": -268.23638916015625,
1089
+ "logps/rejected": -252.7967987060547,
1090
+ "loss": 0.5372,
1091
+ "rewards/accuracies": 0.699999988079071,
1092
+ "rewards/chosen": 0.008694097399711609,
1093
+ "rewards/margins": 0.623866617679596,
1094
+ "rewards/rejected": -0.6151725053787231,
1095
+ "step": 650
1096
+ },
1097
+ {
1098
+ "epoch": 1.3818372153886418,
1099
+ "grad_norm": 39.563737783736045,
1100
+ "learning_rate": 6.531149075630796e-07,
1101
+ "logits/chosen": -0.11065585911273956,
1102
+ "logits/rejected": -0.17729897797107697,
1103
+ "logps/chosen": -263.4093933105469,
1104
+ "logps/rejected": -251.0884246826172,
1105
+ "loss": 0.5323,
1106
+ "rewards/accuracies": 0.6875,
1107
+ "rewards/chosen": -0.026829296723008156,
1108
+ "rewards/margins": 0.4506412148475647,
1109
+ "rewards/rejected": -0.4774704873561859,
1110
+ "step": 660
1111
+ },
1112
+ {
1113
+ "epoch": 1.4027741428945302,
1114
+ "grad_norm": 33.46296192971815,
1115
+ "learning_rate": 6.414517087235185e-07,
1116
+ "logits/chosen": -0.20335140824317932,
1117
+ "logits/rejected": -0.2963979244232178,
1118
+ "logps/chosen": -293.1023864746094,
1119
+ "logps/rejected": -253.64291381835938,
1120
+ "loss": 0.5049,
1121
+ "rewards/accuracies": 0.71875,
1122
+ "rewards/chosen": 0.00017919539823196828,
1123
+ "rewards/margins": 0.5864929556846619,
1124
+ "rewards/rejected": -0.5863137245178223,
1125
+ "step": 670
1126
+ },
1127
+ {
1128
+ "epoch": 1.4237110704004188,
1129
+ "grad_norm": 40.03881951253335,
1130
+ "learning_rate": 6.297042289606479e-07,
1131
+ "logits/chosen": -0.13722732663154602,
1132
+ "logits/rejected": -0.1250244826078415,
1133
+ "logps/chosen": -295.9792785644531,
1134
+ "logps/rejected": -296.83038330078125,
1135
+ "loss": 0.5133,
1136
+ "rewards/accuracies": 0.6812499761581421,
1137
+ "rewards/chosen": -0.014182251878082752,
1138
+ "rewards/margins": 0.5892963409423828,
1139
+ "rewards/rejected": -0.6034785509109497,
1140
+ "step": 680
1141
+ },
1142
+ {
1143
+ "epoch": 1.4446479979063072,
1144
+ "grad_norm": 29.250271504061402,
1145
+ "learning_rate": 6.178794677547137e-07,
1146
+ "logits/chosen": -0.18004044890403748,
1147
+ "logits/rejected": -0.1459943801164627,
1148
+ "logps/chosen": -298.6792297363281,
1149
+ "logps/rejected": -257.5166931152344,
1150
+ "loss": 0.5157,
1151
+ "rewards/accuracies": 0.7124999761581421,
1152
+ "rewards/chosen": 0.07589882612228394,
1153
+ "rewards/margins": 0.7056849598884583,
1154
+ "rewards/rejected": -0.6297860741615295,
1155
+ "step": 690
1156
+ },
1157
+ {
1158
+ "epoch": 1.4655849254121958,
1159
+ "grad_norm": 32.49512416553128,
1160
+ "learning_rate": 6.059844706324286e-07,
1161
+ "logits/chosen": -0.07275749742984772,
1162
+ "logits/rejected": 0.005568481981754303,
1163
+ "logps/chosen": -314.0689697265625,
1164
+ "logps/rejected": -328.4625244140625,
1165
+ "loss": 0.5056,
1166
+ "rewards/accuracies": 0.762499988079071,
1167
+ "rewards/chosen": 0.13801348209381104,
1168
+ "rewards/margins": 0.876538872718811,
1169
+ "rewards/rejected": -0.738525390625,
1170
+ "step": 700
1171
+ },
1172
+ {
1173
+ "epoch": 1.4655849254121958,
1174
+ "eval_logits/chosen": -0.3406723737716675,
1175
+ "eval_logits/rejected": -0.36724287271499634,
1176
+ "eval_logps/chosen": -310.4104309082031,
1177
+ "eval_logps/rejected": -275.4717102050781,
1178
+ "eval_loss": 0.5875913500785828,
1179
+ "eval_rewards/accuracies": 0.6904761791229248,
1180
+ "eval_rewards/chosen": -0.06298798322677612,
1181
+ "eval_rewards/margins": 0.5310800671577454,
1182
+ "eval_rewards/rejected": -0.5940679311752319,
1183
+ "eval_runtime": 19.0211,
1184
+ "eval_samples_per_second": 105.146,
1185
+ "eval_steps_per_second": 3.312,
1186
+ "step": 700
1187
+ },
1188
+ {
1189
+ "epoch": 1.4865218529180844,
1190
+ "grad_norm": 35.75327607918745,
1191
+ "learning_rate": 5.940263249690477e-07,
1192
+ "logits/chosen": -0.24695155024528503,
1193
+ "logits/rejected": -0.22096967697143555,
1194
+ "logps/chosen": -295.2511901855469,
1195
+ "logps/rejected": -276.71051025390625,
1196
+ "loss": 0.5132,
1197
+ "rewards/accuracies": 0.731249988079071,
1198
+ "rewards/chosen": 0.058757662773132324,
1199
+ "rewards/margins": 0.7594215273857117,
1200
+ "rewards/rejected": -0.7006638646125793,
1201
+ "step": 710
1202
+ },
1203
+ {
1204
+ "epoch": 1.5074587804239727,
1205
+ "grad_norm": 41.16638143070179,
1206
+ "learning_rate": 5.820121557655108e-07,
1207
+ "logits/chosen": -0.1299329400062561,
1208
+ "logits/rejected": -0.10874001681804657,
1209
+ "logps/chosen": -303.56732177734375,
1210
+ "logps/rejected": -280.8491516113281,
1211
+ "loss": 0.5135,
1212
+ "rewards/accuracies": 0.7437499761581421,
1213
+ "rewards/chosen": 0.11438952386379242,
1214
+ "rewards/margins": 0.7343131899833679,
1215
+ "rewards/rejected": -0.6199236512184143,
1216
+ "step": 720
1217
+ },
1218
+ {
1219
+ "epoch": 1.5283957079298613,
1220
+ "grad_norm": 41.49005357475057,
1221
+ "learning_rate": 5.699491214031657e-07,
1222
+ "logits/chosen": -0.14048215746879578,
1223
+ "logits/rejected": -0.18900522589683533,
1224
+ "logps/chosen": -280.69598388671875,
1225
+ "logps/rejected": -255.81298828125,
1226
+ "loss": 0.5166,
1227
+ "rewards/accuracies": 0.8125,
1228
+ "rewards/chosen": 0.11929772049188614,
1229
+ "rewards/margins": 0.7799339294433594,
1230
+ "rewards/rejected": -0.6606361269950867,
1231
+ "step": 730
1232
+ },
1233
+ {
1234
+ "epoch": 1.54933263543575,
1235
+ "grad_norm": 34.97801212344648,
1236
+ "learning_rate": 5.578444093786008e-07,
1237
+ "logits/chosen": 0.06411169469356537,
1238
+ "logits/rejected": 0.15786513686180115,
1239
+ "logps/chosen": -315.06182861328125,
1240
+ "logps/rejected": -282.93115234375,
1241
+ "loss": 0.5128,
1242
+ "rewards/accuracies": 0.7437499761581421,
1243
+ "rewards/chosen": 0.24365051090717316,
1244
+ "rewards/margins": 0.8540836572647095,
1245
+ "rewards/rejected": -0.6104331612586975,
1246
+ "step": 740
1247
+ },
1248
+ {
1249
+ "epoch": 1.5702695629416383,
1250
+ "grad_norm": 32.53803724377542,
1251
+ "learning_rate": 5.457052320211339e-07,
1252
+ "logits/chosen": -0.22511212527751923,
1253
+ "logits/rejected": -0.22842809557914734,
1254
+ "logps/chosen": -298.0352783203125,
1255
+ "logps/rejected": -268.8421936035156,
1256
+ "loss": 0.4961,
1257
+ "rewards/accuracies": 0.6937500238418579,
1258
+ "rewards/chosen": 0.09702634066343307,
1259
+ "rewards/margins": 0.632623016834259,
1260
+ "rewards/rejected": -0.5355967283248901,
1261
+ "step": 750
1262
+ },
1263
+ {
1264
+ "epoch": 1.5912064904475267,
1265
+ "grad_norm": 36.857273577646474,
1266
+ "learning_rate": 5.335388221955012e-07,
1267
+ "logits/chosen": -0.044903479516506195,
1268
+ "logits/rejected": 0.0046501667238771915,
1269
+ "logps/chosen": -358.9960632324219,
1270
+ "logps/rejected": -336.44354248046875,
1271
+ "loss": 0.4997,
1272
+ "rewards/accuracies": 0.6812499761581421,
1273
+ "rewards/chosen": 0.28686124086380005,
1274
+ "rewards/margins": 0.7968252301216125,
1275
+ "rewards/rejected": -0.5099639296531677,
1276
+ "step": 760
1277
+ },
1278
+ {
1279
+ "epoch": 1.6121434179534153,
1280
+ "grad_norm": 31.737574949447513,
1281
+ "learning_rate": 5.213524289923126e-07,
1282
+ "logits/chosen": -0.03366886079311371,
1283
+ "logits/rejected": -0.18763691186904907,
1284
+ "logps/chosen": -327.0910339355469,
1285
+ "logps/rejected": -272.4969787597656,
1286
+ "loss": 0.5114,
1287
+ "rewards/accuracies": 0.7562500238418579,
1288
+ "rewards/chosen": 0.2613201141357422,
1289
+ "rewards/margins": 0.8974519968032837,
1290
+ "rewards/rejected": -0.6361318826675415,
1291
+ "step": 770
1292
+ },
1293
+ {
1294
+ "epoch": 1.633080345459304,
1295
+ "grad_norm": 34.36622996446216,
1296
+ "learning_rate": 5.091533134088387e-07,
1297
+ "logits/chosen": -0.12894697487354279,
1298
+ "logits/rejected": -0.142494797706604,
1299
+ "logps/chosen": -320.5164794921875,
1300
+ "logps/rejected": -280.9849548339844,
1301
+ "loss": 0.5146,
1302
+ "rewards/accuracies": 0.7749999761581421,
1303
+ "rewards/chosen": 0.27436262369155884,
1304
+ "rewards/margins": 0.8554395437240601,
1305
+ "rewards/rejected": -0.581076979637146,
1306
+ "step": 780
1307
+ },
1308
+ {
1309
+ "epoch": 1.6540172729651923,
1310
+ "grad_norm": 38.709599843434866,
1311
+ "learning_rate": 4.969487440227038e-07,
1312
+ "logits/chosen": -0.21234026551246643,
1313
+ "logits/rejected": -0.22163410484790802,
1314
+ "logps/chosen": -300.9823303222656,
1315
+ "logps/rejected": -276.39508056640625,
1316
+ "loss": 0.4852,
1317
+ "rewards/accuracies": 0.78125,
1318
+ "rewards/chosen": 0.24367956817150116,
1319
+ "rewards/margins": 0.9389246106147766,
1320
+ "rewards/rejected": -0.6952449679374695,
1321
+ "step": 790
1322
+ },
1323
+ {
1324
+ "epoch": 1.674954200471081,
1325
+ "grad_norm": 31.812270366410278,
1326
+ "learning_rate": 4.847459926610619e-07,
1327
+ "logits/chosen": -0.018023919314146042,
1328
+ "logits/rejected": -0.12209578603506088,
1329
+ "logps/chosen": -350.00439453125,
1330
+ "logps/rejected": -305.96954345703125,
1331
+ "loss": 0.4936,
1332
+ "rewards/accuracies": 0.768750011920929,
1333
+ "rewards/chosen": 0.1853758990764618,
1334
+ "rewards/margins": 0.8932290077209473,
1335
+ "rewards/rejected": -0.7078530788421631,
1336
+ "step": 800
1337
+ },
1338
+ {
1339
+ "epoch": 1.674954200471081,
1340
+ "eval_logits/chosen": -0.3383854031562805,
1341
+ "eval_logits/rejected": -0.36581355333328247,
1342
+ "eval_logps/chosen": -310.3436584472656,
1343
+ "eval_logps/rejected": -275.40155029296875,
1344
+ "eval_loss": 0.5994271636009216,
1345
+ "eval_rewards/accuracies": 0.6746031641960144,
1346
+ "eval_rewards/chosen": -0.029591551050543785,
1347
+ "eval_rewards/margins": 0.5294089317321777,
1348
+ "eval_rewards/rejected": -0.559000551700592,
1349
+ "eval_runtime": 19.1971,
1350
+ "eval_samples_per_second": 104.182,
1351
+ "eval_steps_per_second": 3.282,
1352
+ "step": 800
1353
+ },
1354
+ {
1355
+ "epoch": 1.6958911279769695,
1356
+ "grad_norm": 35.049859279627235,
1357
+ "learning_rate": 4.7255233006783624e-07,
1358
+ "logits/chosen": -0.18587855994701385,
1359
+ "logits/rejected": -0.16283151507377625,
1360
+ "logps/chosen": -307.56439208984375,
1361
+ "logps/rejected": -278.2900390625,
1362
+ "loss": 0.5055,
1363
+ "rewards/accuracies": 0.768750011920929,
1364
+ "rewards/chosen": 0.10620725154876709,
1365
+ "rewards/margins": 0.892121434211731,
1366
+ "rewards/rejected": -0.7859140634536743,
1367
+ "step": 810
1368
+ },
1369
+ {
1370
+ "epoch": 1.7168280554828579,
1371
+ "grad_norm": 34.134111938828184,
1372
+ "learning_rate": 4.6037502157160567e-07,
1373
+ "logits/chosen": -0.2755785584449768,
1374
+ "logits/rejected": -0.31054019927978516,
1375
+ "logps/chosen": -320.56695556640625,
1376
+ "logps/rejected": -273.4154968261719,
1377
+ "loss": 0.5048,
1378
+ "rewards/accuracies": 0.7437499761581421,
1379
+ "rewards/chosen": 0.20151178538799286,
1380
+ "rewards/margins": 0.9227310419082642,
1381
+ "rewards/rejected": -0.7212191820144653,
1382
+ "step": 820
1383
+ },
1384
+ {
1385
+ "epoch": 1.7377649829887463,
1386
+ "grad_norm": 38.68230830225304,
1387
+ "learning_rate": 4.482213227567161e-07,
1388
+ "logits/chosen": -0.2091369926929474,
1389
+ "logits/rejected": -0.14194951951503754,
1390
+ "logps/chosen": -277.35693359375,
1391
+ "logps/rejected": -281.7249450683594,
1392
+ "loss": 0.484,
1393
+ "rewards/accuracies": 0.7562500238418579,
1394
+ "rewards/chosen": 0.10125979036092758,
1395
+ "rewards/margins": 0.8731620907783508,
1396
+ "rewards/rejected": -0.7719023823738098,
1397
+ "step": 830
1398
+ },
1399
+ {
1400
+ "epoch": 1.7587019104946349,
1401
+ "grad_norm": 33.945256383172804,
1402
+ "learning_rate": 4.3609847514019763e-07,
1403
+ "logits/chosen": -0.30677181482315063,
1404
+ "logits/rejected": -0.1776028573513031,
1405
+ "logps/chosen": -261.38873291015625,
1406
+ "logps/rejected": -267.8268127441406,
1407
+ "loss": 0.5094,
1408
+ "rewards/accuracies": 0.7437499761581421,
1409
+ "rewards/chosen": 0.18778590857982635,
1410
+ "rewards/margins": 0.8258863687515259,
1411
+ "rewards/rejected": -0.6381004452705383,
1412
+ "step": 840
1413
+ },
1414
+ {
1415
+ "epoch": 1.7796388380005235,
1416
+ "grad_norm": 34.20224125304341,
1417
+ "learning_rate": 4.240137018570661e-07,
1418
+ "logits/chosen": -0.11399509757757187,
1419
+ "logits/rejected": -0.1185418963432312,
1420
+ "logps/chosen": -275.3580017089844,
1421
+ "logps/rejected": -282.1163024902344,
1422
+ "loss": 0.5074,
1423
+ "rewards/accuracies": 0.737500011920929,
1424
+ "rewards/chosen": -0.03538894280791283,
1425
+ "rewards/margins": 0.7777508497238159,
1426
+ "rewards/rejected": -0.813139796257019,
1427
+ "step": 850
1428
+ },
1429
+ {
1430
+ "epoch": 1.8005757655064119,
1431
+ "grad_norm": 31.946083788257877,
1432
+ "learning_rate": 4.1197420335657366e-07,
1433
+ "logits/chosen": -0.09024197608232498,
1434
+ "logits/rejected": -0.20059815049171448,
1435
+ "logps/chosen": -298.12762451171875,
1436
+ "logps/rejected": -242.2689666748047,
1437
+ "loss": 0.5089,
1438
+ "rewards/accuracies": 0.75,
1439
+ "rewards/chosen": 0.15691380202770233,
1440
+ "rewards/margins": 0.8090406656265259,
1441
+ "rewards/rejected": -0.6521269083023071,
1442
+ "step": 860
1443
+ },
1444
+ {
1445
+ "epoch": 1.8215126930123005,
1446
+ "grad_norm": 28.583891688236477,
1447
+ "learning_rate": 3.9998715311197783e-07,
1448
+ "logits/chosen": -0.1159423366189003,
1449
+ "logits/rejected": -0.16631600260734558,
1450
+ "logps/chosen": -337.1079406738281,
1451
+ "logps/rejected": -285.5828857421875,
1452
+ "loss": 0.4894,
1453
+ "rewards/accuracies": 0.7124999761581421,
1454
+ "rewards/chosen": 0.11767788976430893,
1455
+ "rewards/margins": 0.6672422289848328,
1456
+ "rewards/rejected": -0.5495643615722656,
1457
+ "step": 870
1458
+ },
1459
+ {
1460
+ "epoch": 1.842449620518189,
1461
+ "grad_norm": 33.76166639382152,
1462
+ "learning_rate": 3.880596933463843e-07,
1463
+ "logits/chosen": -0.07985590398311615,
1464
+ "logits/rejected": -0.03050703927874565,
1465
+ "logps/chosen": -274.9603576660156,
1466
+ "logps/rejected": -266.6165771484375,
1467
+ "loss": 0.5075,
1468
+ "rewards/accuracies": 0.731249988079071,
1469
+ "rewards/chosen": 0.09320048242807388,
1470
+ "rewards/margins": 0.7023938298225403,
1471
+ "rewards/rejected": -0.6091933250427246,
1472
+ "step": 880
1473
+ },
1474
+ {
1475
+ "epoch": 1.8633865480240774,
1476
+ "grad_norm": 40.29693979290027,
1477
+ "learning_rate": 3.761989307772085e-07,
1478
+ "logits/chosen": -0.15825888514518738,
1479
+ "logits/rejected": -0.1759234219789505,
1480
+ "logps/chosen": -306.653076171875,
1481
+ "logps/rejected": -279.01177978515625,
1482
+ "loss": 0.5055,
1483
+ "rewards/accuracies": 0.8187500238418579,
1484
+ "rewards/chosen": 0.16093602776527405,
1485
+ "rewards/margins": 0.937556266784668,
1486
+ "rewards/rejected": -0.7766203284263611,
1487
+ "step": 890
1488
+ },
1489
+ {
1490
+ "epoch": 1.8843234755299658,
1491
+ "grad_norm": 36.002728955160904,
1492
+ "learning_rate": 3.6441193238179146e-07,
1493
+ "logits/chosen": -0.05288013815879822,
1494
+ "logits/rejected": -0.13031360507011414,
1495
+ "logps/chosen": -334.34417724609375,
1496
+ "logps/rejected": -269.53558349609375,
1497
+ "loss": 0.4904,
1498
+ "rewards/accuracies": 0.731249988079071,
1499
+ "rewards/chosen": 0.14437799155712128,
1500
+ "rewards/margins": 0.8378474116325378,
1501
+ "rewards/rejected": -0.6934694051742554,
1502
+ "step": 900
1503
+ },
1504
+ {
1505
+ "epoch": 1.8843234755299658,
1506
+ "eval_logits/chosen": -0.3443281650543213,
1507
+ "eval_logits/rejected": -0.3704880475997925,
1508
+ "eval_logps/chosen": -310.4006042480469,
1509
+ "eval_logps/rejected": -275.5133972167969,
1510
+ "eval_loss": 0.5989052653312683,
1511
+ "eval_rewards/accuracies": 0.6944444179534912,
1512
+ "eval_rewards/chosen": -0.05807310715317726,
1513
+ "eval_rewards/margins": 0.55684494972229,
1514
+ "eval_rewards/rejected": -0.614918053150177,
1515
+ "eval_runtime": 18.9036,
1516
+ "eval_samples_per_second": 105.8,
1517
+ "eval_steps_per_second": 3.333,
1518
+ "step": 900
1519
+ },
1520
+ {
1521
+ "epoch": 1.9052604030358546,
1522
+ "grad_norm": 31.165480893885753,
1523
+ "learning_rate": 3.5270572118669715e-07,
1524
+ "logits/chosen": -0.11026148498058319,
1525
+ "logits/rejected": -0.16978046298027039,
1526
+ "logps/chosen": -278.1309509277344,
1527
+ "logps/rejected": -235.88150024414062,
1528
+ "loss": 0.5272,
1529
+ "rewards/accuracies": 0.762499988079071,
1530
+ "rewards/chosen": 0.02903774380683899,
1531
+ "rewards/margins": 0.8374470472335815,
1532
+ "rewards/rejected": -0.8084093332290649,
1533
+ "step": 910
1534
+ },
1535
+ {
1536
+ "epoch": 1.926197330541743,
1537
+ "grad_norm": 40.996379136014625,
1538
+ "learning_rate": 3.4108727208319314e-07,
1539
+ "logits/chosen": -0.17259056866168976,
1540
+ "logits/rejected": -0.20306821167469025,
1541
+ "logps/chosen": -312.1188049316406,
1542
+ "logps/rejected": -266.6794128417969,
1543
+ "loss": 0.5039,
1544
+ "rewards/accuracies": 0.737500011920929,
1545
+ "rewards/chosen": 0.16847112774848938,
1546
+ "rewards/margins": 0.8200035095214844,
1547
+ "rewards/rejected": -0.6515323519706726,
1548
+ "step": 920
1549
+ },
1550
+ {
1551
+ "epoch": 1.9471342580476314,
1552
+ "grad_norm": 31.825399091982387,
1553
+ "learning_rate": 3.295635076714144e-07,
1554
+ "logits/chosen": -0.1496247947216034,
1555
+ "logits/rejected": -0.17214402556419373,
1556
+ "logps/chosen": -301.103515625,
1557
+ "logps/rejected": -270.370361328125,
1558
+ "loss": 0.4903,
1559
+ "rewards/accuracies": 0.78125,
1560
+ "rewards/chosen": 0.16043229401111603,
1561
+ "rewards/margins": 0.7891928553581238,
1562
+ "rewards/rejected": -0.628760576248169,
1563
+ "step": 930
1564
+ },
1565
+ {
1566
+ "epoch": 1.96807118555352,
1567
+ "grad_norm": 39.834235726892466,
1568
+ "learning_rate": 3.181412941356816e-07,
1569
+ "logits/chosen": -0.13259033858776093,
1570
+ "logits/rejected": -0.11601094901561737,
1571
+ "logps/chosen": -266.5997314453125,
1572
+ "logps/rejected": -231.60848999023438,
1573
+ "loss": 0.485,
1574
+ "rewards/accuracies": 0.78125,
1575
+ "rewards/chosen": 0.19742903113365173,
1576
+ "rewards/margins": 0.8145695924758911,
1577
+ "rewards/rejected": -0.6171405911445618,
1578
+ "step": 940
1579
+ },
1580
+ {
1581
+ "epoch": 1.9890081130594086,
1582
+ "grad_norm": 35.65864774290716,
1583
+ "learning_rate": 3.068274371534356e-07,
1584
+ "logits/chosen": -0.11888673156499863,
1585
+ "logits/rejected": -0.2420310229063034,
1586
+ "logps/chosen": -301.62213134765625,
1587
+ "logps/rejected": -250.2771453857422,
1588
+ "loss": 0.4887,
1589
+ "rewards/accuracies": 0.768750011920929,
1590
+ "rewards/chosen": 0.002846676157787442,
1591
+ "rewards/margins": 0.7688849568367004,
1592
+ "rewards/rejected": -0.7660382986068726,
1593
+ "step": 950
1594
+ },
1595
+ {
1596
+ "epoch": 2.009945040565297,
1597
+ "grad_norm": 28.814451971980255,
1598
+ "learning_rate": 2.956286778402226e-07,
1599
+ "logits/chosen": -0.10088515281677246,
1600
+ "logits/rejected": -0.12641310691833496,
1601
+ "logps/chosen": -301.3664855957031,
1602
+ "logps/rejected": -283.16461181640625,
1603
+ "loss": 0.4567,
1604
+ "rewards/accuracies": 0.762499988079071,
1605
+ "rewards/chosen": 0.20988380908966064,
1606
+ "rewards/margins": 0.9322754740715027,
1607
+ "rewards/rejected": -0.7223917245864868,
1608
+ "step": 960
1609
+ },
1610
+ {
1611
+ "epoch": 2.0308819680711854,
1612
+ "grad_norm": 28.79413061525845,
1613
+ "learning_rate": 2.84551688733146e-07,
1614
+ "logits/chosen": -0.07364392280578613,
1615
+ "logits/rejected": -0.1452295333147049,
1616
+ "logps/chosen": -299.0059509277344,
1617
+ "logps/rejected": -238.53231811523438,
1618
+ "loss": 0.4557,
1619
+ "rewards/accuracies": 0.84375,
1620
+ "rewards/chosen": 0.166302889585495,
1621
+ "rewards/margins": 0.9308949708938599,
1622
+ "rewards/rejected": -0.7645919919013977,
1623
+ "step": 970
1624
+ },
1625
+ {
1626
+ "epoch": 2.051818895577074,
1627
+ "grad_norm": 31.23059423273923,
1628
+ "learning_rate": 2.7360306981518147e-07,
1629
+ "logits/chosen": -0.09392131119966507,
1630
+ "logits/rejected": -0.047506559640169144,
1631
+ "logps/chosen": -262.2680969238281,
1632
+ "logps/rejected": -253.03271484375,
1633
+ "loss": 0.4445,
1634
+ "rewards/accuracies": 0.8125,
1635
+ "rewards/chosen": 0.040274180471897125,
1636
+ "rewards/margins": 0.7928990125656128,
1637
+ "rewards/rejected": -0.7526248097419739,
1638
+ "step": 980
1639
+ },
1640
+ {
1641
+ "epoch": 2.0727558230829626,
1642
+ "grad_norm": 27.855961833782285,
1643
+ "learning_rate": 2.6278934458271996e-07,
1644
+ "logits/chosen": -0.1288485825061798,
1645
+ "logits/rejected": -0.1765173375606537,
1646
+ "logps/chosen": -314.7231750488281,
1647
+ "logps/rejected": -302.44964599609375,
1648
+ "loss": 0.4327,
1649
+ "rewards/accuracies": 0.800000011920929,
1650
+ "rewards/chosen": 0.18189235031604767,
1651
+ "rewards/margins": 1.0036437511444092,
1652
+ "rewards/rejected": -0.8217514157295227,
1653
+ "step": 990
1654
+ },
1655
+ {
1656
+ "epoch": 2.093692750588851,
1657
+ "grad_norm": 35.285113595842645,
1658
+ "learning_rate": 2.5211695615868456e-07,
1659
+ "logits/chosen": -0.1857554316520691,
1660
+ "logits/rejected": -0.3039974272251129,
1661
+ "logps/chosen": -281.1855163574219,
1662
+ "logps/rejected": -240.62710571289062,
1663
+ "loss": 0.4622,
1664
+ "rewards/accuracies": 0.793749988079071,
1665
+ "rewards/chosen": 0.15526309609413147,
1666
+ "rewards/margins": 0.9393559694290161,
1667
+ "rewards/rejected": -0.7840928435325623,
1668
+ "step": 1000
1669
+ },
1670
+ {
1671
+ "epoch": 2.093692750588851,
1672
+ "eval_logits/chosen": -0.3450426459312439,
1673
+ "eval_logits/rejected": -0.37242141366004944,
1674
+ "eval_logps/chosen": -310.4169006347656,
1675
+ "eval_logps/rejected": -275.49713134765625,
1676
+ "eval_loss": 0.59389328956604,
1677
+ "eval_rewards/accuracies": 0.6944444179534912,
1678
+ "eval_rewards/chosen": -0.06623569875955582,
1679
+ "eval_rewards/margins": 0.5405489802360535,
1680
+ "eval_rewards/rejected": -0.6067846417427063,
1681
+ "eval_runtime": 19.108,
1682
+ "eval_samples_per_second": 104.668,
1683
+ "eval_steps_per_second": 3.297,
1684
+ "step": 1000
1685
+ },
1686
+ {
1687
+ "epoch": 2.11462967809474,
1688
+ "grad_norm": 32.59720699053956,
1689
+ "learning_rate": 2.4159226345353647e-07,
1690
+ "logits/chosen": -0.30413442850112915,
1691
+ "logits/rejected": -0.24426865577697754,
1692
+ "logps/chosen": -270.49920654296875,
1693
+ "logps/rejected": -255.44723510742188,
1694
+ "loss": 0.4508,
1695
+ "rewards/accuracies": 0.7562500238418579,
1696
+ "rewards/chosen": 0.03533598408102989,
1697
+ "rewards/margins": 0.7992294430732727,
1698
+ "rewards/rejected": -0.7638934254646301,
1699
+ "step": 1010
1700
+ },
1701
+ {
1702
+ "epoch": 2.135566605600628,
1703
+ "grad_norm": 30.8340208692361,
1704
+ "learning_rate": 2.312215373764551e-07,
1705
+ "logits/chosen": -0.07855603843927383,
1706
+ "logits/rejected": -0.094606414437294,
1707
+ "logps/chosen": -289.9374084472656,
1708
+ "logps/rejected": -277.69586181640625,
1709
+ "loss": 0.4353,
1710
+ "rewards/accuracies": 0.8125,
1711
+ "rewards/chosen": 0.19073505699634552,
1712
+ "rewards/margins": 0.8868581652641296,
1713
+ "rewards/rejected": -0.6961231231689453,
1714
+ "step": 1020
1715
+ },
1716
+ {
1717
+ "epoch": 2.1565035331065165,
1718
+ "grad_norm": 29.34387598294554,
1719
+ "learning_rate": 2.2101095709895512e-07,
1720
+ "logits/chosen": 0.009572875685989857,
1721
+ "logits/rejected": -0.09117701649665833,
1722
+ "logps/chosen": -282.7410583496094,
1723
+ "logps/rejected": -239.8011932373047,
1724
+ "loss": 0.438,
1725
+ "rewards/accuracies": 0.793749988079071,
1726
+ "rewards/chosen": 0.12627363204956055,
1727
+ "rewards/margins": 0.8894168138504028,
1728
+ "rewards/rejected": -0.7631431221961975,
1729
+ "step": 1030
1730
+ },
1731
+ {
1732
+ "epoch": 2.177440460612405,
1733
+ "grad_norm": 29.466746389871066,
1734
+ "learning_rate": 2.1096660637315932e-07,
1735
+ "logits/chosen": -0.0853545218706131,
1736
+ "logits/rejected": -0.1613493412733078,
1737
+ "logps/chosen": -309.2115783691406,
1738
+ "logps/rejected": -279.7373352050781,
1739
+ "loss": 0.4374,
1740
+ "rewards/accuracies": 0.856249988079071,
1741
+ "rewards/chosen": 0.2535983920097351,
1742
+ "rewards/margins": 1.087397575378418,
1743
+ "rewards/rejected": -0.8337991833686829,
1744
+ "step": 1040
1745
+ },
1746
+ {
1747
+ "epoch": 2.1983773881182938,
1748
+ "grad_norm": 31.3324256474218,
1749
+ "learning_rate": 2.0109446990692963e-07,
1750
+ "logits/chosen": -0.17987683415412903,
1751
+ "logits/rejected": -0.24151673913002014,
1752
+ "logps/chosen": -323.58477783203125,
1753
+ "logps/rejected": -268.08111572265625,
1754
+ "loss": 0.4365,
1755
+ "rewards/accuracies": 0.75,
1756
+ "rewards/chosen": 0.09657944738864899,
1757
+ "rewards/margins": 0.8588522672653198,
1758
+ "rewards/rejected": -0.7622728943824768,
1759
+ "step": 1050
1760
+ },
1761
+ {
1762
+ "epoch": 2.219314315624182,
1763
+ "grad_norm": 33.25666630785023,
1764
+ "learning_rate": 1.9140042979800737e-07,
1765
+ "logits/chosen": -0.13123273849487305,
1766
+ "logits/rejected": -0.12296972423791885,
1767
+ "logps/chosen": -260.5819396972656,
1768
+ "logps/rejected": -232.4111328125,
1769
+ "loss": 0.4355,
1770
+ "rewards/accuracies": 0.7437499761581421,
1771
+ "rewards/chosen": 0.05866159126162529,
1772
+ "rewards/margins": 0.7927514314651489,
1773
+ "rewards/rejected": -0.7340899109840393,
1774
+ "step": 1060
1775
+ },
1776
+ {
1777
+ "epoch": 2.2402512431300705,
1778
+ "grad_norm": 27.925843344985296,
1779
+ "learning_rate": 1.8189026202929391e-07,
1780
+ "logits/chosen": -0.1511206328868866,
1781
+ "logits/rejected": -0.17038318514823914,
1782
+ "logps/chosen": -290.31951904296875,
1783
+ "logps/rejected": -260.115966796875,
1784
+ "loss": 0.4534,
1785
+ "rewards/accuracies": 0.793749988079071,
1786
+ "rewards/chosen": 0.11966502666473389,
1787
+ "rewards/margins": 0.9545691609382629,
1788
+ "rewards/rejected": -0.8349041938781738,
1789
+ "step": 1070
1790
+ },
1791
+ {
1792
+ "epoch": 2.2611881706359593,
1793
+ "grad_norm": 26.475722050618995,
1794
+ "learning_rate": 1.725696330273575e-07,
1795
+ "logits/chosen": -0.08606094121932983,
1796
+ "logits/rejected": -0.1871510148048401,
1797
+ "logps/chosen": -288.9709777832031,
1798
+ "logps/rejected": -249.9591064453125,
1799
+ "loss": 0.4441,
1800
+ "rewards/accuracies": 0.793749988079071,
1801
+ "rewards/chosen": 0.19028839468955994,
1802
+ "rewards/margins": 1.0648086071014404,
1803
+ "rewards/rejected": -0.8745201826095581,
1804
+ "step": 1080
1805
+ },
1806
+ {
1807
+ "epoch": 2.2821250981418477,
1808
+ "grad_norm": 28.63389247775027,
1809
+ "learning_rate": 1.634440962862148e-07,
1810
+ "logits/chosen": -0.05144480615854263,
1811
+ "logits/rejected": -0.04060221463441849,
1812
+ "logps/chosen": -295.44122314453125,
1813
+ "logps/rejected": -262.346435546875,
1814
+ "loss": 0.4318,
1815
+ "rewards/accuracies": 0.8062499761581421,
1816
+ "rewards/chosen": 0.20048430562019348,
1817
+ "rewards/margins": 0.9130541086196899,
1818
+ "rewards/rejected": -0.7125697135925293,
1819
+ "step": 1090
1820
+ },
1821
+ {
1822
+ "epoch": 2.303062025647736,
1823
+ "grad_norm": 28.577071054830697,
1824
+ "learning_rate": 1.545190890584042e-07,
1825
+ "logits/chosen": -0.23821565508842468,
1826
+ "logits/rejected": -0.23792441189289093,
1827
+ "logps/chosen": -287.6651306152344,
1828
+ "logps/rejected": -261.96875,
1829
+ "loss": 0.4458,
1830
+ "rewards/accuracies": 0.8125,
1831
+ "rewards/chosen": 0.24947874248027802,
1832
+ "rewards/margins": 0.945774257183075,
1833
+ "rewards/rejected": -0.696295440196991,
1834
+ "step": 1100
1835
+ },
1836
+ {
1837
+ "epoch": 2.303062025647736,
1838
+ "eval_logits/chosen": -0.3449550271034241,
1839
+ "eval_logits/rejected": -0.3728056252002716,
1840
+ "eval_logps/chosen": -310.3917541503906,
1841
+ "eval_logps/rejected": -275.5622253417969,
1842
+ "eval_loss": 0.5923458933830261,
1843
+ "eval_rewards/accuracies": 0.6944444179534912,
1844
+ "eval_rewards/chosen": -0.05364745110273361,
1845
+ "eval_rewards/margins": 0.5857000946998596,
1846
+ "eval_rewards/rejected": -0.639347493648529,
1847
+ "eval_runtime": 19.0051,
1848
+ "eval_samples_per_second": 105.235,
1849
+ "eval_steps_per_second": 3.315,
1850
+ "step": 1100
1851
+ },
1852
+ {
1853
+ "epoch": 2.323998953153625,
1854
+ "grad_norm": 30.512738174296523,
1855
+ "learning_rate": 1.4579992911531496e-07,
1856
+ "logits/chosen": -0.2525126338005066,
1857
+ "logits/rejected": -0.3065794110298157,
1858
+ "logps/chosen": -325.3319091796875,
1859
+ "logps/rejected": -288.85089111328125,
1860
+ "loss": 0.4406,
1861
+ "rewards/accuracies": 0.8187500238418579,
1862
+ "rewards/chosen": 0.28674450516700745,
1863
+ "rewards/margins": 1.1190677881240845,
1864
+ "rewards/rejected": -0.8323231935501099,
1865
+ "step": 1110
1866
+ },
1867
+ {
1868
+ "epoch": 2.3449358806595133,
1869
+ "grad_norm": 28.019655950524385,
1870
+ "learning_rate": 1.372918115787112e-07,
1871
+ "logits/chosen": -0.19494856894016266,
1872
+ "logits/rejected": -0.16709737479686737,
1873
+ "logps/chosen": -269.15802001953125,
1874
+ "logps/rejected": -281.6461486816406,
1875
+ "loss": 0.4203,
1876
+ "rewards/accuracies": 0.862500011920929,
1877
+ "rewards/chosen": 0.2475571632385254,
1878
+ "rewards/margins": 1.078798770904541,
1879
+ "rewards/rejected": -0.8312416076660156,
1880
+ "step": 1120
1881
+ },
1882
+ {
1883
+ "epoch": 2.3658728081654017,
1884
+ "grad_norm": 30.412163422048742,
1885
+ "learning_rate": 1.289998058253297e-07,
1886
+ "logits/chosen": -0.30280619859695435,
1887
+ "logits/rejected": -0.23964910209178925,
1888
+ "logps/chosen": -300.67291259765625,
1889
+ "logps/rejected": -286.5229187011719,
1890
+ "loss": 0.4429,
1891
+ "rewards/accuracies": 0.8374999761581421,
1892
+ "rewards/chosen": 0.10387909412384033,
1893
+ "rewards/margins": 0.9766238331794739,
1894
+ "rewards/rejected": -0.8727447390556335,
1895
+ "step": 1130
1896
+ },
1897
+ {
1898
+ "epoch": 2.38680973567129,
1899
+ "grad_norm": 26.079069616215104,
1900
+ "learning_rate": 1.209288524664029e-07,
1901
+ "logits/chosen": -0.09895231574773788,
1902
+ "logits/rejected": -0.04007222130894661,
1903
+ "logps/chosen": -295.33795166015625,
1904
+ "logps/rejected": -295.7159118652344,
1905
+ "loss": 0.4524,
1906
+ "rewards/accuracies": 0.8125,
1907
+ "rewards/chosen": 0.3174125850200653,
1908
+ "rewards/margins": 1.068551778793335,
1909
+ "rewards/rejected": -0.7511391639709473,
1910
+ "step": 1140
1911
+ },
1912
+ {
1913
+ "epoch": 2.407746663177179,
1914
+ "grad_norm": 35.04317303338736,
1915
+ "learning_rate": 1.1308376040390344e-07,
1916
+ "logits/chosen": -0.2176963984966278,
1917
+ "logits/rejected": -0.2429191619157791,
1918
+ "logps/chosen": -268.51568603515625,
1919
+ "logps/rejected": -243.83602905273438,
1920
+ "loss": 0.4648,
1921
+ "rewards/accuracies": 0.768750011920929,
1922
+ "rewards/chosen": 0.1225685253739357,
1923
+ "rewards/margins": 0.8287593722343445,
1924
+ "rewards/rejected": -0.7061909437179565,
1925
+ "step": 1150
1926
+ },
1927
+ {
1928
+ "epoch": 2.4286835906830673,
1929
+ "grad_norm": 27.071258876733747,
1930
+ "learning_rate": 1.0546920396526382e-07,
1931
+ "logits/chosen": -0.05287874490022659,
1932
+ "logits/rejected": -0.08571253716945648,
1933
+ "logps/chosen": -345.3739013671875,
1934
+ "logps/rejected": -296.2100524902344,
1935
+ "loss": 0.4248,
1936
+ "rewards/accuracies": 0.824999988079071,
1937
+ "rewards/chosen": 0.31707775592803955,
1938
+ "rewards/margins": 1.1236298084259033,
1939
+ "rewards/rejected": -0.8065520524978638,
1940
+ "step": 1160
1941
+ },
1942
+ {
1943
+ "epoch": 2.4496205181889557,
1944
+ "grad_norm": 30.813022693471254,
1945
+ "learning_rate": 9.808972011828054e-08,
1946
+ "logits/chosen": -0.07085756957530975,
1947
+ "logits/rejected": -0.1990344077348709,
1948
+ "logps/chosen": -342.7474060058594,
1949
+ "logps/rejected": -266.599609375,
1950
+ "loss": 0.4366,
1951
+ "rewards/accuracies": 0.831250011920929,
1952
+ "rewards/chosen": 0.2785649299621582,
1953
+ "rewards/margins": 1.1454359292984009,
1954
+ "rewards/rejected": -0.8668708801269531,
1955
+ "step": 1170
1956
+ },
1957
+ {
1958
+ "epoch": 2.470557445694844,
1959
+ "grad_norm": 28.23769321193845,
1960
+ "learning_rate": 9.094970576786032e-08,
1961
+ "logits/chosen": 0.02408856526017189,
1962
+ "logits/rejected": -0.0395381897687912,
1963
+ "logps/chosen": -267.3003234863281,
1964
+ "logps/rejected": -249.65158081054688,
1965
+ "loss": 0.4323,
1966
+ "rewards/accuracies": 0.824999988079071,
1967
+ "rewards/chosen": 0.17695440351963043,
1968
+ "rewards/margins": 1.0320888757705688,
1969
+ "rewards/rejected": -0.8551343679428101,
1970
+ "step": 1180
1971
+ },
1972
+ {
1973
+ "epoch": 2.491494373200733,
1974
+ "grad_norm": 30.361270294186127,
1975
+ "learning_rate": 8.405341513622055e-08,
1976
+ "logits/chosen": -0.16236011683940887,
1977
+ "logits/rejected": -0.20971974730491638,
1978
+ "logps/chosen": -324.1960754394531,
1979
+ "logps/rejected": -260.82647705078125,
1980
+ "loss": 0.4368,
1981
+ "rewards/accuracies": 0.831250011920929,
1982
+ "rewards/chosen": 0.3019522428512573,
1983
+ "rewards/margins": 0.9704391360282898,
1984
+ "rewards/rejected": -0.6684868931770325,
1985
+ "step": 1190
1986
+ },
1987
+ {
1988
+ "epoch": 2.5124313007066212,
1989
+ "grad_norm": 29.87892753836197,
1990
+ "learning_rate": 7.740495722810269e-08,
1991
+ "logits/chosen": -0.1393619179725647,
1992
+ "logits/rejected": -0.17410680651664734,
1993
+ "logps/chosen": -275.013427734375,
1994
+ "logps/rejected": -218.1080322265625,
1995
+ "loss": 0.4462,
1996
+ "rewards/accuracies": 0.768750011920929,
1997
+ "rewards/chosen": 0.1310921609401703,
1998
+ "rewards/margins": 0.8609299659729004,
1999
+ "rewards/rejected": -0.7298377156257629,
2000
+ "step": 1200
2001
+ },
2002
+ {
2003
+ "epoch": 2.5124313007066212,
2004
+ "eval_logits/chosen": -0.34315019845962524,
2005
+ "eval_logits/rejected": -0.37096092104911804,
2006
+ "eval_logps/chosen": -310.3815612792969,
2007
+ "eval_logps/rejected": -275.54351806640625,
2008
+ "eval_loss": 0.589375913143158,
2009
+ "eval_rewards/accuracies": 0.7023809552192688,
2010
+ "eval_rewards/chosen": -0.04855651780962944,
2011
+ "eval_rewards/margins": 0.5814378261566162,
2012
+ "eval_rewards/rejected": -0.6299943923950195,
2013
+ "eval_runtime": 19.1944,
2014
+ "eval_samples_per_second": 104.197,
2015
+ "eval_steps_per_second": 3.282,
2016
+ "step": 1200
2017
+ },
2018
+ {
2019
+ "epoch": 2.5333682282125096,
2020
+ "grad_norm": 30.713812818544117,
2021
+ "learning_rate": 7.100829338251146e-08,
2022
+ "logits/chosen": -0.10057993978261948,
2023
+ "logits/rejected": -0.16287049651145935,
2024
+ "logps/chosen": -328.2908935546875,
2025
+ "logps/rejected": -289.701904296875,
2026
+ "loss": 0.4416,
2027
+ "rewards/accuracies": 0.8187500238418579,
2028
+ "rewards/chosen": 0.3149394094944,
2029
+ "rewards/margins": 1.1269749402999878,
2030
+ "rewards/rejected": -0.8120356798171997,
2031
+ "step": 1210
2032
+ },
2033
+ {
2034
+ "epoch": 2.5543051557183984,
2035
+ "grad_norm": 32.721460368023834,
2036
+ "learning_rate": 6.486723491243778e-08,
2037
+ "logits/chosen": -0.14739738404750824,
2038
+ "logits/rejected": -0.1756991297006607,
2039
+ "logps/chosen": -287.8624572753906,
2040
+ "logps/rejected": -264.6856994628906,
2041
+ "loss": 0.4523,
2042
+ "rewards/accuracies": 0.862500011920929,
2043
+ "rewards/chosen": 0.20502778887748718,
2044
+ "rewards/margins": 0.9769600033760071,
2045
+ "rewards/rejected": -0.7719322443008423,
2046
+ "step": 1220
2047
+ },
2048
+ {
2049
+ "epoch": 2.575242083224287,
2050
+ "grad_norm": 32.50553549993069,
2051
+ "learning_rate": 5.898544083397e-08,
2052
+ "logits/chosen": -0.12704332172870636,
2053
+ "logits/rejected": -0.1398163139820099,
2054
+ "logps/chosen": -319.8119201660156,
2055
+ "logps/rejected": -287.4822082519531,
2056
+ "loss": 0.4467,
2057
+ "rewards/accuracies": 0.8374999761581421,
2058
+ "rewards/chosen": 0.2838557958602905,
2059
+ "rewards/margins": 1.0057270526885986,
2060
+ "rewards/rejected": -0.7218713164329529,
2061
+ "step": 1230
2062
+ },
2063
+ {
2064
+ "epoch": 2.596179010730175,
2065
+ "grad_norm": 30.599102169827287,
2066
+ "learning_rate": 5.3366415686149137e-08,
2067
+ "logits/chosen": -0.1681247055530548,
2068
+ "logits/rejected": -0.15492555499076843,
2069
+ "logps/chosen": -248.78054809570312,
2070
+ "logps/rejected": -238.68106079101562,
2071
+ "loss": 0.4368,
2072
+ "rewards/accuracies": 0.7875000238418579,
2073
+ "rewards/chosen": 0.10712571442127228,
2074
+ "rewards/margins": 0.8596469759941101,
2075
+ "rewards/rejected": -0.7525211572647095,
2076
+ "step": 1240
2077
+ },
2078
+ {
2079
+ "epoch": 2.617115938236064,
2080
+ "grad_norm": 31.156979603329955,
2081
+ "learning_rate": 4.8013507442865585e-08,
2082
+ "logits/chosen": -0.19022394716739655,
2083
+ "logits/rejected": -0.10595899820327759,
2084
+ "logps/chosen": -271.9454345703125,
2085
+ "logps/rejected": -274.1131286621094,
2086
+ "loss": 0.4319,
2087
+ "rewards/accuracies": 0.7875000238418579,
2088
+ "rewards/chosen": 0.17164357006549835,
2089
+ "rewards/margins": 0.7940610647201538,
2090
+ "rewards/rejected": -0.6224175095558167,
2091
+ "step": 1250
2092
+ },
2093
+ {
2094
+ "epoch": 2.6380528657419524,
2095
+ "grad_norm": 28.881054799580607,
2096
+ "learning_rate": 4.292990551804171e-08,
2097
+ "logits/chosen": -0.08757440745830536,
2098
+ "logits/rejected": -0.08349265158176422,
2099
+ "logps/chosen": -287.8446350097656,
2100
+ "logps/rejected": -251.55636596679688,
2101
+ "loss": 0.4229,
2102
+ "rewards/accuracies": 0.793749988079071,
2103
+ "rewards/chosen": 0.27148956060409546,
2104
+ "rewards/margins": 1.0949037075042725,
2105
+ "rewards/rejected": -0.8234142065048218,
2106
+ "step": 1260
2107
+ },
2108
+ {
2109
+ "epoch": 2.658989793247841,
2110
+ "grad_norm": 27.816379754479705,
2111
+ "learning_rate": 3.811863886528882e-08,
2112
+ "logits/chosen": -0.05129946395754814,
2113
+ "logits/rejected": -0.07520854473114014,
2114
+ "logps/chosen": -320.88092041015625,
2115
+ "logps/rejected": -259.69488525390625,
2116
+ "loss": 0.4403,
2117
+ "rewards/accuracies": 0.824999988079071,
2118
+ "rewards/chosen": 0.1268620789051056,
2119
+ "rewards/margins": 0.9729422330856323,
2120
+ "rewards/rejected": -0.8460801839828491,
2121
+ "step": 1270
2122
+ },
2123
+ {
2124
+ "epoch": 2.6799267207537296,
2125
+ "grad_norm": 31.45496593116063,
2126
+ "learning_rate": 3.358257417317095e-08,
2127
+ "logits/chosen": -0.08449853211641312,
2128
+ "logits/rejected": -0.1029936671257019,
2129
+ "logps/chosen": -288.6929016113281,
2130
+ "logps/rejected": -251.7803192138672,
2131
+ "loss": 0.4427,
2132
+ "rewards/accuracies": 0.737500011920929,
2133
+ "rewards/chosen": 0.18886741995811462,
2134
+ "rewards/margins": 0.9686278104782104,
2135
+ "rewards/rejected": -0.779760479927063,
2136
+ "step": 1280
2137
+ },
2138
+ {
2139
+ "epoch": 2.700863648259618,
2140
+ "grad_norm": 29.618711306226224,
2141
+ "learning_rate": 2.9324414157151367e-08,
2142
+ "logits/chosen": -0.1431390345096588,
2143
+ "logits/rejected": -0.10319207608699799,
2144
+ "logps/chosen": -265.52508544921875,
2145
+ "logps/rejected": -260.0169677734375,
2146
+ "loss": 0.4661,
2147
+ "rewards/accuracies": 0.7875000238418579,
2148
+ "rewards/chosen": 0.10996869951486588,
2149
+ "rewards/margins": 0.7998045682907104,
2150
+ "rewards/rejected": -0.6898358464241028,
2151
+ "step": 1290
2152
+ },
2153
+ {
2154
+ "epoch": 2.7218005757655064,
2155
+ "grad_norm": 30.57983222127276,
2156
+ "learning_rate": 2.5346695949237717e-08,
2157
+ "logits/chosen": -0.1215812936425209,
2158
+ "logits/rejected": -0.1575727015733719,
2159
+ "logps/chosen": -285.28839111328125,
2160
+ "logps/rejected": -258.9991455078125,
2161
+ "loss": 0.4312,
2162
+ "rewards/accuracies": 0.8062499761581421,
2163
+ "rewards/chosen": 0.16850288212299347,
2164
+ "rewards/margins": 0.923494815826416,
2165
+ "rewards/rejected": -0.7549919486045837,
2166
+ "step": 1300
2167
+ },
2168
+ {
2169
+ "epoch": 2.7218005757655064,
2170
+ "eval_logits/chosen": -0.3442156910896301,
2171
+ "eval_logits/rejected": -0.37236616015434265,
2172
+ "eval_logps/chosen": -310.4347229003906,
2173
+ "eval_logps/rejected": -275.5621337890625,
2174
+ "eval_loss": 0.5860841870307922,
2175
+ "eval_rewards/accuracies": 0.6666666865348816,
2176
+ "eval_rewards/chosen": -0.07512283325195312,
2177
+ "eval_rewards/margins": 0.5641648769378662,
2178
+ "eval_rewards/rejected": -0.6392877101898193,
2179
+ "eval_runtime": 18.9961,
2180
+ "eval_samples_per_second": 105.285,
2181
+ "eval_steps_per_second": 3.316,
2182
+ "step": 1300
2183
+ },
2184
+ {
2185
+ "epoch": 2.7427375032713948,
2186
+ "grad_norm": 33.466317771235126,
2187
+ "learning_rate": 2.165178958628744e-08,
2188
+ "logits/chosen": -0.2976987659931183,
2189
+ "logits/rejected": -0.2995850145816803,
2190
+ "logps/chosen": -273.274658203125,
2191
+ "logps/rejected": -265.376953125,
2192
+ "loss": 0.4557,
2193
+ "rewards/accuracies": 0.8187500238418579,
2194
+ "rewards/chosen": 0.248153418302536,
2195
+ "rewards/margins": 0.9688236117362976,
2196
+ "rewards/rejected": -0.720670223236084,
2197
+ "step": 1310
2198
+ },
2199
+ {
2200
+ "epoch": 2.7636744307772836,
2201
+ "grad_norm": 34.66631251738303,
2202
+ "learning_rate": 1.824189659787284e-08,
2203
+ "logits/chosen": -0.10840437561273575,
2204
+ "logits/rejected": -0.2128714621067047,
2205
+ "logps/chosen": -305.4066162109375,
2206
+ "logps/rejected": -265.4433288574219,
2207
+ "loss": 0.4569,
2208
+ "rewards/accuracies": 0.7875000238418579,
2209
+ "rewards/chosen": 0.2427886724472046,
2210
+ "rewards/margins": 0.9924157857894897,
2211
+ "rewards/rejected": -0.7496271133422852,
2212
+ "step": 1320
2213
+ },
2214
+ {
2215
+ "epoch": 2.784611358283172,
2216
+ "grad_norm": 32.12579125137935,
2217
+ "learning_rate": 1.511904869454772e-08,
2218
+ "logits/chosen": -0.2719363570213318,
2219
+ "logits/rejected": -0.282664954662323,
2220
+ "logps/chosen": -258.68927001953125,
2221
+ "logps/rejected": -247.0824737548828,
2222
+ "loss": 0.4442,
2223
+ "rewards/accuracies": 0.800000011920929,
2224
+ "rewards/chosen": 0.14507904648780823,
2225
+ "rewards/margins": 0.9792217016220093,
2226
+ "rewards/rejected": -0.8341425657272339,
2227
+ "step": 1330
2228
+ },
2229
+ {
2230
+ "epoch": 2.8055482857890603,
2231
+ "grad_norm": 30.777443115008786,
2232
+ "learning_rate": 1.2285106557296476e-08,
2233
+ "logits/chosen": -0.005404007621109486,
2234
+ "logits/rejected": -0.11788828670978546,
2235
+ "logps/chosen": -291.2290954589844,
2236
+ "logps/rejected": -259.48468017578125,
2237
+ "loss": 0.4343,
2238
+ "rewards/accuracies": 0.856249988079071,
2239
+ "rewards/chosen": 0.14346763491630554,
2240
+ "rewards/margins": 0.9186245203018188,
2241
+ "rewards/rejected": -0.7751568555831909,
2242
+ "step": 1340
2243
+ },
2244
+ {
2245
+ "epoch": 2.8264852132949487,
2246
+ "grad_norm": 30.795896785948266,
2247
+ "learning_rate": 9.741758728888217e-09,
2248
+ "logits/chosen": -0.21444204449653625,
2249
+ "logits/rejected": -0.22242781519889832,
2250
+ "logps/chosen": -273.7785949707031,
2251
+ "logps/rejected": -255.1972198486328,
2252
+ "loss": 0.4474,
2253
+ "rewards/accuracies": 0.800000011920929,
2254
+ "rewards/chosen": 0.1873357743024826,
2255
+ "rewards/margins": 0.925756573677063,
2256
+ "rewards/rejected": -0.7384207844734192,
2257
+ "step": 1350
2258
+ },
2259
+ {
2260
+ "epoch": 2.8474221408008376,
2261
+ "grad_norm": 32.2751606467674,
2262
+ "learning_rate": 7.490520607794981e-09,
2263
+ "logits/chosen": -0.23268666863441467,
2264
+ "logits/rejected": -0.24138864874839783,
2265
+ "logps/chosen": -321.114990234375,
2266
+ "logps/rejected": -286.67596435546875,
2267
+ "loss": 0.4185,
2268
+ "rewards/accuracies": 0.824999988079071,
2269
+ "rewards/chosen": 0.2826289236545563,
2270
+ "rewards/margins": 1.1657564640045166,
2271
+ "rewards/rejected": -0.8831275701522827,
2272
+ "step": 1360
2273
+ },
2274
+ {
2275
+ "epoch": 2.868359068306726,
2276
+ "grad_norm": 35.16124310393445,
2277
+ "learning_rate": 5.532733545274781e-09,
2278
+ "logits/chosen": -0.15783634781837463,
2279
+ "logits/rejected": -0.04948469251394272,
2280
+ "logps/chosen": -257.70880126953125,
2281
+ "logps/rejected": -257.0361633300781,
2282
+ "loss": 0.4388,
2283
+ "rewards/accuracies": 0.78125,
2284
+ "rewards/chosen": 0.09304230660200119,
2285
+ "rewards/margins": 0.8344671130180359,
2286
+ "rewards/rejected": -0.7414248585700989,
2287
+ "step": 1370
2288
+ },
2289
+ {
2290
+ "epoch": 2.8892959958126143,
2291
+ "grad_norm": 28.606615842220055,
2292
+ "learning_rate": 3.869564046156459e-09,
2293
+ "logits/chosen": -0.187531977891922,
2294
+ "logits/rejected": -0.3003791868686676,
2295
+ "logps/chosen": -326.945068359375,
2296
+ "logps/rejected": -275.7688903808594,
2297
+ "loss": 0.4474,
2298
+ "rewards/accuracies": 0.7875000238418579,
2299
+ "rewards/chosen": 0.3150181770324707,
2300
+ "rewards/margins": 0.9270998239517212,
2301
+ "rewards/rejected": -0.6120817065238953,
2302
+ "step": 1380
2303
+ },
2304
+ {
2305
+ "epoch": 2.910232923318503,
2306
+ "grad_norm": 29.689707753715854,
2307
+ "learning_rate": 2.5020030738031052e-09,
2308
+ "logits/chosen": -0.06661088764667511,
2309
+ "logits/rejected": -0.03964465484023094,
2310
+ "logps/chosen": -326.2409973144531,
2311
+ "logps/rejected": -333.43048095703125,
2312
+ "loss": 0.4429,
2313
+ "rewards/accuracies": 0.8374999761581421,
2314
+ "rewards/chosen": 0.26736724376678467,
2315
+ "rewards/margins": 1.0932358503341675,
2316
+ "rewards/rejected": -0.8258686065673828,
2317
+ "step": 1390
2318
+ },
2319
+ {
2320
+ "epoch": 2.9311698508243915,
2321
+ "grad_norm": 28.977571013449776,
2322
+ "learning_rate": 1.4308654596684177e-09,
2323
+ "logits/chosen": -0.11069446802139282,
2324
+ "logits/rejected": -0.20247487723827362,
2325
+ "logps/chosen": -306.44610595703125,
2326
+ "logps/rejected": -266.9939270019531,
2327
+ "loss": 0.4454,
2328
+ "rewards/accuracies": 0.8187500238418579,
2329
+ "rewards/chosen": 0.33073872327804565,
2330
+ "rewards/margins": 1.0625050067901611,
2331
+ "rewards/rejected": -0.7317661046981812,
2332
+ "step": 1400
2333
+ },
2334
+ {
2335
+ "epoch": 2.9311698508243915,
2336
+ "eval_logits/chosen": -0.34011754393577576,
2337
+ "eval_logits/rejected": -0.36805158853530884,
2338
+ "eval_logps/chosen": -310.2955627441406,
2339
+ "eval_logps/rejected": -275.4775085449219,
2340
+ "eval_loss": 0.5941523313522339,
2341
+ "eval_rewards/accuracies": 0.6944444179534912,
2342
+ "eval_rewards/chosen": -0.005565390922129154,
2343
+ "eval_rewards/margins": 0.591437041759491,
2344
+ "eval_rewards/rejected": -0.5970024466514587,
2345
+ "eval_runtime": 19.2489,
2346
+ "eval_samples_per_second": 103.902,
2347
+ "eval_steps_per_second": 3.273,
2348
+ "step": 1400
2349
+ },
2350
+ {
2351
+ "epoch": 2.95210677833028,
2352
+ "grad_norm": 29.74874723420356,
2353
+ "learning_rate": 6.567894177967325e-10,
2354
+ "logits/chosen": -0.046352408826351166,
2355
+ "logits/rejected": -0.054122112691402435,
2356
+ "logps/chosen": -287.88287353515625,
2357
+ "logps/rejected": -262.4837646484375,
2358
+ "loss": 0.4161,
2359
+ "rewards/accuracies": 0.7875000238418579,
2360
+ "rewards/chosen": 0.250956654548645,
2361
+ "rewards/margins": 1.026626706123352,
2362
+ "rewards/rejected": -0.775670051574707,
2363
+ "step": 1410
2364
+ },
2365
+ {
2366
+ "epoch": 2.9730437058361687,
2367
+ "grad_norm": 29.04099364011046,
2368
+ "learning_rate": 1.802361645573125e-10,
2369
+ "logits/chosen": -0.15342023968696594,
2370
+ "logits/rejected": -0.1559501588344574,
2371
+ "logps/chosen": -272.7152404785156,
2372
+ "logps/rejected": -269.2229919433594,
2373
+ "loss": 0.4192,
2374
+ "rewards/accuracies": 0.824999988079071,
2375
+ "rewards/chosen": 0.05591464787721634,
2376
+ "rewards/margins": 1.0562889575958252,
2377
+ "rewards/rejected": -1.000374436378479,
2378
+ "step": 1420
2379
+ },
2380
+ {
2381
+ "epoch": 2.993980633342057,
2382
+ "grad_norm": 30.18988447303673,
2383
+ "learning_rate": 1.4896438384481846e-12,
2384
+ "logits/chosen": -0.20836491882801056,
2385
+ "logits/rejected": -0.19784407317638397,
2386
+ "logps/chosen": -317.96209716796875,
2387
+ "logps/rejected": -302.5794982910156,
2388
+ "loss": 0.4315,
2389
+ "rewards/accuracies": 0.78125,
2390
+ "rewards/chosen": 0.22425825893878937,
2391
+ "rewards/margins": 0.8794215321540833,
2392
+ "rewards/rejected": -0.6551632285118103,
2393
+ "step": 1430
2394
+ },
2395
+ {
2396
+ "epoch": 2.996074326092646,
2397
+ "step": 1431,
2398
+ "total_flos": 0.0,
2399
+ "train_loss": 0.5334697115221363,
2400
+ "train_runtime": 7355.3343,
2401
+ "train_samples_per_second": 24.935,
2402
+ "train_steps_per_second": 0.195
2403
+ }
2404
+ ],
2405
+ "logging_steps": 10,
2406
+ "max_steps": 1431,
2407
+ "num_input_tokens_seen": 0,
2408
+ "num_train_epochs": 3,
2409
+ "save_steps": 500,
2410
+ "stateful_callbacks": {
2411
+ "TrainerControl": {
2412
+ "args": {
2413
+ "should_epoch_stop": false,
2414
+ "should_evaluate": false,
2415
+ "should_log": false,
2416
+ "should_save": false,
2417
+ "should_training_stop": false
2418
+ },
2419
+ "attributes": {}
2420
+ }
2421
+ },
2422
+ "total_flos": 0.0,
2423
+ "train_batch_size": 2,
2424
+ "trial_name": null,
2425
+ "trial_params": null
2426
+ }
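Note on the `rewards/*` fields logged above: in DPO, `rewards/chosen` and `rewards/rejected` are the implicit rewards, i.e. beta-scaled log-probability ratios between the trained policy and the frozen reference model; `rewards/margins` is their difference and `rewards/accuracies` is the fraction of pairs where the chosen completion gets the higher reward. A minimal sketch of that relationship, assuming the standard sigmoid DPO objective and an illustrative beta of 0.1 (the beta actually used for this run is not recorded in trainer_state.json):

```python
import torch
import torch.nn.functional as F

# Minimal sketch (not the training code that produced this log): how the logged
# DPO metrics relate to per-sequence log-probabilities. beta = 0.1 is only an
# illustrative value; the real beta is not stored in trainer_state.json.
beta = 0.1

def dpo_reward_stats(policy_chosen_logps, policy_rejected_logps,
                     ref_chosen_logps, ref_rejected_logps, beta=beta):
    # Implicit DPO rewards: beta-scaled log-ratio of policy vs. reference.
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)        # rewards/chosen
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)  # rewards/rejected
    margins = chosen_rewards - rejected_rewards                             # rewards/margins
    accuracies = (chosen_rewards > rejected_rewards).float()                # rewards/accuracies
    # Sigmoid-variant DPO loss over the same batch of preference pairs.
    loss = -F.logsigmoid(margins).mean()
    return chosen_rewards.mean(), rejected_rewards.mean(), margins.mean(), accuracies.mean(), loss

# Toy usage with random log-probs for a batch of 4 preference pairs.
stats = dpo_reward_stats(*(torch.randn(4) for _ in range(4)))
```

Read this way, the final `eval_rewards/margins` of roughly 0.59 at step 1400 means chosen answers receive a visibly higher scaled log-ratio than rejected ones, and `eval_rewards/accuracies` (about 0.69) is how often that ordering holds on the eval pairs.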
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/training_args.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7649586c424c337f6c403fdb617ac9d954daf9a7192f3afe5b6318f37e9bb19e
3
+ size 6520
ComfyUI/models/smol/SmolLM2-1.7B-Instruct/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
ComfyUI/models/smol/SmolLM2-135M-Instruct/.gitattributes ADDED
@@ -0,0 +1,35 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
ComfyUI/models/smol/SmolLM2-135M-Instruct/README.md ADDED
@@ -0,0 +1,133 @@
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ language:
5
+ - en
6
+ pipeline_tag: text-generation
7
+ tags:
8
+ - safetensors
9
+ - onnx
10
+ - transformers.js
11
+ base_model:
12
+ - HuggingFaceTB/SmolLM2-135M
13
+ ---
14
+
15
+
16
+ # SmolLM2
17
+
18
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/3ntM63zkmxY2cNRhgY_Kl.png)
19
+
20
+ ## Table of Contents
21
+
22
+ 1. [Model Summary](#model-summary)
23
+ 2. [Limitations](#limitations)
24
+ 3. [Training](#training)
25
+ 4. [License](#license)
26
+ 5. [Citation](#citation)
27
+
28
+ ## Model Summary
29
+
30
+ SmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. They are capable of solving a wide range of tasks while being lightweight enough to run on-device.
31
+
32
+ SmolLM2 demonstrates significant advances over its predecessor SmolLM1, particularly in instruction following, knowledge, and reasoning. The 135M model was trained on 2 trillion tokens using a diverse combination of datasets: FineWeb-Edu, DCLM, and The Stack, along with new filtered datasets we curated and will release soon. We developed the instruct version through supervised fine-tuning (SFT) using a combination of public datasets and our own curated datasets. We then applied Direct Preference Optimization (DPO) using [UltraFeedback](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized).
33
+
34
+ The instruct model additionally supports tasks such as text rewriting, summarization, and function calling (the latter for the 1.7B model), thanks to datasets developed by [Argilla](https://huggingface.co/argilla) such as [Synth-APIGen-v0.1](https://huggingface.co/datasets/argilla/Synth-APIGen-v0.1).
35
+ You can find the SFT dataset at https://huggingface.co/datasets/HuggingFaceTB/smol-smoltalk and the fine-tuning code at https://github.com/huggingface/alignment-handbook/tree/main/recipes/smollm2
36
+
37
+ ### How to use
38
+
39
+ ### Transformers
40
+ ```bash
41
+ pip install transformers
42
+ ```
43
+
44
+ ```python
45
+ from transformers import AutoModelForCausalLM, AutoTokenizer
46
+ checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"
47
+
48
+ device = "cuda" # for GPU usage or "cpu" for CPU usage
49
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
50
+ # for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
51
+ model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
52
+
53
+ messages = [{"role": "user", "content": "What is gravity?"}]
54
+ input_text = tokenizer.apply_chat_template(messages, tokenize=False)
55
+ print(input_text)
56
+ inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
57
+ outputs = model.generate(inputs, max_new_tokens=50, temperature=0.2, top_p=0.9, do_sample=True)
58
+ print(tokenizer.decode(outputs[0]))
59
+ ```
60
+
61
+ ### Chat in TRL
62
+ You can also use the TRL CLI to chat with the model from the terminal:
63
+ ```bash
64
+ pip install trl
65
+ trl chat --model_name_or_path HuggingFaceTB/SmolLM2-135M-Instruct --device cpu
66
+ ```
67
+
68
+ ## Evaluation
69
+
70
+ In this section, we report the evaluation results of SmolLM2. All evaluations are zero-shot unless stated otherwise, and we use [lighteval](https://github.com/huggingface/lighteval) to run them.
71
+
72
+ ## Base pre-trained model
73
+
74
+ | Metrics | SmolLM2-135M-8k | SmolLM-135M |
75
+ |:-------------------|:----------------:|:------------:|
76
+ | HellaSwag | **42.1** | 41.2 |
77
+ | ARC (Average) | **43.9** | 42.4 |
78
+ | PIQA | 68.4 | 68.4 |
79
+ | MMLU (cloze) | **31.5** | 30.2 |
80
+ | CommonsenseQA | **33.9** | 32.7 |
81
+ | TriviaQA | 4.1 | **4.3** |
82
+ | Winogrande | 51.3 | 51.3 |
83
+ | OpenBookQA | **34.6** | 34.0 |
84
+ | GSM8K (5-shot) | **1.4** | 1.0 |
85
+
86
+
87
+ ## Instruction model
88
+
89
+ | Metric | SmolLM2-135M-Instruct | SmolLM-135M-Instruct |
90
+ |:-----------------------------|:---------------------:|:--------------------:|
91
+ | IFEval (Average prompt/inst) | **29.9** | 17.2 |
92
+ | MT-Bench | **19.8** | 16.8 |
93
+ | HellaSwag | **40.9** | 38.9 |
94
+ | ARC (Average) | **37.3** | 33.9 |
95
+ | PIQA | **66.3** | 64.0 |
96
+ | MMLU (cloze) | **29.3** | 28.3 |
97
+ | BBH (3-shot) | **28.2** | 25.2 |
98
+ | GSM8K (5-shot) | 1.4 | 1.4 |
99
+
100
+
101
+
102
+ ## Limitations
103
+
104
+ SmolLM2 models primarily understand and generate content in English. They can produce text on a variety of topics, but the generated content may not always be factually accurate, logically consistent, or free from biases present in the training data. These models should be used as assistive tools rather than definitive sources of information. Users should always verify important information and critically evaluate any generated content.
105
+
106
+ ## Training
107
+
108
+ ### Model
109
+
110
+ - **Architecture:** Transformer decoder
111
+ - **Pretraining tokens:** 2T
112
+ - **Precision:** bfloat16
113
+
114
+ ### Hardware
115
+
116
+ - **GPUs:** 64 H100
117
+
118
+ ### Software
119
+
120
+ - **Training Framework:** [nanotron](https://github.com/huggingface/nanotron/tree/main)
121
+
122
+ ## License
123
+
124
+ [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
125
+
126
+ ## Citation
127
+ ```bibtex
128
+ @misc{allal2024SmolLM2,
129
+ title={SmolLM2 - with great data, comes great performance},
130
+ author={Loubna Ben Allal and Anton Lozhkov and Elie Bakouch and Gabriel Martín Blázquez and Lewis Tunstall and Agustín Piqueres and Andres Marafioti and Cyril Zakka and Leandro von Werra and Thomas Wolf},
131
+ year={2024},
132
+ }
133
+ ```
ComfyUI/models/smol/SmolLM2-135M-Instruct/all_results.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "epoch": 1.9973828840617638,
3
+ "eval_logits/chosen": 4.600930213928223,
4
+ "eval_logits/rejected": 4.9520039558410645,
5
+ "eval_logps/chosen": -443.648193359375,
6
+ "eval_logps/rejected": -378.15826416015625,
7
+ "eval_loss": 0.6740825176239014,
8
+ "eval_rewards/accuracies": 0.6150793433189392,
9
+ "eval_rewards/chosen": -0.07192634046077728,
10
+ "eval_rewards/margins": 0.26874542236328125,
11
+ "eval_rewards/rejected": -0.34067174792289734,
12
+ "eval_runtime": 20.4479,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 97.81,
15
+ "eval_steps_per_second": 3.081,
16
+ "total_flos": 0.0,
17
+ "train_loss": 0.675485389037702,
18
+ "train_runtime": 5897.7907,
19
+ "train_samples": 61134,
20
+ "train_samples_per_second": 20.731,
21
+ "train_steps_per_second": 0.162
22
+ }
ComfyUI/models/smol/SmolLM2-135M-Instruct/config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 576,
11
+ "initializer_range": 0.041666666666666664,
12
+ "intermediate_size": 1536,
13
+ "is_llama_config": true,
14
+ "max_position_embeddings": 8192,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 9,
18
+ "num_hidden_layers": 30,
19
+ "num_key_value_heads": 3,
20
+ "pad_token_id": 2,
21
+ "pretraining_tp": 1,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_interleaved": false,
24
+ "rope_scaling": null,
25
+ "rope_theta": 100000,
26
+ "tie_word_embeddings": true,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.42.3",
29
+ "transformers.js_config": {
30
+ "kv_cache_dtype": {
31
+ "q4f16": "float16",
32
+ "fp16": "float16"
33
+ }
34
+ },
35
+ "use_cache": true,
36
+ "vocab_size": 49152
37
+ }
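The config above pins down the architecture exactly (Llama-style decoder, 30 layers, hidden size 576, GQA with 9 query heads and 3 key/value heads, tied embeddings), so the parameter count behind the "135M" label can be reconstructed. A back-of-the-envelope sketch, assuming the standard Llama layout with no attention or MLP biases, as the config states:

```python
# Rough parameter count implied by config.json above. Assumes the standard
# Llama layout ("model_type": "llama"): tied embeddings, no attention/MLP
# biases, grouped-query attention with 9 query heads and 3 key/value heads.
hidden, inter, layers, vocab = 576, 1536, 30, 49152
heads, kv_heads = 9, 3
head_dim = hidden // heads  # 64

embed = vocab * hidden                       # shared input/output embedding (tie_word_embeddings)
attn = 2 * hidden * hidden                   # q_proj + o_proj
attn += 2 * hidden * (kv_heads * head_dim)   # k_proj + v_proj (GQA)
mlp = 3 * hidden * inter                     # gate_proj, up_proj, down_proj
norms = 2 * hidden                           # two RMSNorm weights per layer
per_layer = attn + mlp + norms

total = embed + layers * per_layer + hidden  # + final RMSNorm
print(f"~{total / 1e6:.1f}M parameters")     # ~134.5M, i.e. the "135M" model
```

At bfloat16 (2 bytes per parameter) the roughly 134.5M total is consistent with the ~269 MB model.safetensors listed further down.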
ComfyUI/models/smol/SmolLM2-135M-Instruct/eval_results.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "epoch": 1.9973828840617638,
3
+ "eval_logits/chosen": 4.600930213928223,
4
+ "eval_logits/rejected": 4.9520039558410645,
5
+ "eval_logps/chosen": -443.648193359375,
6
+ "eval_logps/rejected": -378.15826416015625,
7
+ "eval_loss": 0.6740825176239014,
8
+ "eval_rewards/accuracies": 0.6150793433189392,
9
+ "eval_rewards/chosen": -0.07192634046077728,
10
+ "eval_rewards/margins": 0.26874542236328125,
11
+ "eval_rewards/rejected": -0.34067174792289734,
12
+ "eval_runtime": 20.4479,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 97.81,
15
+ "eval_steps_per_second": 3.081
16
+ }
ComfyUI/models/smol/SmolLM2-135M-Instruct/generation_config.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.42.3"
7
+ }
ComfyUI/models/smol/SmolLM2-135M-Instruct/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ComfyUI/models/smol/SmolLM2-135M-Instruct/model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af571cbf074e6d21a03528d2330792e532ca608f24ac70a143f6b369968ab8c
3
+ size 269060552
ComfyUI/models/smol/SmolLM2-135M-Instruct/runs/Oct31_10-14-22_ip-26-0-174-36/events.out.tfevents.1730370128.ip-26-0-174-36.3239327.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:774faae98f36baa1ee525b594023c5fc25b4ad9249e04ccdf4cba9ee4f4585fd
3
+ size 78157
ComfyUI/models/smol/SmolLM2-135M-Instruct/runs/Oct31_10-14-22_ip-26-0-174-36/events.out.tfevents.1730376056.ip-26-0-174-36.3239327.1 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:043a35029ecb401ef0df75148304075329cd444faf2097b0316e9200018fea5c
3
+ size 828
ComfyUI/models/smol/SmolLM2-135M-Instruct/special_tokens_map.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|im_start|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|im_end|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "unk_token": {
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
ComfyUI/models/smol/SmolLM2-135M-Instruct/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
ComfyUI/models/smol/SmolLM2-135M-Instruct/tokenizer_config.json ADDED
@@ -0,0 +1,154 @@
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<repo_name>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "4": {
37
+ "content": "<reponame>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "5": {
45
+ "content": "<file_sep>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "6": {
53
+ "content": "<filename>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "7": {
61
+ "content": "<gh_stars>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "8": {
69
+ "content": "<issue_start>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "9": {
77
+ "content": "<issue_comment>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "10": {
85
+ "content": "<issue_closed>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "11": {
93
+ "content": "<jupyter_start>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "12": {
101
+ "content": "<jupyter_text>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "13": {
109
+ "content": "<jupyter_code>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "14": {
117
+ "content": "<jupyter_output>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "15": {
125
+ "content": "<jupyter_script>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "16": {
133
+ "content": "<empty_output>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ }
140
+ },
141
+ "additional_special_tokens": [
142
+ "<|im_start|>",
143
+ "<|im_end|>"
144
+ ],
145
+ "bos_token": "<|im_start|>",
146
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
+ "clean_up_tokenization_spaces": false,
148
+ "eos_token": "<|im_end|>",
149
+ "model_max_length": 2048,
150
+ "pad_token": "<|im_end|>",
151
+ "tokenizer_class": "GPT2Tokenizer",
152
+ "unk_token": "<|endoftext|>",
153
+ "vocab_size": 49152
154
+ }
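The `chat_template` above is a ChatML-style Jinja template that auto-inserts a default system prompt when the conversation does not start with one. A small sketch of how it renders a conversation; the local path is an assumption, and any directory containing this tokenizer_config.json behaves the same way:

```python
from transformers import AutoTokenizer

# Sketch of what the chat_template above produces. The local path is an
# assumption; any copy of this tokenizer config renders identically.
tok = AutoTokenizer.from_pretrained("ComfyUI/models/smol/SmolLM2-135M-Instruct")

messages = [{"role": "user", "content": "What is gravity?"}]
text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
# Because the first message is not a system message, the template injects the
# default SmolLM system prompt, then wraps each turn in ChatML markers:
# <|im_start|>system
# You are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>
# <|im_start|>user
# What is gravity?<|im_end|>
# <|im_start|>assistant
```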
ComfyUI/models/smol/SmolLM2-135M-Instruct/train_results.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "epoch": 1.9973828840617638,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.675485389037702,
5
+ "train_runtime": 5897.7907,
6
+ "train_samples": 61134,
7
+ "train_samples_per_second": 20.731,
8
+ "train_steps_per_second": 0.162
9
+ }
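A quick consistency check of the throughput figures in this file (an illustrative calculation, not part of the training output itself); the step count of 954 is `global_step` from trainer_state.json below:

```python
# Sanity check: the logged throughput numbers are mutually consistent.
train_samples, runtime, steps = 61134, 5897.7907, 954
num_train_epochs = 2  # the run stopped at epoch ~1.997 of a 2-epoch schedule

print(train_samples * num_train_epochs / runtime)  # ~20.73  -> train_samples_per_second
print(steps / runtime)                              # ~0.162 -> train_steps_per_second
```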
ComfyUI/models/smol/SmolLM2-135M-Instruct/trainer_state.json ADDED
@@ -0,0 +1,1626 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.9973828840617638,
5
+ "eval_steps": 100,
6
+ "global_step": 954,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.002093692750588851,
13
+ "grad_norm": 38.333577570579074,
14
+ "learning_rate": 1.0416666666666666e-08,
15
+ "logits/chosen": 5.468747138977051,
16
+ "logits/rejected": 5.353150367736816,
17
+ "logps/chosen": -399.0700988769531,
18
+ "logps/rejected": -414.2703857421875,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.02093692750588851,
28
+ "grad_norm": 36.45097456473781,
29
+ "learning_rate": 1.0416666666666667e-07,
30
+ "logits/chosen": 4.634159088134766,
31
+ "logits/rejected": 4.8650617599487305,
32
+ "logps/chosen": -481.9865417480469,
33
+ "logps/rejected": -402.9172668457031,
34
+ "loss": 0.7192,
35
+ "rewards/accuracies": 0.4236111044883728,
36
+ "rewards/chosen": 0.036201052367687225,
37
+ "rewards/margins": 0.05521820858120918,
38
+ "rewards/rejected": -0.01901715248823166,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.04187385501177702,
43
+ "grad_norm": 38.31461130932718,
44
+ "learning_rate": 2.0833333333333333e-07,
45
+ "logits/chosen": 4.8017449378967285,
46
+ "logits/rejected": 5.193596363067627,
47
+ "logps/chosen": -428.74591064453125,
48
+ "logps/rejected": -379.7098693847656,
49
+ "loss": 0.7525,
50
+ "rewards/accuracies": 0.512499988079071,
51
+ "rewards/chosen": 0.014457901008427143,
52
+ "rewards/margins": -0.03727109357714653,
53
+ "rewards/rejected": 0.0517289862036705,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.06281078251766553,
58
+ "grad_norm": 38.39033659648525,
59
+ "learning_rate": 3.1249999999999997e-07,
60
+ "logits/chosen": 4.625308513641357,
61
+ "logits/rejected": 4.913487434387207,
62
+ "logps/chosen": -459.8934631347656,
63
+ "logps/rejected": -365.87176513671875,
64
+ "loss": 0.7389,
65
+ "rewards/accuracies": 0.543749988079071,
66
+ "rewards/chosen": 0.032367587089538574,
67
+ "rewards/margins": 0.07106685638427734,
68
+ "rewards/rejected": -0.03869926929473877,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.08374771002355404,
73
+ "grad_norm": 34.959297165636315,
74
+ "learning_rate": 4.1666666666666667e-07,
75
+ "logits/chosen": 5.135643005371094,
76
+ "logits/rejected": 5.29467248916626,
77
+ "logps/chosen": -388.5003662109375,
78
+ "logps/rejected": -341.11138916015625,
79
+ "loss": 0.7521,
80
+ "rewards/accuracies": 0.44999998807907104,
81
+ "rewards/chosen": -0.061884332448244095,
82
+ "rewards/margins": -0.051958512514829636,
83
+ "rewards/rejected": -0.009925814345479012,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.10468463752944256,
88
+ "grad_norm": 40.59746593571697,
89
+ "learning_rate": 5.208333333333334e-07,
90
+ "logits/chosen": 4.794947147369385,
91
+ "logits/rejected": 5.206262111663818,
92
+ "logps/chosen": -418.7637634277344,
93
+ "logps/rejected": -366.21783447265625,
94
+ "loss": 0.7539,
95
+ "rewards/accuracies": 0.5249999761581421,
96
+ "rewards/chosen": -0.0046131848357617855,
97
+ "rewards/margins": 0.009517465718090534,
98
+ "rewards/rejected": -0.014130651950836182,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.12562156503533106,
103
+ "grad_norm": 35.116262927070615,
104
+ "learning_rate": 6.249999999999999e-07,
105
+ "logits/chosen": 4.984349250793457,
106
+ "logits/rejected": 5.210784435272217,
107
+ "logps/chosen": -389.5479431152344,
108
+ "logps/rejected": -355.3258361816406,
109
+ "loss": 0.7337,
110
+ "rewards/accuracies": 0.581250011920929,
111
+ "rewards/chosen": 0.09027661383152008,
112
+ "rewards/margins": 0.13924987614154816,
113
+ "rewards/rejected": -0.048973266035318375,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.14655849254121958,
118
+ "grad_norm": 37.65630163274615,
119
+ "learning_rate": 7.291666666666666e-07,
120
+ "logits/chosen": 5.079476356506348,
121
+ "logits/rejected": 5.1062331199646,
122
+ "logps/chosen": -472.6788635253906,
123
+ "logps/rejected": -410.6566467285156,
124
+ "loss": 0.7532,
125
+ "rewards/accuracies": 0.48750001192092896,
126
+ "rewards/chosen": 0.01137494295835495,
127
+ "rewards/margins": -0.019245151430368423,
128
+ "rewards/rejected": 0.030620098114013672,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.16749542004710807,
133
+ "grad_norm": 36.35209638887961,
134
+ "learning_rate": 8.333333333333333e-07,
135
+ "logits/chosen": 4.831971645355225,
136
+ "logits/rejected": 5.179555892944336,
137
+ "logps/chosen": -465.8661193847656,
138
+ "logps/rejected": -352.46063232421875,
139
+ "loss": 0.7337,
140
+ "rewards/accuracies": 0.5249999761581421,
141
+ "rewards/chosen": 0.029347699135541916,
142
+ "rewards/margins": 0.011580700054764748,
143
+ "rewards/rejected": 0.017767000943422318,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.1884323475529966,
148
+ "grad_norm": 40.247342074541066,
149
+ "learning_rate": 9.374999999999999e-07,
150
+ "logits/chosen": 4.667853355407715,
151
+ "logits/rejected": 5.083367347717285,
152
+ "logps/chosen": -410.145263671875,
153
+ "logps/rejected": -347.24871826171875,
154
+ "loss": 0.7325,
155
+ "rewards/accuracies": 0.5625,
156
+ "rewards/chosen": 0.04706032946705818,
157
+ "rewards/margins": 0.05596155673265457,
158
+ "rewards/rejected": -0.008901228196918964,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.2093692750588851,
163
+ "grad_norm": 36.88448509348576,
164
+ "learning_rate": 9.999463737538052e-07,
165
+ "logits/chosen": 5.017066955566406,
166
+ "logits/rejected": 5.157826900482178,
167
+ "logps/chosen": -453.6114196777344,
168
+ "logps/rejected": -376.13214111328125,
169
+ "loss": 0.7296,
170
+ "rewards/accuracies": 0.543749988079071,
171
+ "rewards/chosen": 0.03123999759554863,
172
+ "rewards/margins": 0.038888636976480484,
173
+ "rewards/rejected": -0.007648637983947992,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.2093692750588851,
178
+ "eval_logits/chosen": 4.755386829376221,
179
+ "eval_logits/rejected": 5.127224445343018,
180
+ "eval_logps/chosen": -443.48101806640625,
181
+ "eval_logps/rejected": -377.5273742675781,
182
+ "eval_loss": 0.7357296347618103,
183
+ "eval_rewards/accuracies": 0.5515872836112976,
184
+ "eval_rewards/chosen": 0.01168334111571312,
185
+ "eval_rewards/margins": 0.03692733868956566,
186
+ "eval_rewards/rejected": -0.025244001299142838,
187
+ "eval_runtime": 21.3186,
188
+ "eval_samples_per_second": 93.815,
189
+ "eval_steps_per_second": 2.955,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 0.23030620256477363,
194
+ "grad_norm": 36.21924654761057,
195
+ "learning_rate": 9.993432105822034e-07,
196
+ "logits/chosen": 4.768385887145996,
197
+ "logits/rejected": 5.076653957366943,
198
+ "logps/chosen": -449.16375732421875,
199
+ "logps/rejected": -369.2919006347656,
200
+ "loss": 0.7211,
201
+ "rewards/accuracies": 0.42500001192092896,
202
+ "rewards/chosen": -0.04003235697746277,
203
+ "rewards/margins": -0.059906214475631714,
204
+ "rewards/rejected": 0.019873863086104393,
205
+ "step": 110
206
+ },
207
+ {
208
+ "epoch": 0.2512431300706621,
209
+ "grad_norm": 36.12599439727971,
210
+ "learning_rate": 9.980706626858607e-07,
211
+ "logits/chosen": 5.0697174072265625,
212
+ "logits/rejected": 5.350961208343506,
213
+ "logps/chosen": -392.9084777832031,
214
+ "logps/rejected": -342.9964294433594,
215
+ "loss": 0.7213,
216
+ "rewards/accuracies": 0.543749988079071,
217
+ "rewards/chosen": 0.005513651762157679,
218
+ "rewards/margins": 0.07320307195186615,
219
+ "rewards/rejected": -0.06768941879272461,
220
+ "step": 120
221
+ },
222
+ {
223
+ "epoch": 0.2721800575765506,
224
+ "grad_norm": 43.81008737879554,
225
+ "learning_rate": 9.961304359538434e-07,
226
+ "logits/chosen": 4.7396440505981445,
227
+ "logits/rejected": 5.110291957855225,
228
+ "logps/chosen": -445.08209228515625,
229
+ "logps/rejected": -356.9689636230469,
230
+ "loss": 0.7319,
231
+ "rewards/accuracies": 0.543749988079071,
232
+ "rewards/chosen": 0.03245898336172104,
233
+ "rewards/margins": 0.0837341919541359,
234
+ "rewards/rejected": -0.05127520486712456,
235
+ "step": 130
236
+ },
237
+ {
238
+ "epoch": 0.29311698508243916,
239
+ "grad_norm": 37.12091478913465,
240
+ "learning_rate": 9.935251313189563e-07,
241
+ "logits/chosen": 4.5339274406433105,
242
+ "logits/rejected": 5.020459175109863,
243
+ "logps/chosen": -473.4126892089844,
244
+ "logps/rejected": -364.12939453125,
245
+ "loss": 0.7193,
246
+ "rewards/accuracies": 0.5562499761581421,
247
+ "rewards/chosen": 0.008073748089373112,
248
+ "rewards/margins": 0.035515300929546356,
249
+ "rewards/rejected": -0.02744155190885067,
250
+ "step": 140
251
+ },
252
+ {
253
+ "epoch": 0.31405391258832765,
254
+ "grad_norm": 36.18564480374988,
255
+ "learning_rate": 9.902582412711118e-07,
256
+ "logits/chosen": 4.540812969207764,
257
+ "logits/rejected": 4.964258193969727,
258
+ "logps/chosen": -426.5033264160156,
259
+ "logps/rejected": -353.1463317871094,
260
+ "loss": 0.7232,
261
+ "rewards/accuracies": 0.512499988079071,
262
+ "rewards/chosen": -0.01480414904654026,
263
+ "rewards/margins": 0.06241898611187935,
264
+ "rewards/rejected": -0.07722313702106476,
265
+ "step": 150
266
+ },
267
+ {
268
+ "epoch": 0.33499084009421615,
269
+ "grad_norm": 32.90233228487631,
270
+ "learning_rate": 9.86334145175542e-07,
271
+ "logits/chosen": 4.807779788970947,
272
+ "logits/rejected": 5.042156219482422,
273
+ "logps/chosen": -396.0440673828125,
274
+ "logps/rejected": -360.52886962890625,
275
+ "loss": 0.7013,
276
+ "rewards/accuracies": 0.5249999761581421,
277
+ "rewards/chosen": 0.010227044112980366,
278
+ "rewards/margins": 0.055318038910627365,
279
+ "rewards/rejected": -0.045090995728969574,
280
+ "step": 160
281
+ },
282
+ {
283
+ "epoch": 0.3559277676001047,
284
+ "grad_norm": 37.311964741290105,
285
+ "learning_rate": 9.817581034021272e-07,
286
+ "logits/chosen": 4.897703170776367,
287
+ "logits/rejected": 5.062272071838379,
288
+ "logps/chosen": -389.55810546875,
289
+ "logps/rejected": -329.748779296875,
290
+ "loss": 0.7043,
291
+ "rewards/accuracies": 0.518750011920929,
292
+ "rewards/chosen": -0.008063828572630882,
293
+ "rewards/margins": 0.049024712294340134,
294
+ "rewards/rejected": -0.05708853527903557,
295
+ "step": 170
296
+ },
297
+ {
298
+ "epoch": 0.3768646951059932,
299
+ "grad_norm": 34.14102924438106,
300
+ "learning_rate": 9.765362502737097e-07,
301
+ "logits/chosen": 5.039429187774658,
302
+ "logits/rejected": 5.049492835998535,
303
+ "logps/chosen": -384.9471130371094,
304
+ "logps/rejected": -381.6601257324219,
305
+ "loss": 0.7091,
306
+ "rewards/accuracies": 0.5562499761581421,
307
+ "rewards/chosen": 0.004380516707897186,
308
+ "rewards/margins": 0.038515396416187286,
309
+ "rewards/rejected": -0.0341348834335804,
310
+ "step": 180
311
+ },
312
+ {
313
+ "epoch": 0.39780162261188173,
314
+ "grad_norm": 35.88568083270778,
315
+ "learning_rate": 9.706755858428485e-07,
316
+ "logits/chosen": 5.025214195251465,
317
+ "logits/rejected": 5.097342014312744,
318
+ "logps/chosen": -397.56402587890625,
319
+ "logps/rejected": -396.12799072265625,
320
+ "loss": 0.7161,
321
+ "rewards/accuracies": 0.518750011920929,
322
+ "rewards/chosen": -0.056682147085666656,
323
+ "rewards/margins": 0.023842817172408104,
324
+ "rewards/rejected": -0.08052496612071991,
325
+ "step": 190
326
+ },
327
+ {
328
+ "epoch": 0.4187385501177702,
329
+ "grad_norm": 38.60485416145746,
330
+ "learning_rate": 9.641839665080363e-07,
331
+ "logits/chosen": 5.1590471267700195,
332
+ "logits/rejected": 5.290652275085449,
333
+ "logps/chosen": -399.9522399902344,
334
+ "logps/rejected": -363.53936767578125,
335
+ "loss": 0.7062,
336
+ "rewards/accuracies": 0.53125,
337
+ "rewards/chosen": 0.04297986626625061,
338
+ "rewards/margins": 0.06266864389181137,
339
+ "rewards/rejected": -0.019688773900270462,
340
+ "step": 200
341
+ },
342
+ {
343
+ "epoch": 0.4187385501177702,
344
+ "eval_logits/chosen": 4.725487232208252,
345
+ "eval_logits/rejected": 5.087867736816406,
346
+ "eval_logps/chosen": -443.55450439453125,
347
+ "eval_logps/rejected": -377.6705627441406,
348
+ "eval_loss": 0.6988219022750854,
349
+ "eval_rewards/accuracies": 0.567460298538208,
350
+ "eval_rewards/chosen": -0.025081120431423187,
351
+ "eval_rewards/margins": 0.07174728065729141,
352
+ "eval_rewards/rejected": -0.0968284010887146,
353
+ "eval_runtime": 21.5315,
354
+ "eval_samples_per_second": 92.887,
355
+ "eval_steps_per_second": 2.926,
356
+ "step": 200
357
+ },
358
+ {
359
+ "epoch": 0.4396754776236587,
360
+ "grad_norm": 36.10684145537435,
361
+ "learning_rate": 9.570700944819582e-07,
362
+ "logits/chosen": 4.827897548675537,
363
+ "logits/rejected": 5.154609680175781,
364
+ "logps/chosen": -451.2969665527344,
365
+ "logps/rejected": -372.116455078125,
366
+ "loss": 0.7141,
367
+ "rewards/accuracies": 0.543749988079071,
368
+ "rewards/chosen": -0.07117662578821182,
369
+ "rewards/margins": 0.02444976009428501,
370
+ "rewards/rejected": -0.09562637656927109,
371
+ "step": 210
372
+ },
373
+ {
374
+ "epoch": 0.46061240512954726,
375
+ "grad_norm": 33.13953697631782,
376
+ "learning_rate": 9.493435061259129e-07,
377
+ "logits/chosen": 5.24191427230835,
378
+ "logits/rejected": 5.477172374725342,
379
+ "logps/chosen": -365.3572692871094,
380
+ "logps/rejected": -345.34814453125,
381
+ "loss": 0.7138,
382
+ "rewards/accuracies": 0.48750001192092896,
383
+ "rewards/chosen": -0.07684006541967392,
384
+ "rewards/margins": -0.033464811742305756,
385
+ "rewards/rejected": -0.043375253677368164,
386
+ "step": 220
387
+ },
388
+ {
389
+ "epoch": 0.48154933263543576,
390
+ "grad_norm": 36.46869252662201,
391
+ "learning_rate": 9.4101455916603e-07,
392
+ "logits/chosen": 4.996638298034668,
393
+ "logits/rejected": 5.203185081481934,
394
+ "logps/chosen": -390.2169494628906,
395
+ "logps/rejected": -381.74090576171875,
396
+ "loss": 0.7027,
397
+ "rewards/accuracies": 0.574999988079071,
398
+ "rewards/chosen": -0.006761978380382061,
399
+ "rewards/margins": 0.11298926174640656,
400
+ "rewards/rejected": -0.11975125223398209,
401
+ "step": 230
402
+ },
403
+ {
404
+ "epoch": 0.5024862601413242,
405
+ "grad_norm": 32.465417090707724,
406
+ "learning_rate": 9.320944188084241e-07,
407
+ "logits/chosen": 4.961588382720947,
408
+ "logits/rejected": 5.104936122894287,
409
+ "logps/chosen": -405.73651123046875,
410
+ "logps/rejected": -368.39312744140625,
411
+ "loss": 0.6966,
412
+ "rewards/accuracies": 0.612500011920929,
413
+ "rewards/chosen": 0.04590672254562378,
414
+ "rewards/margins": 0.21882851421833038,
415
+ "rewards/rejected": -0.1729217916727066,
416
+ "step": 240
417
+ },
418
+ {
419
+ "epoch": 0.5234231876472127,
420
+ "grad_norm": 32.56446548910322,
421
+ "learning_rate": 9.225950427718974e-07,
422
+ "logits/chosen": 4.295259475708008,
423
+ "logits/rejected": 4.731950759887695,
424
+ "logps/chosen": -457.6085510253906,
425
+ "logps/rejected": -378.92083740234375,
426
+ "loss": 0.6895,
427
+ "rewards/accuracies": 0.5249999761581421,
428
+ "rewards/chosen": -0.08796132355928421,
429
+ "rewards/margins": 0.07854396849870682,
430
+ "rewards/rejected": -0.16650527715682983,
431
+ "step": 250
432
+ },
433
+ {
434
+ "epoch": 0.5443601151531012,
435
+ "grad_norm": 37.3555609307497,
436
+ "learning_rate": 9.125291652582547e-07,
437
+ "logits/chosen": 4.772681713104248,
438
+ "logits/rejected": 4.774602890014648,
439
+ "logps/chosen": -429.554931640625,
440
+ "logps/rejected": -350.5638122558594,
441
+ "loss": 0.697,
442
+ "rewards/accuracies": 0.574999988079071,
443
+ "rewards/chosen": 0.017600687220692635,
444
+ "rewards/margins": 0.15435068309307098,
445
+ "rewards/rejected": -0.1367500126361847,
446
+ "step": 260
447
+ },
448
+ {
449
+ "epoch": 0.5652970426589898,
450
+ "grad_norm": 33.42195670184768,
451
+ "learning_rate": 9.019102798817195e-07,
452
+ "logits/chosen": 4.580355644226074,
453
+ "logits/rejected": 5.011557102203369,
454
+ "logps/chosen": -446.68438720703125,
455
+ "logps/rejected": -380.5400390625,
456
+ "loss": 0.6778,
457
+ "rewards/accuracies": 0.5625,
458
+ "rewards/chosen": 0.019351882860064507,
459
+ "rewards/margins": 0.15824225544929504,
460
+ "rewards/rejected": -0.13889038562774658,
461
+ "step": 270
462
+ },
463
+ {
464
+ "epoch": 0.5862339701648783,
465
+ "grad_norm": 39.119947805234894,
466
+ "learning_rate": 8.90752621580335e-07,
467
+ "logits/chosen": 5.025314807891846,
468
+ "logits/rejected": 5.18468713760376,
469
+ "logps/chosen": -424.27191162109375,
470
+ "logps/rejected": -344.4115295410156,
471
+ "loss": 0.7073,
472
+ "rewards/accuracies": 0.625,
473
+ "rewards/chosen": 0.023305395618081093,
474
+ "rewards/margins": 0.14927226305007935,
475
+ "rewards/rejected": -0.125966876745224,
476
+ "step": 280
477
+ },
478
+ {
479
+ "epoch": 0.6071708976707668,
480
+ "grad_norm": 33.472133866310024,
481
+ "learning_rate": 8.79071147533597e-07,
482
+ "logits/chosen": 4.961835861206055,
483
+ "logits/rejected": 5.123082637786865,
484
+ "logps/chosen": -400.4245300292969,
485
+ "logps/rejected": -388.8963623046875,
486
+ "loss": 0.6687,
487
+ "rewards/accuracies": 0.643750011920929,
488
+ "rewards/chosen": 0.08449111878871918,
489
+ "rewards/margins": 0.2874522805213928,
490
+ "rewards/rejected": -0.20296116173267365,
491
+ "step": 290
492
+ },
493
+ {
494
+ "epoch": 0.6281078251766553,
495
+ "grad_norm": 38.29458361274397,
496
+ "learning_rate": 8.668815171119019e-07,
497
+ "logits/chosen": 4.6071085929870605,
498
+ "logits/rejected": 4.85768985748291,
499
+ "logps/chosen": -445.59393310546875,
500
+ "logps/rejected": -373.83636474609375,
501
+ "loss": 0.6782,
502
+ "rewards/accuracies": 0.643750011920929,
503
+ "rewards/chosen": 0.043323811143636703,
504
+ "rewards/margins": 0.2540794312953949,
505
+ "rewards/rejected": -0.2107556313276291,
506
+ "step": 300
507
+ },
508
+ {
509
+ "epoch": 0.6281078251766553,
510
+ "eval_logits/chosen": 4.662118434906006,
511
+ "eval_logits/rejected": 5.016141414642334,
512
+ "eval_logps/chosen": -443.56884765625,
513
+ "eval_logps/rejected": -377.883056640625,
514
+ "eval_loss": 0.6942777037620544,
515
+ "eval_rewards/accuracies": 0.567460298538208,
516
+ "eval_rewards/chosen": -0.03225937858223915,
517
+ "eval_rewards/margins": 0.17080551385879517,
518
+ "eval_rewards/rejected": -0.20306488871574402,
519
+ "eval_runtime": 21.6344,
520
+ "eval_samples_per_second": 92.445,
521
+ "eval_steps_per_second": 2.912,
522
+ "step": 300
523
+ },
524
+ {
525
+ "epoch": 0.6490447526825438,
526
+ "grad_norm": 35.277636776310345,
527
+ "learning_rate": 8.54200070884685e-07,
528
+ "logits/chosen": 4.7398271560668945,
529
+ "logits/rejected": 5.0438690185546875,
530
+ "logps/chosen": -455.08074951171875,
531
+ "logps/rejected": -346.21905517578125,
532
+ "loss": 0.6648,
533
+ "rewards/accuracies": 0.5874999761581421,
534
+ "rewards/chosen": -0.00532907247543335,
535
+ "rewards/margins": 0.22818481922149658,
536
+ "rewards/rejected": -0.23351387679576874,
537
+ "step": 310
538
+ },
539
+ {
540
+ "epoch": 0.6699816801884323,
541
+ "grad_norm": 36.1242464612453,
542
+ "learning_rate": 8.410438087153911e-07,
543
+ "logits/chosen": 4.823008060455322,
544
+ "logits/rejected": 4.949624538421631,
545
+ "logps/chosen": -420.04150390625,
546
+ "logps/rejected": -346.31134033203125,
547
+ "loss": 0.6633,
548
+ "rewards/accuracies": 0.5625,
549
+ "rewards/chosen": -0.02057427167892456,
550
+ "rewards/margins": 0.1925923228263855,
551
+ "rewards/rejected": -0.21316656470298767,
552
+ "step": 320
553
+ },
554
+ {
555
+ "epoch": 0.6909186076943209,
556
+ "grad_norm": 34.485635067716444,
557
+ "learning_rate": 8.274303669726426e-07,
558
+ "logits/chosen": 4.866278171539307,
559
+ "logits/rejected": 5.084838390350342,
560
+ "logps/chosen": -413.07647705078125,
561
+ "logps/rejected": -359.72637939453125,
562
+ "loss": 0.6964,
563
+ "rewards/accuracies": 0.5562499761581421,
564
+ "rewards/chosen": -0.0661710649728775,
565
+ "rewards/margins": 0.11275775730609894,
566
+ "rewards/rejected": -0.17892882227897644,
567
+ "step": 330
568
+ },
569
+ {
570
+ "epoch": 0.7118555352002094,
571
+ "grad_norm": 36.021204090397475,
572
+ "learning_rate": 8.133779948881513e-07,
573
+ "logits/chosen": 4.962647914886475,
574
+ "logits/rejected": 5.274256229400635,
575
+ "logps/chosen": -423.34796142578125,
576
+ "logps/rejected": -374.83831787109375,
577
+ "loss": 0.6843,
578
+ "rewards/accuracies": 0.581250011920929,
579
+ "rewards/chosen": -0.06308840215206146,
580
+ "rewards/margins": 0.14286582171916962,
581
+ "rewards/rejected": -0.20595422387123108,
582
+ "step": 340
583
+ },
584
+ {
585
+ "epoch": 0.7327924627060979,
586
+ "grad_norm": 40.80633953486743,
587
+ "learning_rate": 7.989055300930704e-07,
588
+ "logits/chosen": 4.9410552978515625,
589
+ "logits/rejected": 5.171365737915039,
590
+ "logps/chosen": -401.3800048828125,
591
+ "logps/rejected": -339.8207702636719,
592
+ "loss": 0.6799,
593
+ "rewards/accuracies": 0.581250011920929,
594
+ "rewards/chosen": -0.05132729932665825,
595
+ "rewards/margins": 0.14917483925819397,
596
+ "rewards/rejected": -0.20050212740898132,
597
+ "step": 350
598
+ },
599
+ {
600
+ "epoch": 0.7537293902119864,
601
+ "grad_norm": 32.290367261199215,
602
+ "learning_rate": 7.840323733655778e-07,
603
+ "logits/chosen": 4.760105609893799,
604
+ "logits/rejected": 4.936800956726074,
605
+ "logps/chosen": -475.94305419921875,
606
+ "logps/rejected": -373.4317626953125,
607
+ "loss": 0.6723,
608
+ "rewards/accuracies": 0.65625,
609
+ "rewards/chosen": 0.040363796055316925,
610
+ "rewards/margins": 0.26596716046333313,
611
+ "rewards/rejected": -0.2256033718585968,
612
+ "step": 360
613
+ },
614
+ {
615
+ "epoch": 0.7746663177178749,
616
+ "grad_norm": 33.44911142948228,
617
+ "learning_rate": 7.687784626235447e-07,
618
+ "logits/chosen": 4.649796485900879,
619
+ "logits/rejected": 4.882054328918457,
620
+ "logps/chosen": -437.54791259765625,
621
+ "logps/rejected": -343.1330871582031,
622
+ "loss": 0.6722,
623
+ "rewards/accuracies": 0.6000000238418579,
624
+ "rewards/chosen": -0.03584844991564751,
625
+ "rewards/margins": 0.2080194056034088,
626
+ "rewards/rejected": -0.2438678741455078,
627
+ "step": 370
628
+ },
629
+ {
630
+ "epoch": 0.7956032452237635,
631
+ "grad_norm": 32.64019199720913,
632
+ "learning_rate": 7.531642461971514e-07,
633
+ "logits/chosen": 4.7331953048706055,
634
+ "logits/rejected": 5.047934532165527,
635
+ "logps/chosen": -434.0751953125,
636
+ "logps/rejected": -363.1179504394531,
637
+ "loss": 0.673,
638
+ "rewards/accuracies": 0.581250011920929,
639
+ "rewards/chosen": -0.03724004700779915,
640
+ "rewards/margins": 0.23209133744239807,
641
+ "rewards/rejected": -0.2693313956260681,
642
+ "step": 380
643
+ },
644
+ {
645
+ "epoch": 0.816540172729652,
646
+ "grad_norm": 33.38515796622506,
647
+ "learning_rate": 7.372106554172801e-07,
648
+ "logits/chosen": 4.660643577575684,
649
+ "logits/rejected": 4.7719621658325195,
650
+ "logps/chosen": -434.41015625,
651
+ "logps/rejected": -394.9471130371094,
652
+ "loss": 0.6778,
653
+ "rewards/accuracies": 0.59375,
654
+ "rewards/chosen": 0.021881069988012314,
655
+ "rewards/margins": 0.2415298968553543,
656
+ "rewards/rejected": -0.2196488082408905,
657
+ "step": 390
658
+ },
659
+ {
660
+ "epoch": 0.8374771002355405,
661
+ "grad_norm": 32.60957530709497,
662
+ "learning_rate": 7.209390765564318e-07,
663
+ "logits/chosen": 4.807684421539307,
664
+ "logits/rejected": 5.217709541320801,
665
+ "logps/chosen": -368.07122802734375,
666
+ "logps/rejected": -328.12066650390625,
667
+ "loss": 0.6863,
668
+ "rewards/accuracies": 0.59375,
669
+ "rewards/chosen": -0.03271085396409035,
670
+ "rewards/margins": 0.1988353729248047,
671
+ "rewards/rejected": -0.23154623806476593,
672
+ "step": 400
673
+ },
674
+ {
675
+ "epoch": 0.8374771002355405,
676
+ "eval_logits/chosen": 4.645900249481201,
677
+ "eval_logits/rejected": 4.999230861663818,
678
+ "eval_logps/chosen": -443.68084716796875,
679
+ "eval_logps/rejected": -378.0348205566406,
680
+ "eval_loss": 0.6756832003593445,
681
+ "eval_rewards/accuracies": 0.5992063283920288,
682
+ "eval_rewards/chosen": -0.08822782337665558,
683
+ "eval_rewards/margins": 0.19070643186569214,
684
+ "eval_rewards/rejected": -0.2789342403411865,
685
+ "eval_runtime": 21.4973,
686
+ "eval_samples_per_second": 93.035,
687
+ "eval_steps_per_second": 2.931,
688
+ "step": 400
689
+ },
690
+ {
691
+ "epoch": 0.8584140277414289,
692
+ "grad_norm": 80.32344495768098,
693
+ "learning_rate": 7.043713221597773e-07,
694
+ "logits/chosen": 4.9558234214782715,
695
+ "logits/rejected": 5.171336650848389,
696
+ "logps/chosen": -464.4634704589844,
697
+ "logps/rejected": -378.52130126953125,
698
+ "loss": 0.6691,
699
+ "rewards/accuracies": 0.5687500238418579,
700
+ "rewards/chosen": -0.028427015990018845,
701
+ "rewards/margins": 0.16248683631420135,
702
+ "rewards/rejected": -0.1909138560295105,
703
+ "step": 410
704
+ },
705
+ {
706
+ "epoch": 0.8793509552473174,
707
+ "grad_norm": 38.003974257278905,
708
+ "learning_rate": 6.875296018047809e-07,
709
+ "logits/chosen": 5.062918663024902,
710
+ "logits/rejected": 5.093894958496094,
711
+ "logps/chosen": -414.597900390625,
712
+ "logps/rejected": -392.76422119140625,
713
+ "loss": 0.6778,
714
+ "rewards/accuracies": 0.612500011920929,
715
+ "rewards/chosen": -0.05205658823251724,
716
+ "rewards/margins": 0.166357159614563,
717
+ "rewards/rejected": -0.21841374039649963,
718
+ "step": 420
719
+ },
720
+ {
721
+ "epoch": 0.9002878827532059,
722
+ "grad_norm": 33.39326538286459,
723
+ "learning_rate": 6.704364923285857e-07,
724
+ "logits/chosen": 4.783626556396484,
725
+ "logits/rejected": 5.061443328857422,
726
+ "logps/chosen": -454.7694396972656,
727
+ "logps/rejected": -349.71099853515625,
728
+ "loss": 0.6613,
729
+ "rewards/accuracies": 0.6312500238418579,
730
+ "rewards/chosen": -0.03129550814628601,
731
+ "rewards/margins": 0.2672887146472931,
732
+ "rewards/rejected": -0.2985842227935791,
733
+ "step": 430
734
+ },
735
+ {
736
+ "epoch": 0.9212248102590945,
737
+ "grad_norm": 33.88594593881104,
738
+ "learning_rate": 6.531149075630796e-07,
739
+ "logits/chosen": 4.762629985809326,
740
+ "logits/rejected": 4.992688179016113,
741
+ "logps/chosen": -422.49639892578125,
742
+ "logps/rejected": -342.6626892089844,
743
+ "loss": 0.6829,
744
+ "rewards/accuracies": 0.574999988079071,
745
+ "rewards/chosen": -0.08267354220151901,
746
+ "rewards/margins": 0.18921074271202087,
747
+ "rewards/rejected": -0.2718842923641205,
748
+ "step": 440
749
+ },
750
+ {
751
+ "epoch": 0.942161737764983,
752
+ "grad_norm": 34.194378360359096,
753
+ "learning_rate": 6.355880676182085e-07,
754
+ "logits/chosen": 4.86130952835083,
755
+ "logits/rejected": 5.088041305541992,
756
+ "logps/chosen": -423.82366943359375,
757
+ "logps/rejected": -386.20172119140625,
758
+ "loss": 0.6777,
759
+ "rewards/accuracies": 0.637499988079071,
760
+ "rewards/chosen": -0.029518108814954758,
761
+ "rewards/margins": 0.2812921106815338,
762
+ "rewards/rejected": -0.3108102083206177,
763
+ "step": 450
764
+ },
765
+ {
766
+ "epoch": 0.9630986652708715,
767
+ "grad_norm": 35.220161431379815,
768
+ "learning_rate": 6.178794677547137e-07,
769
+ "logits/chosen": 4.96859073638916,
770
+ "logits/rejected": 5.295912265777588,
771
+ "logps/chosen": -408.28228759765625,
772
+ "logps/rejected": -337.6819763183594,
773
+ "loss": 0.6573,
774
+ "rewards/accuracies": 0.65625,
775
+ "rewards/chosen": -0.02989841438829899,
776
+ "rewards/margins": 0.3131854832172394,
777
+ "rewards/rejected": -0.343083918094635,
778
+ "step": 460
779
+ },
780
+ {
781
+ "epoch": 0.98403559277676,
782
+ "grad_norm": 36.11741005068747,
783
+ "learning_rate": 6.000128468880222e-07,
784
+ "logits/chosen": 4.616504669189453,
785
+ "logits/rejected": 4.935946464538574,
786
+ "logps/chosen": -435.3017578125,
787
+ "logps/rejected": -375.13800048828125,
788
+ "loss": 0.6647,
789
+ "rewards/accuracies": 0.606249988079071,
790
+ "rewards/chosen": -0.11524273455142975,
791
+ "rewards/margins": 0.2546766698360443,
792
+ "rewards/rejected": -0.3699193596839905,
793
+ "step": 470
794
+ },
795
+ {
796
+ "epoch": 1.0049725202826485,
797
+ "grad_norm": 32.471857091487834,
798
+ "learning_rate": 5.820121557655108e-07,
799
+ "logits/chosen": 4.9493536949157715,
800
+ "logits/rejected": 5.226868152618408,
801
+ "logps/chosen": -423.6285095214844,
802
+ "logps/rejected": -362.1949768066406,
803
+ "loss": 0.6629,
804
+ "rewards/accuracies": 0.637499988079071,
805
+ "rewards/chosen": 0.05261852219700813,
806
+ "rewards/margins": 0.3280298113822937,
807
+ "rewards/rejected": -0.27541130781173706,
808
+ "step": 480
809
+ },
810
+ {
811
+ "epoch": 1.025909447788537,
812
+ "grad_norm": 39.51652905408664,
813
+ "learning_rate": 5.639015248598023e-07,
814
+ "logits/chosen": 4.762259006500244,
815
+ "logits/rejected": 5.021244525909424,
816
+ "logps/chosen": -424.96697998046875,
817
+ "logps/rejected": -342.76666259765625,
818
+ "loss": 0.6644,
819
+ "rewards/accuracies": 0.6000000238418579,
820
+ "rewards/chosen": -0.03716667741537094,
821
+ "rewards/margins": 0.2011403739452362,
822
+ "rewards/rejected": -0.23830704391002655,
823
+ "step": 490
824
+ },
825
+ {
826
+ "epoch": 1.0468463752944255,
827
+ "grad_norm": 34.43579926142672,
828
+ "learning_rate": 5.457052320211339e-07,
829
+ "logits/chosen": 4.543593406677246,
830
+ "logits/rejected": 4.786489009857178,
831
+ "logps/chosen": -434.46746826171875,
832
+ "logps/rejected": -367.75689697265625,
833
+ "loss": 0.6836,
834
+ "rewards/accuracies": 0.606249988079071,
835
+ "rewards/chosen": -0.08059108257293701,
836
+ "rewards/margins": 0.23346829414367676,
837
+ "rewards/rejected": -0.3140593469142914,
838
+ "step": 500
839
+ },
840
+ {
841
+ "epoch": 1.0468463752944255,
842
+ "eval_logits/chosen": 4.617003440856934,
843
+ "eval_logits/rejected": 4.9695563316345215,
844
+ "eval_logps/chosen": -443.69580078125,
845
+ "eval_logps/rejected": -378.1418762207031,
846
+ "eval_loss": 0.6708300113677979,
847
+ "eval_rewards/accuracies": 0.6349206566810608,
848
+ "eval_rewards/chosen": -0.09571509808301926,
849
+ "eval_rewards/margins": 0.23677198588848114,
850
+ "eval_rewards/rejected": -0.3324871063232422,
851
+ "eval_runtime": 20.9626,
852
+ "eval_samples_per_second": 95.408,
853
+ "eval_steps_per_second": 3.005,
854
+ "step": 500
855
+ },
856
+ {
857
+ "epoch": 1.067783302800314,
858
+ "grad_norm": 33.3201987416808,
859
+ "learning_rate": 5.274476699321637e-07,
860
+ "logits/chosen": 4.583409786224365,
861
+ "logits/rejected": 4.803020477294922,
862
+ "logps/chosen": -390.48565673828125,
863
+ "logps/rejected": -351.6776123046875,
864
+ "loss": 0.6779,
865
+ "rewards/accuracies": 0.6312500238418579,
866
+ "rewards/chosen": -0.0327589213848114,
867
+ "rewards/margins": 0.32544368505477905,
868
+ "rewards/rejected": -0.35820263624191284,
869
+ "step": 510
870
+ },
871
+ {
872
+ "epoch": 1.0887202303062025,
873
+ "grad_norm": 30.742730814185435,
874
+ "learning_rate": 5.091533134088387e-07,
875
+ "logits/chosen": 4.493949890136719,
876
+ "logits/rejected": 4.9839911460876465,
877
+ "logps/chosen": -383.7958984375,
878
+ "logps/rejected": -354.36480712890625,
879
+ "loss": 0.656,
880
+ "rewards/accuracies": 0.5375000238418579,
881
+ "rewards/chosen": -0.07341745495796204,
882
+ "rewards/margins": 0.19709812104701996,
883
+ "rewards/rejected": -0.2705155909061432,
884
+ "step": 520
885
+ },
886
+ {
887
+ "epoch": 1.109657157812091,
888
+ "grad_norm": 34.05900047947194,
889
+ "learning_rate": 4.908466865911614e-07,
890
+ "logits/chosen": 4.793222904205322,
891
+ "logits/rejected": 5.078155517578125,
892
+ "logps/chosen": -401.0002746582031,
893
+ "logps/rejected": -340.4061279296875,
894
+ "loss": 0.6618,
895
+ "rewards/accuracies": 0.6499999761581421,
896
+ "rewards/chosen": -0.05051114410161972,
897
+ "rewards/margins": 0.27152642607688904,
898
+ "rewards/rejected": -0.32203757762908936,
899
+ "step": 530
900
+ },
901
+ {
902
+ "epoch": 1.1305940853179797,
903
+ "grad_norm": 30.483486401054424,
904
+ "learning_rate": 4.7255233006783624e-07,
905
+ "logits/chosen": 4.857717990875244,
906
+ "logits/rejected": 5.0497636795043945,
907
+ "logps/chosen": -375.65362548828125,
908
+ "logps/rejected": -330.26165771484375,
909
+ "loss": 0.6544,
910
+ "rewards/accuracies": 0.7124999761581421,
911
+ "rewards/chosen": -0.02352207899093628,
912
+ "rewards/margins": 0.35729408264160156,
913
+ "rewards/rejected": -0.38081610202789307,
914
+ "step": 540
915
+ },
916
+ {
917
+ "epoch": 1.151531012823868,
918
+ "grad_norm": 35.09603470685652,
919
+ "learning_rate": 4.5429476797886617e-07,
920
+ "logits/chosen": 4.932369232177734,
921
+ "logits/rejected": 5.050224781036377,
922
+ "logps/chosen": -430.0126953125,
923
+ "logps/rejected": -331.1691589355469,
924
+ "loss": 0.6599,
925
+ "rewards/accuracies": 0.6312500238418579,
926
+ "rewards/chosen": -0.0060789333656430244,
927
+ "rewards/margins": 0.25288811326026917,
928
+ "rewards/rejected": -0.25896701216697693,
929
+ "step": 550
930
+ },
931
+ {
932
+ "epoch": 1.1724679403297567,
933
+ "grad_norm": 40.64422646125966,
934
+ "learning_rate": 4.3609847514019763e-07,
935
+ "logits/chosen": 4.637743949890137,
936
+ "logits/rejected": 5.000674724578857,
937
+ "logps/chosen": -420.3258361816406,
938
+ "logps/rejected": -362.2751159667969,
939
+ "loss": 0.6718,
940
+ "rewards/accuracies": 0.574999988079071,
941
+ "rewards/chosen": -0.0094971414655447,
942
+ "rewards/margins": 0.22678783535957336,
943
+ "rewards/rejected": -0.23628497123718262,
944
+ "step": 560
945
+ },
946
+ {
947
+ "epoch": 1.193404867835645,
948
+ "grad_norm": 32.638009640148645,
949
+ "learning_rate": 4.179878442344892e-07,
950
+ "logits/chosen": 4.855754375457764,
951
+ "logits/rejected": 4.871184349060059,
952
+ "logps/chosen": -384.08660888671875,
953
+ "logps/rejected": -371.4262390136719,
954
+ "loss": 0.6766,
955
+ "rewards/accuracies": 0.625,
956
+ "rewards/chosen": -0.05095939710736275,
957
+ "rewards/margins": 0.28148993849754333,
958
+ "rewards/rejected": -0.332449346780777,
959
+ "step": 570
960
+ },
961
+ {
962
+ "epoch": 1.2143417953415336,
963
+ "grad_norm": 35.519971577107064,
964
+ "learning_rate": 3.9998715311197783e-07,
965
+ "logits/chosen": 4.73850679397583,
966
+ "logits/rejected": 5.173120021820068,
967
+ "logps/chosen": -414.8775329589844,
968
+ "logps/rejected": -341.5818786621094,
969
+ "loss": 0.6508,
970
+ "rewards/accuracies": 0.6000000238418579,
971
+ "rewards/chosen": -0.09668377041816711,
972
+ "rewards/margins": 0.25211262702941895,
973
+ "rewards/rejected": -0.34879642724990845,
974
+ "step": 580
975
+ },
976
+ {
977
+ "epoch": 1.235278722847422,
978
+ "grad_norm": 34.20580765627037,
979
+ "learning_rate": 3.821205322452863e-07,
980
+ "logits/chosen": 4.916988372802734,
981
+ "logits/rejected": 5.1998610496521,
982
+ "logps/chosen": -448.5626525878906,
983
+ "logps/rejected": -367.84027099609375,
984
+ "loss": 0.644,
985
+ "rewards/accuracies": 0.625,
986
+ "rewards/chosen": -0.07886572182178497,
987
+ "rewards/margins": 0.3578983247280121,
988
+ "rewards/rejected": -0.43676406145095825,
989
+ "step": 590
990
+ },
991
+ {
992
+ "epoch": 1.2562156503533106,
993
+ "grad_norm": 33.854286929995176,
994
+ "learning_rate": 3.6441193238179146e-07,
995
+ "logits/chosen": 4.852269649505615,
996
+ "logits/rejected": 4.903324127197266,
997
+ "logps/chosen": -446.4149475097656,
998
+ "logps/rejected": -423.3356018066406,
999
+ "loss": 0.6349,
1000
+ "rewards/accuracies": 0.581250011920929,
1001
+ "rewards/chosen": -0.14010193943977356,
1002
+ "rewards/margins": 0.15067996084690094,
1003
+ "rewards/rejected": -0.2907818853855133,
1004
+ "step": 600
1005
+ },
1006
+ {
1007
+ "epoch": 1.2562156503533106,
1008
+ "eval_logits/chosen": 4.62031364440918,
1009
+ "eval_logits/rejected": 4.9707465171813965,
1010
+ "eval_logps/chosen": -443.61212158203125,
1011
+ "eval_logps/rejected": -378.1197204589844,
1012
+ "eval_loss": 0.6720485091209412,
1013
+ "eval_rewards/accuracies": 0.5992063283920288,
1014
+ "eval_rewards/chosen": -0.053870752453804016,
1015
+ "eval_rewards/margins": 0.267531156539917,
1016
+ "eval_rewards/rejected": -0.321401983499527,
1017
+ "eval_runtime": 20.8046,
1018
+ "eval_samples_per_second": 96.133,
1019
+ "eval_steps_per_second": 3.028,
1020
+ "step": 600
1021
+ },
1022
+ {
1023
+ "epoch": 1.2771525778591992,
1024
+ "grad_norm": 36.085842973391074,
1025
+ "learning_rate": 3.4688509243692034e-07,
1026
+ "logits/chosen": 4.767918586730957,
1027
+ "logits/rejected": 4.757430553436279,
1028
+ "logps/chosen": -407.41668701171875,
1029
+ "logps/rejected": -317.3873596191406,
1030
+ "loss": 0.6402,
1031
+ "rewards/accuracies": 0.643750011920929,
1032
+ "rewards/chosen": -0.08183420449495316,
1033
+ "rewards/margins": 0.33883604407310486,
1034
+ "rewards/rejected": -0.42067021131515503,
1035
+ "step": 610
1036
+ },
1037
+ {
1038
+ "epoch": 1.2980895053650876,
1039
+ "grad_norm": 29.698333183198105,
1040
+ "learning_rate": 3.295635076714144e-07,
1041
+ "logits/chosen": 5.085806846618652,
1042
+ "logits/rejected": 5.415268898010254,
1043
+ "logps/chosen": -395.627685546875,
1044
+ "logps/rejected": -331.7653503417969,
1045
+ "loss": 0.6266,
1046
+ "rewards/accuracies": 0.65625,
1047
+ "rewards/chosen": -0.09060301631689072,
1048
+ "rewards/margins": 0.3094441294670105,
1049
+ "rewards/rejected": -0.4000471234321594,
1050
+ "step": 620
1051
+ },
1052
+ {
1053
+ "epoch": 1.3190264328709762,
1054
+ "grad_norm": 35.208773349468885,
1055
+ "learning_rate": 3.12470398195219e-07,
1056
+ "logits/chosen": 4.828533172607422,
1057
+ "logits/rejected": 4.925856113433838,
1058
+ "logps/chosen": -418.5848083496094,
1059
+ "logps/rejected": -376.3353576660156,
1060
+ "loss": 0.6486,
1061
+ "rewards/accuracies": 0.6312500238418579,
1062
+ "rewards/chosen": 0.06391973793506622,
1063
+ "rewards/margins": 0.44674786925315857,
1064
+ "rewards/rejected": -0.3828281760215759,
1065
+ "step": 630
1066
+ },
1067
+ {
1068
+ "epoch": 1.3399633603768648,
1069
+ "grad_norm": 29.673309842493335,
1070
+ "learning_rate": 2.956286778402226e-07,
1071
+ "logits/chosen": 4.896113872528076,
1072
+ "logits/rejected": 5.183098793029785,
1073
+ "logps/chosen": -394.4980773925781,
1074
+ "logps/rejected": -374.76422119140625,
1075
+ "loss": 0.6394,
1076
+ "rewards/accuracies": 0.6312500238418579,
1077
+ "rewards/chosen": -0.04267222806811333,
1078
+ "rewards/margins": 0.2913575768470764,
1079
+ "rewards/rejected": -0.33402982354164124,
1080
+ "step": 640
1081
+ },
1082
+ {
1083
+ "epoch": 1.3609002878827532,
1084
+ "grad_norm": 35.03684415648848,
1085
+ "learning_rate": 2.7906092344356826e-07,
1086
+ "logits/chosen": 4.610795021057129,
1087
+ "logits/rejected": 4.8373188972473145,
1088
+ "logps/chosen": -379.4288024902344,
1089
+ "logps/rejected": -345.05596923828125,
1090
+ "loss": 0.6646,
1091
+ "rewards/accuracies": 0.581250011920929,
1092
+ "rewards/chosen": -0.1311950385570526,
1093
+ "rewards/margins": 0.2357216328382492,
1094
+ "rewards/rejected": -0.366916686296463,
1095
+ "step": 650
1096
+ },
1097
+ {
1098
+ "epoch": 1.3818372153886418,
1099
+ "grad_norm": 33.06984951084542,
1100
+ "learning_rate": 2.6278934458271996e-07,
1101
+ "logits/chosen": 4.830328941345215,
1102
+ "logits/rejected": 5.017812252044678,
1103
+ "logps/chosen": -377.4278564453125,
1104
+ "logps/rejected": -343.86529541015625,
1105
+ "loss": 0.6613,
1106
+ "rewards/accuracies": 0.5625,
1107
+ "rewards/chosen": -0.10503290593624115,
1108
+ "rewards/margins": 0.11762680858373642,
1109
+ "rewards/rejected": -0.22265975177288055,
1110
+ "step": 660
1111
+ },
1112
+ {
1113
+ "epoch": 1.4027741428945302,
1114
+ "grad_norm": 31.761556922593446,
1115
+ "learning_rate": 2.468357538028487e-07,
1116
+ "logits/chosen": 4.728631496429443,
1117
+ "logits/rejected": 4.90619421005249,
1118
+ "logps/chosen": -413.2724609375,
1119
+ "logps/rejected": -346.9877624511719,
1120
+ "loss": 0.6393,
1121
+ "rewards/accuracies": 0.637499988079071,
1122
+ "rewards/chosen": -0.03663766756653786,
1123
+ "rewards/margins": 0.2855125069618225,
1124
+ "rewards/rejected": -0.32215017080307007,
1125
+ "step": 670
1126
+ },
1127
+ {
1128
+ "epoch": 1.4237110704004188,
1129
+ "grad_norm": 34.93162849349177,
1130
+ "learning_rate": 2.312215373764551e-07,
1131
+ "logits/chosen": 4.728277206420898,
1132
+ "logits/rejected": 5.018845558166504,
1133
+ "logps/chosen": -421.8961486816406,
1134
+ "logps/rejected": -403.57354736328125,
1135
+ "loss": 0.6533,
1136
+ "rewards/accuracies": 0.606249988079071,
1137
+ "rewards/chosen": -0.1260446161031723,
1138
+ "rewards/margins": 0.2200162708759308,
1139
+ "rewards/rejected": -0.3460609018802643,
1140
+ "step": 680
1141
+ },
1142
+ {
1143
+ "epoch": 1.4446479979063072,
1144
+ "grad_norm": 33.66523822793528,
1145
+ "learning_rate": 2.1596762663442213e-07,
1146
+ "logits/chosen": 4.863284111022949,
1147
+ "logits/rejected": 4.840500354766846,
1148
+ "logps/chosen": -422.4331970214844,
1149
+ "logps/rejected": -355.96868896484375,
1150
+ "loss": 0.6477,
1151
+ "rewards/accuracies": 0.625,
1152
+ "rewards/chosen": -0.10065089166164398,
1153
+ "rewards/margins": 0.24511468410491943,
1154
+ "rewards/rejected": -0.3457655906677246,
1155
+ "step": 690
1156
+ },
1157
+ {
1158
+ "epoch": 1.4655849254121958,
1159
+ "grad_norm": 34.48257400044076,
1160
+ "learning_rate": 2.0109446990692963e-07,
1161
+ "logits/chosen": 4.709015846252441,
1162
+ "logits/rejected": 4.914425849914551,
1163
+ "logps/chosen": -452.9461364746094,
1164
+ "logps/rejected": -442.56658935546875,
1165
+ "loss": 0.6427,
1166
+ "rewards/accuracies": 0.65625,
1167
+ "rewards/chosen": 0.08032918721437454,
1168
+ "rewards/margins": 0.39584842324256897,
1169
+ "rewards/rejected": -0.3155192732810974,
1170
+ "step": 700
1171
+ },
1172
+ {
1173
+ "epoch": 1.4655849254121958,
1174
+ "eval_logits/chosen": 4.592012882232666,
1175
+ "eval_logits/rejected": 4.943046569824219,
1176
+ "eval_logps/chosen": -443.6796875,
1177
+ "eval_logps/rejected": -378.1680908203125,
1178
+ "eval_loss": 0.6795812845230103,
1179
+ "eval_rewards/accuracies": 0.60317462682724,
1180
+ "eval_rewards/chosen": -0.08766676485538483,
1181
+ "eval_rewards/margins": 0.25792673230171204,
1182
+ "eval_rewards/rejected": -0.34559354186058044,
1183
+ "eval_runtime": 21.1978,
1184
+ "eval_samples_per_second": 94.349,
1185
+ "eval_steps_per_second": 2.972,
1186
+ "step": 700
1187
+ },
1188
+ {
1189
+ "epoch": 1.4865218529180844,
1190
+ "grad_norm": 36.350524448045036,
1191
+ "learning_rate": 1.8662200511184872e-07,
1192
+ "logits/chosen": 4.871232509613037,
1193
+ "logits/rejected": 4.886293411254883,
1194
+ "logps/chosen": -417.8133850097656,
1195
+ "logps/rejected": -384.177490234375,
1196
+ "loss": 0.6669,
1197
+ "rewards/accuracies": 0.6000000238418579,
1198
+ "rewards/chosen": -0.12399878352880478,
1199
+ "rewards/margins": 0.27855515480041504,
1200
+ "rewards/rejected": -0.40255388617515564,
1201
+ "step": 710
1202
+ },
1203
+ {
1204
+ "epoch": 1.5074587804239727,
1205
+ "grad_norm": 34.52058371975813,
1206
+ "learning_rate": 1.725696330273575e-07,
1207
+ "logits/chosen": 4.8079633712768555,
1208
+ "logits/rejected": 5.118483543395996,
1209
+ "logps/chosen": -433.02032470703125,
1210
+ "logps/rejected": -383.21539306640625,
1211
+ "loss": 0.6234,
1212
+ "rewards/accuracies": 0.65625,
1213
+ "rewards/chosen": 0.049599818885326385,
1214
+ "rewards/margins": 0.36388009786605835,
1215
+ "rewards/rejected": -0.31428030133247375,
1216
+ "step": 720
1217
+ },
1218
+ {
1219
+ "epoch": 1.5283957079298613,
1220
+ "grad_norm": 36.62094520000859,
1221
+ "learning_rate": 1.589561912846089e-07,
1222
+ "logits/chosen": 4.67967414855957,
1223
+ "logits/rejected": 4.974714756011963,
1224
+ "logps/chosen": -402.2828063964844,
1225
+ "logps/rejected": -343.87939453125,
1226
+ "loss": 0.6419,
1227
+ "rewards/accuracies": 0.675000011920929,
1228
+ "rewards/chosen": -0.02172028087079525,
1229
+ "rewards/margins": 0.3966042995452881,
1230
+ "rewards/rejected": -0.4183245599269867,
1231
+ "step": 730
1232
+ },
1233
+ {
1234
+ "epoch": 1.54933263543575,
1235
+ "grad_norm": 34.85140828972076,
1236
+ "learning_rate": 1.4579992911531496e-07,
1237
+ "logits/chosen": 4.999066352844238,
1238
+ "logits/rejected": 5.089913845062256,
1239
+ "logps/chosen": -442.08538818359375,
1240
+ "logps/rejected": -387.76953125,
1241
+ "loss": 0.6641,
1242
+ "rewards/accuracies": 0.6000000238418579,
1243
+ "rewards/chosen": 0.024145543575286865,
1244
+ "rewards/margins": 0.3119828999042511,
1245
+ "rewards/rejected": -0.28783735632896423,
1246
+ "step": 740
1247
+ },
1248
+ {
1249
+ "epoch": 1.5702695629416383,
1250
+ "grad_norm": 33.55559408410901,
1251
+ "learning_rate": 1.3311848288809813e-07,
1252
+ "logits/chosen": 4.944571018218994,
1253
+ "logits/rejected": 4.949963569641113,
1254
+ "logps/chosen": -422.9165954589844,
1255
+ "logps/rejected": -378.2356262207031,
1256
+ "loss": 0.6431,
1257
+ "rewards/accuracies": 0.581250011920929,
1258
+ "rewards/chosen": -0.10791780799627304,
1259
+ "rewards/margins": 0.16808216273784637,
1260
+ "rewards/rejected": -0.2759999632835388,
1261
+ "step": 750
1262
+ },
1263
+ {
1264
+ "epoch": 1.5912064904475267,
1265
+ "grad_norm": 33.284252993746314,
1266
+ "learning_rate": 1.209288524664029e-07,
1267
+ "logits/chosen": 4.269396781921387,
1268
+ "logits/rejected": 4.640176296234131,
1269
+ "logps/chosen": -513.432861328125,
1270
+ "logps/rejected": -464.742431640625,
1271
+ "loss": 0.6505,
1272
+ "rewards/accuracies": 0.59375,
1273
+ "rewards/chosen": 0.00313050439581275,
1274
+ "rewards/margins": 0.3427557051181793,
1275
+ "rewards/rejected": -0.33962517976760864,
1276
+ "step": 760
1277
+ },
1278
+ {
1279
+ "epoch": 1.6121434179534153,
1280
+ "grad_norm": 33.301123590813035,
1281
+ "learning_rate": 1.0924737841966497e-07,
1282
+ "logits/chosen": 4.588865756988525,
1283
+ "logits/rejected": 4.75103235244751,
1284
+ "logps/chosen": -465.42059326171875,
1285
+ "logps/rejected": -370.064697265625,
1286
+ "loss": 0.653,
1287
+ "rewards/accuracies": 0.668749988079071,
1288
+ "rewards/chosen": -0.01498096901923418,
1289
+ "rewards/margins": 0.34728002548217773,
1290
+ "rewards/rejected": -0.3622610569000244,
1291
+ "step": 770
1292
+ },
1293
+ {
1294
+ "epoch": 1.633080345459304,
1295
+ "grad_norm": 33.707974100314466,
1296
+ "learning_rate": 9.808972011828054e-08,
1297
+ "logits/chosen": 4.657374382019043,
1298
+ "logits/rejected": 5.004950523376465,
1299
+ "logps/chosen": -452.0787048339844,
1300
+ "logps/rejected": -383.26824951171875,
1301
+ "loss": 0.6419,
1302
+ "rewards/accuracies": 0.6625000238418579,
1303
+ "rewards/chosen": 0.06604544818401337,
1304
+ "rewards/margins": 0.4590230882167816,
1305
+ "rewards/rejected": -0.39297762513160706,
1306
+ "step": 780
1307
+ },
1308
+ {
1309
+ "epoch": 1.6540172729651923,
1310
+ "grad_norm": 36.400512256730096,
1311
+ "learning_rate": 8.747083474174527e-08,
1312
+ "logits/chosen": 4.775164604187012,
1313
+ "logits/rejected": 5.237417221069336,
1314
+ "logps/chosen": -431.0052185058594,
1315
+ "logps/rejected": -372.1168212890625,
1316
+ "loss": 0.6398,
1317
+ "rewards/accuracies": 0.668749988079071,
1318
+ "rewards/chosen": -0.017582783475518227,
1319
+ "rewards/margins": 0.35258156061172485,
1320
+ "rewards/rejected": -0.37016433477401733,
1321
+ "step": 790
1322
+ },
1323
+ {
1324
+ "epoch": 1.674954200471081,
1325
+ "grad_norm": 29.96252260731642,
1326
+ "learning_rate": 7.740495722810269e-08,
1327
+ "logits/chosen": 4.998331546783447,
1328
+ "logits/rejected": 4.909043312072754,
1329
+ "logps/chosen": -489.783447265625,
1330
+ "logps/rejected": -415.0606384277344,
1331
+ "loss": 0.6128,
1332
+ "rewards/accuracies": 0.606249988079071,
1333
+ "rewards/chosen": 0.04877934604883194,
1334
+ "rewards/margins": 0.3542923033237457,
1335
+ "rewards/rejected": -0.3055129647254944,
1336
+ "step": 800
1337
+ },
1338
+ {
1339
+ "epoch": 1.674954200471081,
1340
+ "eval_logits/chosen": 4.6106181144714355,
1341
+ "eval_logits/rejected": 4.968925476074219,
1342
+ "eval_logps/chosen": -443.625244140625,
1343
+ "eval_logps/rejected": -378.2127990722656,
1344
+ "eval_loss": 0.6703739166259766,
1345
+ "eval_rewards/accuracies": 0.6071428656578064,
1346
+ "eval_rewards/chosen": -0.06042463704943657,
1347
+ "eval_rewards/margins": 0.30752548575401306,
1348
+ "eval_rewards/rejected": -0.3679501414299011,
1349
+ "eval_runtime": 21.1621,
1350
+ "eval_samples_per_second": 94.509,
1351
+ "eval_steps_per_second": 2.977,
1352
+ "step": 800
1353
+ },
1354
+ {
1355
+ "epoch": 1.6958911279769695,
1356
+ "grad_norm": 34.07376933070953,
1357
+ "learning_rate": 6.790558119157597e-08,
1358
+ "logits/chosen": 4.842529773712158,
1359
+ "logits/rejected": 4.945174217224121,
1360
+ "logps/chosen": -446.68682861328125,
1361
+ "logps/rejected": -379.9209899902344,
1362
+ "loss": 0.6409,
1363
+ "rewards/accuracies": 0.668749988079071,
1364
+ "rewards/chosen": 0.010895573534071445,
1365
+ "rewards/margins": 0.47363200783729553,
1366
+ "rewards/rejected": -0.46273642778396606,
1367
+ "step": 810
1368
+ },
1369
+ {
1370
+ "epoch": 1.7168280554828579,
1371
+ "grad_norm": 31.41564508164701,
1372
+ "learning_rate": 5.898544083397e-08,
1373
+ "logits/chosen": 4.57013463973999,
1374
+ "logits/rejected": 4.8762030601501465,
1375
+ "logps/chosen": -459.298583984375,
1376
+ "logps/rejected": -376.189208984375,
1377
+ "loss": 0.6381,
1378
+ "rewards/accuracies": 0.625,
1379
+ "rewards/chosen": -0.04928427189588547,
1380
+ "rewards/margins": 0.33450883626937866,
1381
+ "rewards/rejected": -0.38379308581352234,
1382
+ "step": 820
1383
+ },
1384
+ {
1385
+ "epoch": 1.7377649829887463,
1386
+ "grad_norm": 38.55984096337612,
1387
+ "learning_rate": 5.065649387408705e-08,
1388
+ "logits/chosen": 4.863150596618652,
1389
+ "logits/rejected": 4.996617317199707,
1390
+ "logps/chosen": -405.2935485839844,
1391
+ "logps/rejected": -383.06756591796875,
1392
+ "loss": 0.6587,
1393
+ "rewards/accuracies": 0.6187499761581421,
1394
+ "rewards/chosen": -0.14060600101947784,
1395
+ "rewards/margins": 0.1646648645401001,
1396
+ "rewards/rejected": -0.30527088046073914,
1397
+ "step": 830
1398
+ },
1399
+ {
1400
+ "epoch": 1.7587019104946349,
1401
+ "grad_norm": 32.69891650352482,
1402
+ "learning_rate": 4.292990551804171e-08,
1403
+ "logits/chosen": 4.561503887176514,
1404
+ "logits/rejected": 4.661375522613525,
1405
+ "logps/chosen": -374.9468688964844,
1406
+ "logps/rejected": -359.5188293457031,
1407
+ "loss": 0.6426,
1408
+ "rewards/accuracies": 0.637499988079071,
1409
+ "rewards/chosen": -0.05613694339990616,
1410
+ "rewards/margins": 0.3448534607887268,
1411
+ "rewards/rejected": -0.40099042654037476,
1412
+ "step": 840
1413
+ },
1414
+ {
1415
+ "epoch": 1.7796388380005235,
1416
+ "grad_norm": 32.82316724445512,
1417
+ "learning_rate": 3.581603349196371e-08,
1418
+ "logits/chosen": 4.668177604675293,
1419
+ "logits/rejected": 5.044764518737793,
1420
+ "logps/chosen": -391.29534912109375,
1421
+ "logps/rejected": -374.1195068359375,
1422
+ "loss": 0.6501,
1423
+ "rewards/accuracies": 0.543749988079071,
1424
+ "rewards/chosen": -0.14754648506641388,
1425
+ "rewards/margins": 0.16757197678089142,
1426
+ "rewards/rejected": -0.3151184618473053,
1427
+ "step": 850
1428
+ },
1429
+ {
1430
+ "epoch": 1.8005757655064119,
1431
+ "grad_norm": 32.36442235611696,
1432
+ "learning_rate": 2.9324414157151367e-08,
1433
+ "logits/chosen": 4.706895351409912,
1434
+ "logits/rejected": 5.021437644958496,
1435
+ "logps/chosen": -417.41021728515625,
1436
+ "logps/rejected": -335.3275451660156,
1437
+ "loss": 0.6534,
1438
+ "rewards/accuracies": 0.6812499761581421,
1439
+ "rewards/chosen": -0.06291428953409195,
1440
+ "rewards/margins": 0.3059840798377991,
1441
+ "rewards/rejected": -0.36889833211898804,
1442
+ "step": 860
1443
+ },
1444
+ {
1445
+ "epoch": 1.8215126930123005,
1446
+ "grad_norm": 29.740377909388123,
1447
+ "learning_rate": 2.3463749726290284e-08,
1448
+ "logits/chosen": 4.696743965148926,
1449
+ "logits/rejected": 4.8797287940979,
1450
+ "logps/chosen": -477.77783203125,
1451
+ "logps/rejected": -390.98175048828125,
1452
+ "loss": 0.6614,
1453
+ "rewards/accuracies": 0.53125,
1454
+ "rewards/chosen": -0.07511474192142487,
1455
+ "rewards/margins": 0.17798468470573425,
1456
+ "rewards/rejected": -0.2530994415283203,
1457
+ "step": 870
1458
+ },
1459
+ {
1460
+ "epoch": 1.842449620518189,
1461
+ "grad_norm": 30.952090476967147,
1462
+ "learning_rate": 1.824189659787284e-08,
1463
+ "logits/chosen": 4.781184196472168,
1464
+ "logits/rejected": 5.032862663269043,
1465
+ "logps/chosen": -387.22906494140625,
1466
+ "logps/rejected": -360.9486389160156,
1467
+ "loss": 0.6618,
1468
+ "rewards/accuracies": 0.5874999761581421,
1469
+ "rewards/chosen": -0.11400938034057617,
1470
+ "rewards/margins": 0.21471650898456573,
1471
+ "rewards/rejected": -0.3287258744239807,
1472
+ "step": 880
1473
+ },
1474
+ {
1475
+ "epoch": 1.8633865480240774,
1476
+ "grad_norm": 31.64887361264221,
1477
+ "learning_rate": 1.3665854824458035e-08,
1478
+ "logits/chosen": 4.322469234466553,
1479
+ "logits/rejected": 4.672883033752441,
1480
+ "logps/chosen": -445.35699462890625,
1481
+ "logps/rejected": -390.5237731933594,
1482
+ "loss": 0.624,
1483
+ "rewards/accuracies": 0.6937500238418579,
1484
+ "rewards/chosen": -0.032880861312150955,
1485
+ "rewards/margins": 0.3543739914894104,
1486
+ "rewards/rejected": -0.38725486397743225,
1487
+ "step": 890
1488
+ },
1489
+ {
1490
+ "epoch": 1.8843234755299658,
1491
+ "grad_norm": 33.85502008551422,
1492
+ "learning_rate": 9.741758728888217e-09,
1493
+ "logits/chosen": 4.4365644454956055,
1494
+ "logits/rejected": 4.837357997894287,
1495
+ "logps/chosen": -472.887451171875,
1496
+ "logps/rejected": -367.82611083984375,
1497
+ "loss": 0.6474,
1498
+ "rewards/accuracies": 0.65625,
1499
+ "rewards/chosen": -0.05196143314242363,
1500
+ "rewards/margins": 0.3476230800151825,
1501
+ "rewards/rejected": -0.39958447217941284,
1502
+ "step": 900
1503
+ },
1504
+ {
1505
+ "epoch": 1.8843234755299658,
1506
+ "eval_logits/chosen": 4.5737175941467285,
1507
+ "eval_logits/rejected": 4.921082496643066,
1508
+ "eval_logps/chosen": -443.622314453125,
1509
+ "eval_logps/rejected": -378.2174377441406,
1510
+ "eval_loss": 0.6692253351211548,
1511
+ "eval_rewards/accuracies": 0.6269841194152832,
1512
+ "eval_rewards/chosen": -0.05897674709558487,
1513
+ "eval_rewards/margins": 0.31128397583961487,
1514
+ "eval_rewards/rejected": -0.37026071548461914,
1515
+ "eval_runtime": 21.3225,
1516
+ "eval_samples_per_second": 93.797,
1517
+ "eval_steps_per_second": 2.955,
1518
+ "step": 900
1519
+ },
1520
+ {
1521
+ "epoch": 1.9052604030358546,
1522
+ "grad_norm": 33.28147635399462,
1523
+ "learning_rate": 6.474868681043577e-09,
1524
+ "logits/chosen": 4.713411808013916,
1525
+ "logits/rejected": 4.913935661315918,
1526
+ "logps/chosen": -384.9287109375,
1527
+ "logps/rejected": -316.16265869140625,
1528
+ "loss": 0.6491,
1529
+ "rewards/accuracies": 0.6625000238418579,
1530
+ "rewards/chosen": -0.02645047940313816,
1531
+ "rewards/margins": 0.40163594484329224,
1532
+ "rewards/rejected": -0.42808642983436584,
1533
+ "step": 910
1534
+ },
1535
+ {
1536
+ "epoch": 1.926197330541743,
1537
+ "grad_norm": 35.875215811609834,
1538
+ "learning_rate": 3.869564046156459e-09,
1539
+ "logits/chosen": 4.6749348640441895,
1540
+ "logits/rejected": 4.898279190063477,
1541
+ "logps/chosen": -441.083740234375,
1542
+ "logps/rejected": -361.4406433105469,
1543
+ "loss": 0.6389,
1544
+ "rewards/accuracies": 0.6499999761581421,
1545
+ "rewards/chosen": -0.0031513571739196777,
1546
+ "rewards/margins": 0.41674357652664185,
1547
+ "rewards/rejected": -0.4198949337005615,
1548
+ "step": 920
1549
+ },
1550
+ {
1551
+ "epoch": 1.9471342580476314,
1552
+ "grad_norm": 32.946064523302205,
1553
+ "learning_rate": 1.929337314139412e-09,
1554
+ "logits/chosen": 4.862700462341309,
1555
+ "logits/rejected": 4.817538261413574,
1556
+ "logps/chosen": -429.21051025390625,
1557
+ "logps/rejected": -370.45745849609375,
1558
+ "loss": 0.6312,
1559
+ "rewards/accuracies": 0.6000000238418579,
1560
+ "rewards/chosen": -0.0764947384595871,
1561
+ "rewards/margins": 0.20215356349945068,
1562
+ "rewards/rejected": -0.2786482870578766,
1563
+ "step": 930
1564
+ },
1565
+ {
1566
+ "epoch": 1.96807118555352,
1567
+ "grad_norm": 37.53766060677335,
1568
+ "learning_rate": 6.567894177967325e-10,
1569
+ "logits/chosen": 5.056074142456055,
1570
+ "logits/rejected": 5.200203895568848,
1571
+ "logps/chosen": -382.3914489746094,
1572
+ "logps/rejected": -319.7542419433594,
1573
+ "loss": 0.6475,
1574
+ "rewards/accuracies": 0.625,
1575
+ "rewards/chosen": -0.012996235862374306,
1576
+ "rewards/margins": 0.2734270989894867,
1577
+ "rewards/rejected": -0.28642335534095764,
1578
+ "step": 940
1579
+ },
1580
+ {
1581
+ "epoch": 1.9890081130594086,
1582
+ "grad_norm": 32.67422145978211,
1583
+ "learning_rate": 5.3626246194704575e-11,
1584
+ "logits/chosen": 4.634739875793457,
1585
+ "logits/rejected": 4.890820503234863,
1586
+ "logps/chosen": -425.7994689941406,
1587
+ "logps/rejected": -344.5509033203125,
1588
+ "loss": 0.6372,
1589
+ "rewards/accuracies": 0.5562499761581421,
1590
+ "rewards/chosen": -0.18660762906074524,
1591
+ "rewards/margins": 0.23620739579200745,
1592
+ "rewards/rejected": -0.4228149950504303,
1593
+ "step": 950
1594
+ },
1595
+ {
1596
+ "epoch": 1.9973828840617638,
1597
+ "step": 954,
1598
+ "total_flos": 0.0,
1599
+ "train_loss": 0.675485389037702,
1600
+ "train_runtime": 5897.7907,
1601
+ "train_samples_per_second": 20.731,
1602
+ "train_steps_per_second": 0.162
1603
+ }
1604
+ ],
1605
+ "logging_steps": 10,
1606
+ "max_steps": 954,
1607
+ "num_input_tokens_seen": 0,
1608
+ "num_train_epochs": 2,
1609
+ "save_steps": 500,
1610
+ "stateful_callbacks": {
1611
+ "TrainerControl": {
1612
+ "args": {
1613
+ "should_epoch_stop": false,
1614
+ "should_evaluate": false,
1615
+ "should_log": false,
1616
+ "should_save": false,
1617
+ "should_training_stop": false
1618
+ },
1619
+ "attributes": {}
1620
+ }
1621
+ },
1622
+ "total_flos": 0.0,
1623
+ "train_batch_size": 2,
1624
+ "trial_name": null,
1625
+ "trial_params": null
1626
+ }
ComfyUI/models/smol/SmolLM2-135M-Instruct/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90f508e5769b31070c2c8f82e7ecdce816d610763df89479b5258bc66ee8b357
3
+ size 6520
ComfyUI/models/smol/SmolLM2-135M-Instruct/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
ComfyUI/models/smol/SmolLM2-360M-Instruct/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
ComfyUI/models/smol/SmolLM2-360M-Instruct/README.md ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ language:
5
+ - en
6
+ pipeline_tag: text-generation
7
+ tags:
8
+ - safetensors
9
+ - onnx
10
+ - transformers.js
11
+ ---
12
+
13
+
14
+ # SmolLM2
15
+
16
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/oWWfzW4RbWkVIo7f-5444.png)
17
+
18
+ ## Table of Contents
19
+
20
+ 1. [Model Summary](#model-summary)
21
+ 2. [Limitations](#limitations)
22
+ 3. [Training](#training)
23
+ 4. [License](#license)
24
+ 5. [Citation](#citation)
25
+
26
+ ## Model Summary
27
+
28
+ SmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. They are capable of solving a wide range of tasks while being lightweight enough to run on-device.
29
+
30
+ SmolLM2 demonstrates significant advances over its predecessor SmolLM1, particularly in instruction following, knowledge, and reasoning. The 360M model was trained on 4 trillion tokens using a diverse combination of datasets: FineWeb-Edu, DCLM, and The Stack, along with new filtered datasets we curated and will release soon. We developed the instruct version through supervised fine-tuning (SFT) on a combination of public datasets and our own curated datasets, and then applied Direct Preference Optimization (DPO) using [UltraFeedback](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized).
31
+
32
+ The instruct model additionally supports tasks such as text rewriting, summarization, and function calling (for the 1.7B variant), thanks to datasets developed by [Argilla](https://huggingface.co/argilla) such as [Synth-APIGen-v0.1](https://huggingface.co/datasets/argilla/Synth-APIGen-v0.1).
33
+ You can find the SFT dataset here: https://huggingface.co/datasets/HuggingFaceTB/smol-smoltalk and the finetuning code in the [alignment handbook](https://github.com/huggingface/alignment-handbook/tree/main/recipes/smollm2).
34
+
35
+ For more details, refer to https://github.com/huggingface/smollm, where you will find pre-training, post-training, evaluation, and local inference code.
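+ As a rough illustration of the DPO stage described above, the sketch below uses TRL's `DPOTrainer` on UltraFeedback. This is a minimal sketch, not the exact recipe (see the alignment handbook linked above): the two training epochs, per-device batch size of 2, logging every 10 steps, and checkpointing every 500 steps mirror the trainer state files shipped with these checkpoints, while the starting checkpoint, beta, and all remaining arguments are assumptions.
+
+ ```python
+ # Minimal DPO sketch with TRL; values not noted as coming from the trainer state are assumptions.
+ from datasets import load_dataset
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from trl import DPOConfig, DPOTrainer
+
+ checkpoint = "HuggingFaceTB/SmolLM2-360M-Instruct"  # placeholder; the real run starts from the SFT checkpoint
+ model = AutoModelForCausalLM.from_pretrained(checkpoint)
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+ # Preference pairs (prompt / chosen / rejected) from UltraFeedback
+ train_dataset = load_dataset("HuggingFaceH4/ultrafeedback_binarized", split="train_prefs")
+
+ args = DPOConfig(
+     output_dir="smollm2-360m-dpo",
+     num_train_epochs=2,               # matches the shipped trainer state
+     per_device_train_batch_size=2,    # matches "train_batch_size": 2
+     logging_steps=10,                 # matches "logging_steps": 10
+     save_steps=500,                   # matches "save_steps": 500
+     beta=0.1,                         # assumed DPO temperature (TRL default)
+ )
+
+ trainer = DPOTrainer(
+     model=model,
+     args=args,
+     train_dataset=train_dataset,
+     processing_class=tokenizer,       # `tokenizer=` in older TRL releases
+ )
+ trainer.train()
+ ```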
36
+
37
+
38
+ ### How to use
39
+
40
+ ### Transformers
41
+ ```bash
42
+ pip install transformers
43
+ ```
44
+
45
+ ```python
46
+ from transformers import AutoModelForCausalLM, AutoTokenizer
47
+ checkpoint = "HuggingFaceTB/SmolLM2-360M-Instruct"
48
+
49
+ device = "cuda" # for GPU usage or "cpu" for CPU usage
50
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
51
+ # for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
52
+ model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
53
+
54
+ messages = [{"role": "user", "content": "What is the capital of France."}]
55
+ input_text = tokenizer.apply_chat_template(messages, tokenize=False)
56
+ print(input_text)
57
+ inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
58
+ outputs = model.generate(inputs, max_new_tokens=50, temperature=0.2, top_p=0.9, do_sample=True)
59
+ print(tokenizer.decode(outputs[0]))
60
+ ```
61
+
62
+ ### Chat in TRL
63
+ You can also use the TRL CLI to chat with the model from the terminal:
64
+ ```bash
65
+ pip install trl
66
+ trl chat --model_name_or_path HuggingFaceTB/SmolLM2-360M-Instruct --device cpu
67
+ ```
68
+
69
+ ## Evaluation
70
+
71
+ In this section, we report the evaluation results of SmolLM2. All evaluations are zero-shot unless stated otherwise, and we use [lighteval](https://github.com/huggingface/lighteval) to run them.
72
+
73
+ ## Base Pre-Trained Model
74
+
75
+ | Metrics | SmolLM2-360M | Qwen2.5-0.5B | SmolLM-360M |
76
+ |:-------------------|:------------:|:------------:|:------------:|
77
+ | HellaSwag | **54.5** | 51.2 | 51.8 |
78
+ | ARC (Average) | **53.0** | 45.4 | 50.1 |
79
+ | PIQA | **71.7** | 69.9 | 71.6 |
80
+ | MMLU (cloze) | **35.8** | 33.7 | 34.4 |
81
+ | CommonsenseQA | **38.0** | 31.6 | 35.3 |
82
+ | TriviaQA | **16.9** | 4.3 | 9.1 |
83
+ | Winogrande | 52.5 | **54.1** | 52.8 |
84
+ | OpenBookQA | **37.4** | **37.4** | 37.2 |
85
+ | GSM8K (5-shot) | 3.2 | **33.4** | 1.6 |
86
+
87
+
88
+ ## Instruction Model
89
+
90
+ | Metric | SmolLM2-360M-Instruct | Qwen2.5-0.5B-Instruct | SmolLM-360M-Instruct |
91
+ |:-----------------------------|:---------------------:|:---------------------:|:---------------------:|
92
+ | IFEval (Average prompt/inst) | **41.0** | 31.6 | 19.8 |
93
+ | MT-Bench | 3.66 | **4.16** | 3.37 |
94
+ | HellaSwag | **52.1** | 48.0 | 47.9 |
95
+ | ARC (Average) | **43.7** | 37.3 | 38.8 |
96
+ | PIQA | **70.8** | 67.2 | 69.4 |
97
+ | MMLU (cloze) | **32.8** | 31.7 | 30.6 |
98
+ | BBH (3-shot) | 27.3 | **30.7** | 24.4 |
99
+ | GSM8K (5-shot) | 7.43 | **26.8** | 1.36 |
100
+
101
+
102
+ ## Limitations
103
+
104
+ SmolLM2 models primarily understand and generate content in English. They can produce text on a variety of topics, but the generated content may not always be factually accurate, logically consistent, or free from biases present in the training data. These models should be used as assistive tools rather than definitive sources of information. Users should always verify important information and critically evaluate any generated content.
105
+
106
+ ## Training
107
+
108
+ ### Model
109
+
110
+ - **Architecture:** Transformer decoder
111
+ - **Pretraining tokens:** 4T
112
+ - **Precision:** bfloat16
113
+
114
+ ### Hardware
115
+
116
+ - **GPUs:** 64 H100
117
+
118
+ ### Software
119
+
120
+ - **Training Framework:** [nanotron](https://github.com/huggingface/nanotron/tree/main)
121
+
122
+ ## License
123
+
124
+ [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
125
+
126
+ ## Citation
127
+ ```bibtex
128
+ @misc{allal2024SmolLM2,
129
+ title={SmolLM2 - with great data, comes great performance},
130
+ author={Loubna Ben Allal and Anton Lozhkov and Elie Bakouch and Gabriel Martín Blázquez and Lewis Tunstall and Agustín Piqueres and Andres Marafioti and Cyril Zakka and Leandro von Werra and Thomas Wolf},
131
+ year={2024},
132
+ }
133
+ ```
ComfyUI/models/smol/SmolLM2-360M-Instruct/all_results.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.9973828840617638,
3
+ "eval_logits/chosen": -1.6407532691955566,
4
+ "eval_logits/rejected": -1.6968854665756226,
5
+ "eval_logps/chosen": -375.6463623046875,
6
+ "eval_logps/rejected": -323.7197570800781,
7
+ "eval_loss": 0.6348475217819214,
8
+ "eval_rewards/accuracies": 0.6190476417541504,
9
+ "eval_rewards/chosen": -0.034213583916425705,
10
+ "eval_rewards/margins": 0.3567626178264618,
11
+ "eval_rewards/rejected": -0.3909761905670166,
12
+ "eval_runtime": 22.3598,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 89.446,
15
+ "eval_steps_per_second": 2.818,
16
+ "total_flos": 0.0,
17
+ "train_loss": 0.6516540072998911,
18
+ "train_runtime": 5944.7081,
19
+ "train_samples": 61134,
20
+ "train_samples_per_second": 20.568,
21
+ "train_steps_per_second": 0.16
22
+ }
ComfyUI/models/smol/SmolLM2-360M-Instruct/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 960,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 2560,
13
+ "is_llama_config": true,
14
+ "max_position_embeddings": 8192,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 15,
18
+ "num_hidden_layers": 32,
19
+ "num_key_value_heads": 5,
20
+ "pad_token_id": 2,
21
+ "pretraining_tp": 1,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_interleaved": false,
24
+ "rope_scaling": null,
25
+ "rope_theta": 100000,
26
+ "tie_word_embeddings": true,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.42.3",
29
+ "transformers.js_config": {
30
+ "kv_cache_dtype": {
31
+ "q4f16": "float16",
32
+ "fp16": "float16"
33
+ }
34
+ },
35
+ "use_cache": true,
36
+ "vocab_size": 49152
37
+ }
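The config above fully determines the 360M architecture: 32 layers, hidden size 960, 15 query heads with 5 KV heads (grouped-query attention), tied embeddings, and a 49152-token vocabulary. A minimal sanity check, assuming only `transformers` is installed, rebuilds the architecture from this config and counts its parameters:

```python
# Hedged sanity check: instantiate the architecture from the config above (random weights)
# and confirm the parameter count lands near 360M.
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("HuggingFaceTB/SmolLM2-360M-Instruct")
model = AutoModelForCausalLM.from_config(config)  # architecture only, no pretrained weights

n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.0f}M parameters")  # roughly 362M with tied embeddings
```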
ComfyUI/models/smol/SmolLM2-360M-Instruct/eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.9973828840617638,
3
+ "eval_logits/chosen": -1.6407532691955566,
4
+ "eval_logits/rejected": -1.6968854665756226,
5
+ "eval_logps/chosen": -375.6463623046875,
6
+ "eval_logps/rejected": -323.7197570800781,
7
+ "eval_loss": 0.6348475217819214,
8
+ "eval_rewards/accuracies": 0.6190476417541504,
9
+ "eval_rewards/chosen": -0.034213583916425705,
10
+ "eval_rewards/margins": 0.3567626178264618,
11
+ "eval_rewards/rejected": -0.3909761905670166,
12
+ "eval_runtime": 22.3598,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 89.446,
15
+ "eval_steps_per_second": 2.818
16
+ }
ComfyUI/models/smol/SmolLM2-360M-Instruct/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 2,
6
+ "transformers_version": "4.42.3"
7
+ }
ComfyUI/models/smol/SmolLM2-360M-Instruct/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ComfyUI/models/smol/SmolLM2-360M-Instruct/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6bffe7435d7ddc10fd3b9a9efd429dafbacb1cb17015fb5562664e7532bf86e
3
+ size 723674912
ComfyUI/models/smol/SmolLM2-360M-Instruct/model.safetensors.baiduyun.uploading.cfg ADDED
File without changes
ComfyUI/models/smol/SmolLM2-360M-Instruct/runs/Oct31_09-01-58_ip-26-0-172-142/events.out.tfevents.1730365788.ip-26-0-172-142.451351.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a59625dee7fb61068e6a6ea53682ee0d91da3f92189019d5ceed597d21ebf27
3
+ size 78139
ComfyUI/models/smol/SmolLM2-360M-Instruct/runs/Oct31_09-01-58_ip-26-0-172-142/events.out.tfevents.1730371773.ip-26-0-172-142.451351.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac5691a0cdfdd383d29b69e85c4aaf632c695ce8fd31683bb9512f310fbf1250
3
+ size 828
ComfyUI/models/smol/SmolLM2-360M-Instruct/runs/Oct31_09-19-57_ip-26-0-174-36/events.out.tfevents.1730366818.ip-26-0-174-36.3233632.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44f4f653d8d22e5db793f6743dc486f1d3f0919a66d5bbd69ebfd22b9e1f598b
3
+ size 38996
ComfyUI/models/smol/SmolLM2-360M-Instruct/runs/Oct31_09-20-43_ip-26-0-161-142/events.out.tfevents.1730366856.ip-26-0-161-142.1301887.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:684365d127b952fdfd215e337f4f8dc5bc4d37ef88fb608632d244ec6f86a449
3
+ size 37621
ComfyUI/models/smol/SmolLM2-360M-Instruct/special_tokens_map.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "bos_token": {
7
+ "content": "<|im_start|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "eos_token": {
14
+ "content": "<|im_end|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "pad_token": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "unk_token": {
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }