Model save
- README.md +1 -1
- all_results.json +4 -4
- train_results.json +4 -4
- trainer_state.json +24 -8

README.md CHANGED

@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/cosineai/deepseek-reproduce/runs/
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/cosineai/deepseek-reproduce/runs/mr9qnll8)
 
 
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

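The hunk header above anchors on `print(output["generated_text"])`, i.e. the README's existing quick-start snippet. For reference, a minimal sketch of what such a snippet typically looks like for a TRL-trained text-generation model; the model id below is a placeholder, since the actual repository id is not part of this diff:

```python
# Minimal usage sketch (assumption: placeholder model id, transformers installed).
from transformers import pipeline

generator = pipeline("text-generation", model="cosineai/deepseek-reproduce")  # placeholder id
output = generator("What is 7 * 6?", max_new_tokens=128, return_full_text=False)[0]
print(output["generated_text"])
```
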
all_results.json CHANGED

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss":
-    "train_runtime":
+    "train_loss": 3.1328258249329856e-06,
+    "train_runtime": 38.497,
     "train_samples": 6725,
-    "train_samples_per_second":
-    "train_steps_per_second":
+    "train_samples_per_second": 311.713,
+    "train_steps_per_second": 6.494
 }

train_results.json CHANGED

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss":
-    "train_runtime":
+    "train_loss": 3.1328258249329856e-06,
+    "train_runtime": 38.497,
     "train_samples": 6725,
-    "train_samples_per_second":
-    "train_steps_per_second":
+    "train_samples_per_second": 311.713,
+    "train_steps_per_second": 6.494
 }

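Both `all_results.json` and `train_results.json` record the same end-of-training summary that the `Trainer` writes out. A small sketch of reading those metrics back from a local clone, assuming the filenames shown in the diffs above:

```python
import json

# Load the end-of-training summary metrics (same keys as in the diff above).
with open("train_results.json") as f:
    results = json.load(f)

print(f"train_loss:               {results['train_loss']:.3e}")
print(f"train_runtime:            {results['train_runtime']:.1f} s")
print(f"train_samples_per_second: {results['train_samples_per_second']:.1f}")
print(f"train_steps_per_second:   {results['train_steps_per_second']:.2f}")
```
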
trainer_state.json CHANGED

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.2985873605947955,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 251,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4009,13 +4009,29 @@
       "step": 250
     },
     {
-      "
-      "
+      "completion_length": 171.3333396911621,
+      "epoch": 0.2985873605947955,
+      "grad_norm": 0.017227069093127906,
+      "kl": 0.01967620849609375,
+      "learning_rate": 2.43689976739403e-10,
+      "loss": 0.0008,
+      "reward": 0.5448333483655006,
+      "reward_std": 1.0098017808049917,
+      "rewards/correctness_reward_func": 0.4583333469927311,
+      "rewards/int_reward_func": 0.11458333674818277,
+      "rewards/soft_format_reward_func": 0.0,
+      "rewards/strict_format_reward_func": 0.0,
+      "rewards/xmlcount_reward_func": -0.028083334676921368,
+      "step": 251
+    },
+    {
+      "epoch": 0.2985873605947955,
+      "step": 251,
       "total_flos": 0.0,
-      "train_loss":
-      "train_runtime":
-      "train_samples_per_second":
-      "train_steps_per_second":
+      "train_loss": 3.1328258249329856e-06,
+      "train_runtime": 38.497,
+      "train_samples_per_second": 311.713,
+      "train_steps_per_second": 6.494
     }
   ],
   "logging_steps": 1,

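The `rewards/...` keys in the new log entry (`rewards/correctness_reward_func`, `rewards/int_reward_func`, `rewards/soft_format_reward_func`, `rewards/strict_format_reward_func`, `rewards/xmlcount_reward_func`) follow TRL's convention of logging one column per reward function passed to `GRPOTrainer`. A minimal sketch of how such a setup is typically wired, assuming TRL's `GRPOTrainer`/`GRPOConfig`; the reward-function bodies, toy dataset, and base model below are illustrative placeholders, not this repository's actual training code:

```python
import re
from datasets import Dataset
from trl import GRPOConfig, GRPOTrainer

def correctness_reward_func(completions, answer, **kwargs):
    # Placeholder: full reward only when the reference answer appears verbatim.
    return [2.0 if a in c else 0.0 for c, a in zip(completions, answer)]

def int_reward_func(completions, **kwargs):
    # Placeholder: small bonus when the completion contains a bare integer.
    return [0.5 if re.search(r"\b\d+\b", c) else 0.0 for c in completions]

# Toy standard-format dataset: a "prompt" column plus an extra "answer" column,
# which TRL forwards to the reward functions as a keyword argument.
train_dataset = Dataset.from_dict({
    "prompt": ["What is 3 + 4?", "What is 9 - 2?"],
    "answer": ["7", "7"],
})

trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-0.5B-Instruct",  # placeholder base model, not the one used here
    reward_funcs=[correctness_reward_func, int_reward_func],
    args=GRPOConfig(output_dir="grpo-sketch", logging_steps=1),
    train_dataset=train_dataset,
)
trainer.train()
```

Each function's per-step mean is what lands in `trainer_state.json` as its own `rewards/<function_name>` entry, alongside the combined `reward` and `reward_std`.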