AlistairPullen committed on
Commit
db053a3
·
verified ·
1 Parent(s): 550aaf9

Model save

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. all_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +24 -8
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/cosineai/deepseek-reproduce/runs/sxqwipa8)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/cosineai/deepseek-reproduce/runs/mr9qnll8)
31
 
32
 
33
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0007357805297391096,
4
- "train_runtime": 5420.3326,
5
  "train_samples": 6725,
6
- "train_samples_per_second": 2.214,
7
- "train_steps_per_second": 0.046
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 3.1328258249329856e-06,
4
+ "train_runtime": 38.497,
5
  "train_samples": 6725,
6
+ "train_samples_per_second": 311.713,
7
+ "train_steps_per_second": 6.494
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.0007357805297391096,
4
- "train_runtime": 5420.3326,
5
  "train_samples": 6725,
6
- "train_samples_per_second": 2.214,
7
- "train_steps_per_second": 0.046
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 3.1328258249329856e-06,
4
+ "train_runtime": 38.497,
5
  "train_samples": 6725,
6
+ "train_samples_per_second": 311.713,
7
+ "train_steps_per_second": 6.494
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.29739776951672864,
5
  "eval_steps": 500,
6
- "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4009,13 +4009,29 @@
4009
  "step": 250
4010
  },
4011
  {
4012
- "epoch": 0.29739776951672864,
4013
- "step": 250,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4014
  "total_flos": 0.0,
4015
- "train_loss": 0.0007357805297391096,
4016
- "train_runtime": 5420.3326,
4017
- "train_samples_per_second": 2.214,
4018
- "train_steps_per_second": 0.046
4019
  }
4020
  ],
4021
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2985873605947955,
5
  "eval_steps": 500,
6
+ "global_step": 251,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4009
  "step": 250
4010
  },
4011
  {
4012
+ "completion_length": 171.3333396911621,
4013
+ "epoch": 0.2985873605947955,
4014
+ "grad_norm": 0.017227069093127906,
4015
+ "kl": 0.01967620849609375,
4016
+ "learning_rate": 2.43689976739403e-10,
4017
+ "loss": 0.0008,
4018
+ "reward": 0.5448333483655006,
4019
+ "reward_std": 1.0098017808049917,
4020
+ "rewards/correctness_reward_func": 0.4583333469927311,
4021
+ "rewards/int_reward_func": 0.11458333674818277,
4022
+ "rewards/soft_format_reward_func": 0.0,
4023
+ "rewards/strict_format_reward_func": 0.0,
4024
+ "rewards/xmlcount_reward_func": -0.028083334676921368,
4025
+ "step": 251
4026
+ },
4027
+ {
4028
+ "epoch": 0.2985873605947955,
4029
+ "step": 251,
4030
  "total_flos": 0.0,
4031
+ "train_loss": 3.1328258249329856e-06,
4032
+ "train_runtime": 38.497,
4033
+ "train_samples_per_second": 311.713,
4034
+ "train_steps_per_second": 6.494
4035
  }
4036
  ],
4037
  "logging_steps": 1,