YangZhoumill
/

Qwen2.5-0.5B-Instruct

@@ -1,19 +1,17 @@
 ---
 base_model: Qwen/Qwen2.5-0.5B-Instruct
-datasets: YangZhoumill/bestofn
 library_name: transformers
-model_name: Qwen2.5-0.5B-Instruct-4230297
 tags:
 - generated_from_trainer
-- open-r1
 - trl
 - sft
 licence: license
 ---
-# Model Card for Qwen2.5-0.5B-Instruct-4230297
-This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) on the [YangZhoumill/bestofn](https://huggingface.co/datasets/YangZhoumill/bestofn) dataset.
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -29,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/stevenzhou0816100/huggingface/runs/uag1gn9n)
 This model was trained with SFT.

 ---
 base_model: Qwen/Qwen2.5-0.5B-Instruct
 library_name: transformers
+model_name: Qwen2.5-0.5B-Instruct
 tags:
 - generated_from_trainer
 - trl
 - sft
 licence: license
 ---
+# Model Card for Qwen2.5-0.5B-Instruct
+This model is a fine-tuned version of [Qwen/Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/stevenzhou0816100/huggingface/runs/qm9rbkf1)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "total_flos": 3.486616950459597e+16,
-    "train_loss": 0.9243612613677978,
-    "train_runtime": 251.2658,
-    "train_samples": 7473,
-    "train_samples_per_second": 3.98,
-    "train_steps_per_second": 0.497
 }

 {
+    "total_flos": 4.211382991139635e+16,
+    "train_loss": 0.7568846257527669,
+    "train_runtime": 323.9308,
+    "train_samples": 9308,
+    "train_samples_per_second": 3.704,
+    "train_steps_per_second": 0.463
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "total_flos": 3.486616950459597e+16,
-    "train_loss": 0.9243612613677978,
-    "train_runtime": 251.2658,
-    "train_samples": 7473,
-    "train_samples_per_second": 3.98,
-    "train_steps_per_second": 0.497
 }

 {
+    "total_flos": 4.211382991139635e+16,
+    "train_loss": 0.7568846257527669,
+    "train_runtime": 323.9308,
+    "train_samples": 9308,
+    "train_samples_per_second": 3.704,
+    "train_steps_per_second": 0.463
 }

trainer_state.json CHANGED Viewed

@@ -4,198 +4,233 @@
   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 125,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.04,
-      "grad_norm": 59.25,
-      "learning_rate": 2.857142857142857e-05,
-      "loss": 3.2542,
       "step": 5
     },
     {
-      "epoch": 0.08,
-      "grad_norm": 9.75,
-      "learning_rate": 4.996811065272715e-05,
-      "loss": 1.5907,
       "step": 10
     },
     {
-      "epoch": 0.12,
-      "grad_norm": 7.21875,
-      "learning_rate": 4.9610392803331726e-05,
-      "loss": 1.2725,
       "step": 15
     },
     {
-      "epoch": 0.16,
-      "grad_norm": 6.21875,
-      "learning_rate": 4.8861446190538576e-05,
-      "loss": 1.1719,
       "step": 20
     },
     {
-      "epoch": 0.2,
-      "grad_norm": 6.84375,
-      "learning_rate": 4.7734522928852436e-05,
-      "loss": 1.1011,
       "step": 25
     },
     {
-      "epoch": 0.24,
-      "grad_norm": 7.8125,
-      "learning_rate": 4.624956317935659e-05,
-      "loss": 1.0381,
       "step": 30
     },
     {
-      "epoch": 0.28,
-      "grad_norm": 33.25,
-      "learning_rate": 4.443284232176311e-05,
-      "loss": 0.9895,
       "step": 35
     },
     {
-      "epoch": 0.32,
-      "grad_norm": 62.0,
-      "learning_rate": 4.2316506028963374e-05,
-      "loss": 0.9336,
       "step": 40
     },
     {
-      "epoch": 0.36,
-      "grad_norm": 57.75,
-      "learning_rate": 3.993800147062685e-05,
-      "loss": 0.9134,
       "step": 45
     },
     {
-      "epoch": 0.4,
-      "grad_norm": 17.375,
-      "learning_rate": 3.733941471032425e-05,
-      "loss": 0.8617,
       "step": 50
     },
     {
-      "epoch": 0.44,
-      "grad_norm": 92.0,
-      "learning_rate": 3.4566726020493854e-05,
-      "loss": 0.8964,
       "step": 55
     },
     {
-      "epoch": 0.48,
-      "grad_norm": 8.9375,
-      "learning_rate": 3.1668996291960073e-05,
-      "loss": 0.7954,
       "step": 60
     },
     {
-      "epoch": 0.52,
-      "grad_norm": 12.6875,
-      "learning_rate": 2.869749893394902e-05,
-      "loss": 0.7644,
       "step": 65
     },
     {
-      "epoch": 0.56,
-      "grad_norm": 25.5,
-      "learning_rate": 2.570481262505563e-05,
-      "loss": 0.7105,
       "step": 70
     },
     {
-      "epoch": 0.6,
-      "grad_norm": 12.0625,
-      "learning_rate": 2.2743890968333453e-05,
-      "loss": 0.6844,
       "step": 75
     },
     {
-      "epoch": 0.64,
-      "grad_norm": 11.125,
-      "learning_rate": 1.986712551234432e-05,
-      "loss": 0.673,
       "step": 80
     },
     {
-      "epoch": 0.68,
-      "grad_norm": 10.0,
-      "learning_rate": 1.7125418717390167e-05,
-      "loss": 0.6426,
       "step": 85
     },
     {
-      "epoch": 0.72,
-      "grad_norm": 9.6875,
-      "learning_rate": 1.4567283270175847e-05,
-      "loss": 0.6182,
       "step": 90
     },
     {
-      "epoch": 0.76,
-      "grad_norm": 11.25,
-      "learning_rate": 1.2237983683933638e-05,
-      "loss": 0.6088,
       "step": 95
     },
     {
-      "epoch": 0.8,
-      "grad_norm": 11.25,
-      "learning_rate": 1.0178735372827107e-05,
-      "loss": 0.6127,
       "step": 100
     },
     {
-      "epoch": 0.84,
-      "grad_norm": 9.75,
-      "learning_rate": 8.425975372482405e-06,
-      "loss": 0.5978,
       "step": 105
     },
     {
-      "epoch": 0.88,
-      "grad_norm": 13.6875,
-      "learning_rate": 7.010717610764453e-06,
-      "loss": 0.6071,
       "step": 110
     },
     {
-      "epoch": 0.92,
       "grad_norm": 10.25,
-      "learning_rate": 5.9580041368548775e-06,
-      "loss": 0.5962,
       "step": 115
     },
     {
-      "epoch": 0.96,
-      "grad_norm": 9.3125,
-      "learning_rate": 5.286462018769748e-06,
-      "loss": 0.5897,
       "step": 120
     },
     {
-      "epoch": 1.0,
-      "grad_norm": 11.0625,
-      "learning_rate": 5.007973749722316e-06,
-      "loss": 0.5851,
       "step": 125
     },
     {
       "epoch": 1.0,
-      "step": 125,
-      "total_flos": 3.486616950459597e+16,
-      "train_loss": 0.9243612613677978,
-      "train_runtime": 251.2658,
-      "train_samples_per_second": 3.98,
-      "train_steps_per_second": 0.497
     }
   ],
   "logging_steps": 5,
-  "max_steps": 125,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
@@ -211,7 +246,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.486616950459597e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_model_checkpoint": null,
   "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.03333333333333333,
+      "grad_norm": 61.0,
+      "learning_rate": 2.5e-05,
+      "loss": 3.1111,
       "step": 5
     },
     {
+      "epoch": 0.06666666666666667,
+      "grad_norm": 12.75,
+      "learning_rate": 4.9994493725417974e-05,
+      "loss": 1.4101,
       "step": 10
     },
     {
+      "epoch": 0.1,
+      "grad_norm": 7.59375,
+      "learning_rate": 4.980205694523683e-05,
+      "loss": 1.0893,
       "step": 15
     },
     {
+      "epoch": 0.13333333333333333,
+      "grad_norm": 376.0,
+      "learning_rate": 4.933699554028027e-05,
+      "loss": 1.0086,
       "step": 20
     },
     {
+      "epoch": 0.16666666666666666,
+      "grad_norm": 6.90625,
+      "learning_rate": 4.8604994510426774e-05,
+      "loss": 0.9507,
       "step": 25
     },
     {
+      "epoch": 0.2,
+      "grad_norm": 6.8125,
+      "learning_rate": 4.761500197676621e-05,
+      "loss": 0.9555,
       "step": 30
     },
     {
+      "epoch": 0.23333333333333334,
+      "grad_norm": 9.125,
+      "learning_rate": 4.63791197980501e-05,
+      "loss": 0.8328,
       "step": 35
     },
     {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 10.125,
+      "learning_rate": 4.49124556352474e-05,
+      "loss": 0.8174,
       "step": 40
     },
     {
+      "epoch": 0.3,
+      "grad_norm": 8.8125,
+      "learning_rate": 4.323293827259707e-05,
+      "loss": 0.7522,
       "step": 45
     },
     {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 8.4375,
+      "learning_rate": 4.13610984527082e-05,
+      "loss": 0.7246,
       "step": 50
     },
     {
+      "epoch": 0.36666666666666664,
+      "grad_norm": 13.875,
+      "learning_rate": 3.931981790482172e-05,
+      "loss": 0.7451,
       "step": 55
     },
     {
+      "epoch": 0.4,
+      "grad_norm": 13.125,
+      "learning_rate": 3.713404963416025e-05,
+      "loss": 0.671,
       "step": 60
     },
     {
+      "epoch": 0.43333333333333335,
+      "grad_norm": 59.0,
+      "learning_rate": 3.483051289160265e-05,
+      "loss": 0.6582,
       "step": 65
     },
     {
+      "epoch": 0.4666666666666667,
+      "grad_norm": 10.0,
+      "learning_rate": 3.243736655243287e-05,
+      "loss": 0.6431,
       "step": 70
     },
     {
+      "epoch": 0.5,
+      "grad_norm": 15.375,
+      "learning_rate": 2.9983864896843578e-05,
+      "loss": 0.5837,
       "step": 75
     },
     {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 22.0,
+      "learning_rate": 2.7500000000000004e-05,
+      "loss": 0.6217,
       "step": 80
     },
     {
+      "epoch": 0.5666666666666667,
+      "grad_norm": 14.5,
+      "learning_rate": 2.5016135103156434e-05,
+      "loss": 0.5935,
       "step": 85
     },
     {
+      "epoch": 0.6,
+      "grad_norm": 13.8125,
+      "learning_rate": 2.2562633447567137e-05,
+      "loss": 0.563,
       "step": 90
     },
     {
+      "epoch": 0.6333333333333333,
+      "grad_norm": 9.4375,
+      "learning_rate": 2.0169487108397363e-05,
+      "loss": 0.5695,
       "step": 95
     },
     {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 63.0,
+      "learning_rate": 1.7865950365839762e-05,
+      "loss": 0.5332,
       "step": 100
     },
     {
+      "epoch": 0.7,
+      "grad_norm": 14.4375,
+      "learning_rate": 1.568018209517828e-05,
+      "loss": 0.5306,
       "step": 105
     },
     {
+      "epoch": 0.7333333333333333,
+      "grad_norm": 10.75,
+      "learning_rate": 1.3638901547291804e-05,
+      "loss": 0.4998,
       "step": 110
     },
     {
+      "epoch": 0.7666666666666667,
       "grad_norm": 10.25,
+      "learning_rate": 1.1767061727402935e-05,
+      "loss": 0.4856,
       "step": 115
     },
     {
+      "epoch": 0.8,
+      "grad_norm": 16.625,
+      "learning_rate": 1.0087544364752604e-05,
+      "loss": 0.4837,
       "step": 120
     },
     {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 15.0,
+      "learning_rate": 8.62088020194991e-06,
+      "loss": 0.4811,
       "step": 125
     },
+    {
+      "epoch": 0.8666666666666667,
+      "grad_norm": 9.375,
+      "learning_rate": 7.3849980232337995e-06,
+      "loss": 0.4875,
+      "step": 130
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 8.75,
+      "learning_rate": 6.3950054895732334e-06,
+      "loss": 0.4877,
+      "step": 135
+    },
+    {
+      "epoch": 0.9333333333333333,
+      "grad_norm": 11.5625,
+      "learning_rate": 5.663004459719738e-06,
+      "loss": 0.4812,
+      "step": 140
+    },
+    {
+      "epoch": 0.9666666666666667,
+      "grad_norm": 11.0,
+      "learning_rate": 5.197943054763173e-06,
+      "loss": 0.4589,
+      "step": 145
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 10.375,
+      "learning_rate": 5.005506274582033e-06,
+      "loss": 0.4762,
+      "step": 150
+    },
     {
       "epoch": 1.0,
+      "step": 150,
+      "total_flos": 4.211382991139635e+16,
+      "train_loss": 0.7568846257527669,
+      "train_runtime": 323.9308,
+      "train_samples_per_second": 3.704,
+      "train_steps_per_second": 0.463
     }
   ],
   "logging_steps": 5,
+  "max_steps": 150,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 4.211382991139635e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null