End of training

- README.md +2 -1
- all_results.json +8 -0
- train_results.json +8 -0
- trainer_state.json +498 -0
- training_loss.png +0 -0
README.md CHANGED

```diff
@@ -4,6 +4,7 @@ license: apache-2.0
 base_model: Qwen/Qwen3-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: qwen3-8b-dabstep-reasoning-108-fixed-reasoning-sharegpt-sft
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # qwen3-8b-dabstep-reasoning-108-fixed-reasoning-sharegpt-sft
 
-This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on
+This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the eval-ds-dabstep-reasoning-108-fixed-reasoning-sharegpt dataset.
 
 ## Model description
 
```
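For reference, a minimal sketch of loading the resulting checkpoint with the standard transformers chat API; the repo id below is assumed to mirror the model name above, so substitute the actual Hub path:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Repo id assumed from the model name in the README; replace with the real Hub path.
model_id = "qwen3-8b-dabstep-reasoning-108-fixed-reasoning-sharegpt-sft"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")

# Hypothetical prompt in the style of the data-analysis reasoning task the dataset name suggests.
messages = [{"role": "user", "content": "What is the average transaction value in the dataset?"}]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
outputs = model.generate(inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```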
all_results.json ADDED

```json
{
    "epoch": 5.0,
    "total_flos": 12383961956352.0,
    "train_loss": 0.45606403878101937,
    "train_runtime": 1434.1495,
    "train_samples_per_second": 0.352,
    "train_steps_per_second": 0.045
}
```
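As a quick consistency check (standard transformers Trainer semantics assumed for these fields), the throughput figures line up with the 65 optimizer steps recorded in trainer_state.json below; a minimal sketch:

```python
# Cross-check the reported throughput against the recorded step count.
train_runtime = 1434.1495              # seconds
train_steps_per_second = 0.045
train_samples_per_second = 0.352

print(train_runtime * train_steps_per_second)              # ~64.5 -> matches 65 optimizer steps
print(train_samples_per_second / train_steps_per_second)   # ~7.8 samples per optimizer step
# With train_batch_size = 1 (see trainer_state.json), ~7.8 samples/step suggests
# gradient accumulation of about 8 (an assumption; not recorded in these files).
```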
train_results.json ADDED (identical to all_results.json; no evaluation metrics were logged)

```json
{
    "epoch": 5.0,
    "total_flos": 12383961956352.0,
    "train_loss": 0.45606403878101937,
    "train_runtime": 1434.1495,
    "train_samples_per_second": 0.352,
    "train_steps_per_second": 0.045
}
```
trainer_state.json ADDED

```json
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 65,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07692307692307693,
      "grad_norm": 6.370218968023409,
      "learning_rate": 0.0,
      "loss": 0.9617,
      "step": 1
    },
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 7.062823048926537,
      "learning_rate": 1e-05,
      "loss": 0.9221,
      "step": 2
    },
    {
      "epoch": 0.23076923076923078,
      "grad_norm": 3.508151848769913,
      "learning_rate": 9.993977281025862e-06,
      "loss": 0.8516,
      "step": 3
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 2.1315111375920908,
      "learning_rate": 9.975923633360985e-06,
      "loss": 0.7455,
      "step": 4
    },
    {
      "epoch": 0.38461538461538464,
      "grad_norm": 2.067690304064468,
      "learning_rate": 9.945882549823906e-06,
      "loss": 0.7818,
      "step": 5
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 2.164959330267267,
      "learning_rate": 9.903926402016153e-06,
      "loss": 0.7056,
      "step": 6
    },
    {
      "epoch": 0.5384615384615384,
      "grad_norm": 1.7114510182271578,
      "learning_rate": 9.850156265972722e-06,
      "loss": 0.6684,
      "step": 7
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 1.4221134607922832,
      "learning_rate": 9.784701678661045e-06,
      "loss": 0.6943,
      "step": 8
    },
    {
      "epoch": 0.6923076923076923,
      "grad_norm": 1.2054367990274877,
      "learning_rate": 9.707720325915105e-06,
      "loss": 0.6606,
      "step": 9
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 1.234597606787108,
      "learning_rate": 9.619397662556434e-06,
      "loss": 0.662,
      "step": 10
    },
    {
      "epoch": 0.8461538461538461,
      "grad_norm": 1.0253610984337118,
      "learning_rate": 9.519946465617217e-06,
      "loss": 0.6118,
      "step": 11
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 0.9531385105738269,
      "learning_rate": 9.409606321741776e-06,
      "loss": 0.6148,
      "step": 12
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.8797592723988117,
      "learning_rate": 9.288643050001362e-06,
      "loss": 0.5794,
      "step": 13
    },
    {
      "epoch": 1.0769230769230769,
      "grad_norm": 0.8614478397135895,
      "learning_rate": 9.157348061512728e-06,
      "loss": 0.5439,
      "step": 14
    },
    {
      "epoch": 1.1538461538461537,
      "grad_norm": 0.8816737566077302,
      "learning_rate": 9.016037657403225e-06,
      "loss": 0.5074,
      "step": 15
    },
    {
      "epoch": 1.2307692307692308,
      "grad_norm": 0.8669076102244132,
      "learning_rate": 8.865052266813686e-06,
      "loss": 0.5223,
      "step": 16
    },
    {
      "epoch": 1.3076923076923077,
      "grad_norm": 0.8101412038742847,
      "learning_rate": 8.704755626774796e-06,
      "loss": 0.5032,
      "step": 17
    },
    {
      "epoch": 1.3846153846153846,
      "grad_norm": 0.8569082435438978,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.5523,
      "step": 18
    },
    {
      "epoch": 1.4615384615384617,
      "grad_norm": 0.7453930156975321,
      "learning_rate": 8.357794774235094e-06,
      "loss": 0.49,
      "step": 19
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 0.6283060582206399,
      "learning_rate": 8.171966420818227e-06,
      "loss": 0.4874,
      "step": 20
    },
    {
      "epoch": 1.6153846153846154,
      "grad_norm": 0.8023986095300789,
      "learning_rate": 7.978496522462167e-06,
      "loss": 0.4622,
      "step": 21
    },
    {
      "epoch": 1.6923076923076923,
      "grad_norm": 0.7122469220028385,
      "learning_rate": 7.777851165098012e-06,
      "loss": 0.4828,
      "step": 22
    },
    {
      "epoch": 1.7692307692307692,
      "grad_norm": 0.5943006237807561,
      "learning_rate": 7.570513720966108e-06,
      "loss": 0.4552,
      "step": 23
    },
    {
      "epoch": 1.8461538461538463,
      "grad_norm": 0.7102435889730987,
      "learning_rate": 7.3569836841299905e-06,
      "loss": 0.4797,
      "step": 24
    },
    {
      "epoch": 1.9230769230769231,
      "grad_norm": 0.7244490320083123,
      "learning_rate": 7.137775467151411e-06,
      "loss": 0.4787,
      "step": 25
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.6176632029398068,
      "learning_rate": 6.913417161825449e-06,
      "loss": 0.4329,
      "step": 26
    },
    {
      "epoch": 2.076923076923077,
      "grad_norm": 0.8216799021152335,
      "learning_rate": 6.684449266961101e-06,
      "loss": 0.3743,
      "step": 27
    },
    {
      "epoch": 2.1538461538461537,
      "grad_norm": 0.6005917289643109,
      "learning_rate": 6.451423386272312e-06,
      "loss": 0.4369,
      "step": 28
    },
    {
      "epoch": 2.230769230769231,
      "grad_norm": 0.624537941070379,
      "learning_rate": 6.21490089951632e-06,
      "loss": 0.4317,
      "step": 29
    },
    {
      "epoch": 2.3076923076923075,
      "grad_norm": 0.6577500844311218,
      "learning_rate": 5.975451610080643e-06,
      "loss": 0.4278,
      "step": 30
    },
    {
      "epoch": 2.3846153846153846,
      "grad_norm": 0.5918326665297654,
      "learning_rate": 5.733652372276809e-06,
      "loss": 0.444,
      "step": 31
    },
    {
      "epoch": 2.4615384615384617,
      "grad_norm": 0.7154936597726321,
      "learning_rate": 5.490085701647805e-06,
      "loss": 0.3575,
      "step": 32
    },
    {
      "epoch": 2.5384615384615383,
      "grad_norm": 0.7058375820227064,
      "learning_rate": 5.245338371637091e-06,
      "loss": 0.3927,
      "step": 33
    },
    {
      "epoch": 2.6153846153846154,
      "grad_norm": 0.624999538418926,
      "learning_rate": 5e-06,
      "loss": 0.4115,
      "step": 34
    },
    {
      "epoch": 2.6923076923076925,
      "grad_norm": 0.6189527167900223,
      "learning_rate": 4.75466162836291e-06,
      "loss": 0.393,
      "step": 35
    },
    {
      "epoch": 2.769230769230769,
      "grad_norm": 0.6581614058929585,
      "learning_rate": 4.509914298352197e-06,
      "loss": 0.3589,
      "step": 36
    },
    {
      "epoch": 2.8461538461538463,
      "grad_norm": 0.5968832668044702,
      "learning_rate": 4.266347627723192e-06,
      "loss": 0.4319,
      "step": 37
    },
    {
      "epoch": 2.9230769230769234,
      "grad_norm": 0.6829725248958235,
      "learning_rate": 4.02454838991936e-06,
      "loss": 0.3402,
      "step": 38
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.6835108217400175,
      "learning_rate": 3.7850991004836813e-06,
      "loss": 0.3331,
      "step": 39
    },
    {
      "epoch": 3.076923076923077,
      "grad_norm": 0.7315492211655491,
      "learning_rate": 3.5485766137276894e-06,
      "loss": 0.3531,
      "step": 40
    },
    {
      "epoch": 3.1538461538461537,
      "grad_norm": 0.6375294587624502,
      "learning_rate": 3.3155507330389004e-06,
      "loss": 0.3531,
      "step": 41
    },
    {
      "epoch": 3.230769230769231,
      "grad_norm": 0.618128196978316,
      "learning_rate": 3.0865828381745515e-06,
      "loss": 0.3411,
      "step": 42
    },
    {
      "epoch": 3.3076923076923075,
      "grad_norm": 0.6384871969000007,
      "learning_rate": 2.862224532848591e-06,
      "loss": 0.3315,
      "step": 43
    },
    {
      "epoch": 3.3846153846153846,
      "grad_norm": 0.561255153854599,
      "learning_rate": 2.6430163158700116e-06,
      "loss": 0.3666,
      "step": 44
    },
    {
      "epoch": 3.4615384615384617,
      "grad_norm": 0.5408489944687312,
      "learning_rate": 2.429486279033892e-06,
      "loss": 0.3705,
      "step": 45
    },
    {
      "epoch": 3.5384615384615383,
      "grad_norm": 0.5671728055194564,
      "learning_rate": 2.2221488349019903e-06,
      "loss": 0.3648,
      "step": 46
    },
    {
      "epoch": 3.6153846153846154,
      "grad_norm": 0.5700975153063862,
      "learning_rate": 2.0215034775378336e-06,
      "loss": 0.3223,
      "step": 47
    },
    {
      "epoch": 3.6923076923076925,
      "grad_norm": 0.664767908088173,
      "learning_rate": 1.8280335791817733e-06,
      "loss": 0.3183,
      "step": 48
    },
    {
      "epoch": 3.769230769230769,
      "grad_norm": 0.6330871629952953,
      "learning_rate": 1.642205225764908e-06,
      "loss": 0.3209,
      "step": 49
    },
    {
      "epoch": 3.8461538461538463,
      "grad_norm": 0.6309153488529204,
      "learning_rate": 1.4644660940672628e-06,
      "loss": 0.3199,
      "step": 50
    },
    {
      "epoch": 3.9230769230769234,
      "grad_norm": 0.55827785835018,
      "learning_rate": 1.2952443732252058e-06,
      "loss": 0.3426,
      "step": 51
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.5523594386231,
      "learning_rate": 1.134947733186315e-06,
      "loss": 0.3742,
      "step": 52
    },
    {
      "epoch": 4.076923076923077,
      "grad_norm": 0.5458600271870175,
      "learning_rate": 9.83962342596776e-07,
      "loss": 0.3534,
      "step": 53
    },
    {
      "epoch": 4.153846153846154,
      "grad_norm": 0.6854362784340675,
      "learning_rate": 8.426519384872733e-07,
      "loss": 0.3497,
      "step": 54
    },
    {
      "epoch": 4.230769230769231,
      "grad_norm": 0.6032529402232379,
      "learning_rate": 7.113569499986401e-07,
      "loss": 0.2484,
      "step": 55
    },
    {
      "epoch": 4.3076923076923075,
      "grad_norm": 0.7477132723449166,
      "learning_rate": 5.903936782582253e-07,
      "loss": 0.244,
      "step": 56
    },
    {
      "epoch": 4.384615384615385,
      "grad_norm": 0.5868192979660759,
      "learning_rate": 4.800535343827834e-07,
      "loss": 0.3561,
      "step": 57
    },
    {
      "epoch": 4.461538461538462,
      "grad_norm": 0.5590511625666816,
      "learning_rate": 3.8060233744356634e-07,
      "loss": 0.3018,
      "step": 58
    },
    {
      "epoch": 4.538461538461538,
      "grad_norm": 0.573822252707526,
      "learning_rate": 2.9227967408489653e-07,
      "loss": 0.3274,
      "step": 59
    },
    {
      "epoch": 4.615384615384615,
      "grad_norm": 0.5539473518142963,
      "learning_rate": 2.152983213389559e-07,
      "loss": 0.329,
      "step": 60
    },
    {
      "epoch": 4.6923076923076925,
      "grad_norm": 0.5602498609734554,
      "learning_rate": 1.4984373402728014e-07,
      "loss": 0.3235,
      "step": 61
    },
    {
      "epoch": 4.769230769230769,
      "grad_norm": 0.5088498618342225,
      "learning_rate": 9.607359798384785e-08,
      "loss": 0.357,
      "step": 62
    },
    {
      "epoch": 4.846153846153846,
      "grad_norm": 0.5487703200660301,
      "learning_rate": 5.411745017609493e-08,
      "loss": 0.2892,
      "step": 63
    },
    {
      "epoch": 4.923076923076923,
      "grad_norm": 0.5219657252094004,
      "learning_rate": 2.4076366639015914e-08,
      "loss": 0.349,
      "step": 64
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.5380803859588003,
      "learning_rate": 6.022718974137976e-09,
      "loss": 0.3456,
      "step": 65
    },
    {
      "epoch": 5.0,
      "step": 65,
      "total_flos": 12383961956352.0,
      "train_loss": 0.45606403878101937,
      "train_runtime": 1434.1495,
      "train_samples_per_second": 0.352,
      "train_steps_per_second": 0.045
    }
  ],
  "logging_steps": 1,
  "max_steps": 65,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 42,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 12383961956352.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
```
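The learning-rate column traces a cosine decay from the 1e-05 peak after a single warmup step (it passes 5e-06 at the midpoint, step 34), and the loss falls from ~0.96 to ~0.35 over the five epochs. The training_loss.png committed below is presumably rendered from this log_history; a minimal sketch of how such a curve could be regenerated (matplotlib assumed, output filename matching the committed file):

```python
import json
import matplotlib.pyplot as plt

# Load the trainer state committed above (path assumed relative to the repo root).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep the per-step entries; the final summary entry has "train_loss", not "loss".
logs = [e for e in state["log_history"] if "loss" in e]

plt.plot([e["step"] for e in logs], [e["loss"] for e in logs])
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("qwen3-8b-dabstep-reasoning-108-fixed-reasoning-sharegpt-sft")
plt.savefig("training_loss.png")
```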
training_loss.png ADDED