End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +710 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: oh-dcft-v1.2_no-curation_gpt-4o-mini_wo_camel_chemistry
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # oh-dcft-v1.2_no-curation_gpt-4o-mini_wo_camel_chemistry
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.6654

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: oh-dcft-v1.2_no-curation_gpt-4o-mini_wo_camel_chemistry
 # oh-dcft-v1.2_no-curation_gpt-4o-mini_wo_camel_chemistry
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh-dcft-v1.2_no-curation_gpt-4o-mini_wo_camel_chemistry dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.6654

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.6653555631637573,
+    "eval_runtime": 29.686,
+    "eval_samples_per_second": 279.054,
+    "eval_steps_per_second": 1.112,
+    "total_flos": 1547734414786560.0,
+    "train_loss": 0.6307248511871735,
+    "train_runtime": 5890.0632,
+    "train_samples_per_second": 80.163,
+    "train_steps_per_second": 0.157
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.6653555631637573,
+    "eval_runtime": 29.686,
+    "eval_samples_per_second": 279.054,
+    "eval_steps_per_second": 1.112
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 1547734414786560.0,
+    "train_loss": 0.6307248511871735,
+    "train_runtime": 5890.0632,
+    "train_samples_per_second": 80.163,
+    "train_steps_per_second": 0.157
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,710 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 924,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.032467532467532464,
+      "grad_norm": 6.265174130511246,
+      "learning_rate": 5e-06,
+      "loss": 0.9365,
+      "step": 10
+    },
+    {
+      "epoch": 0.06493506493506493,
+      "grad_norm": 1.3770366162111063,
+      "learning_rate": 5e-06,
+      "loss": 0.8189,
+      "step": 20
+    },
+    {
+      "epoch": 0.09740259740259741,
+      "grad_norm": 1.0978187926506935,
+      "learning_rate": 5e-06,
+      "loss": 0.7833,
+      "step": 30
+    },
+    {
+      "epoch": 0.12987012987012986,
+      "grad_norm": 0.981210956561077,
+      "learning_rate": 5e-06,
+      "loss": 0.7556,
+      "step": 40
+    },
+    {
+      "epoch": 0.16233766233766234,
+      "grad_norm": 0.9693356495883646,
+      "learning_rate": 5e-06,
+      "loss": 0.7414,
+      "step": 50
+    },
+    {
+      "epoch": 0.19480519480519481,
+      "grad_norm": 1.0030466632996962,
+      "learning_rate": 5e-06,
+      "loss": 0.7236,
+      "step": 60
+    },
+    {
+      "epoch": 0.22727272727272727,
+      "grad_norm": 1.1260479581636729,
+      "learning_rate": 5e-06,
+      "loss": 0.7169,
+      "step": 70
+    },
+    {
+      "epoch": 0.2597402597402597,
+      "grad_norm": 0.7460895709589158,
+      "learning_rate": 5e-06,
+      "loss": 0.7168,
+      "step": 80
+    },
+    {
+      "epoch": 0.2922077922077922,
+      "grad_norm": 0.9486986195334304,
+      "learning_rate": 5e-06,
+      "loss": 0.7031,
+      "step": 90
+    },
+    {
+      "epoch": 0.3246753246753247,
+      "grad_norm": 0.7051606863668234,
+      "learning_rate": 5e-06,
+      "loss": 0.6974,
+      "step": 100
+    },
+    {
+      "epoch": 0.35714285714285715,
+      "grad_norm": 0.6756560441314118,
+      "learning_rate": 5e-06,
+      "loss": 0.692,
+      "step": 110
+    },
+    {
+      "epoch": 0.38961038961038963,
+      "grad_norm": 0.5240739369726283,
+      "learning_rate": 5e-06,
+      "loss": 0.693,
+      "step": 120
+    },
+    {
+      "epoch": 0.42207792207792205,
+      "grad_norm": 0.5785376996044719,
+      "learning_rate": 5e-06,
+      "loss": 0.689,
+      "step": 130
+    },
+    {
+      "epoch": 0.45454545454545453,
+      "grad_norm": 0.5933112722295861,
+      "learning_rate": 5e-06,
+      "loss": 0.691,
+      "step": 140
+    },
+    {
+      "epoch": 0.487012987012987,
+      "grad_norm": 0.6077588775577111,
+      "learning_rate": 5e-06,
+      "loss": 0.6806,
+      "step": 150
+    },
+    {
+      "epoch": 0.5194805194805194,
+      "grad_norm": 0.6238054182160374,
+      "learning_rate": 5e-06,
+      "loss": 0.6817,
+      "step": 160
+    },
+    {
+      "epoch": 0.551948051948052,
+      "grad_norm": 0.6081002667408969,
+      "learning_rate": 5e-06,
+      "loss": 0.6807,
+      "step": 170
+    },
+    {
+      "epoch": 0.5844155844155844,
+      "grad_norm": 0.6219764792866612,
+      "learning_rate": 5e-06,
+      "loss": 0.6773,
+      "step": 180
+    },
+    {
+      "epoch": 0.6168831168831169,
+      "grad_norm": 0.739529394087955,
+      "learning_rate": 5e-06,
+      "loss": 0.6795,
+      "step": 190
+    },
+    {
+      "epoch": 0.6493506493506493,
+      "grad_norm": 0.7524681424985254,
+      "learning_rate": 5e-06,
+      "loss": 0.6723,
+      "step": 200
+    },
+    {
+      "epoch": 0.6818181818181818,
+      "grad_norm": 0.6010281827966147,
+      "learning_rate": 5e-06,
+      "loss": 0.6709,
+      "step": 210
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 0.6611165599870378,
+      "learning_rate": 5e-06,
+      "loss": 0.6692,
+      "step": 220
+    },
+    {
+      "epoch": 0.7467532467532467,
+      "grad_norm": 0.8344801352021102,
+      "learning_rate": 5e-06,
+      "loss": 0.6738,
+      "step": 230
+    },
+    {
+      "epoch": 0.7792207792207793,
+      "grad_norm": 0.7851764850319622,
+      "learning_rate": 5e-06,
+      "loss": 0.6692,
+      "step": 240
+    },
+    {
+      "epoch": 0.8116883116883117,
+      "grad_norm": 0.6068138322416587,
+      "learning_rate": 5e-06,
+      "loss": 0.6693,
+      "step": 250
+    },
+    {
+      "epoch": 0.8441558441558441,
+      "grad_norm": 0.5781959225993195,
+      "learning_rate": 5e-06,
+      "loss": 0.6698,
+      "step": 260
+    },
+    {
+      "epoch": 0.8766233766233766,
+      "grad_norm": 0.7049586430934481,
+      "learning_rate": 5e-06,
+      "loss": 0.672,
+      "step": 270
+    },
+    {
+      "epoch": 0.9090909090909091,
+      "grad_norm": 0.6323170370591866,
+      "learning_rate": 5e-06,
+      "loss": 0.6668,
+      "step": 280
+    },
+    {
+      "epoch": 0.9415584415584416,
+      "grad_norm": 0.881618301887001,
+      "learning_rate": 5e-06,
+      "loss": 0.6706,
+      "step": 290
+    },
+    {
+      "epoch": 0.974025974025974,
+      "grad_norm": 0.5219254149696031,
+      "learning_rate": 5e-06,
+      "loss": 0.6679,
+      "step": 300
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.6691647171974182,
+      "eval_runtime": 30.5714,
+      "eval_samples_per_second": 270.972,
+      "eval_steps_per_second": 1.079,
+      "step": 308
+    },
+    {
+      "epoch": 1.0064935064935066,
+      "grad_norm": 0.7028333992981749,
+      "learning_rate": 5e-06,
+      "loss": 0.6597,
+      "step": 310
+    },
+    {
+      "epoch": 1.0389610389610389,
+      "grad_norm": 0.620216384870711,
+      "learning_rate": 5e-06,
+      "loss": 0.6178,
+      "step": 320
+    },
+    {
+      "epoch": 1.0714285714285714,
+      "grad_norm": 0.6279544966110486,
+      "learning_rate": 5e-06,
+      "loss": 0.6196,
+      "step": 330
+    },
+    {
+      "epoch": 1.103896103896104,
+      "grad_norm": 0.47691022078448675,
+      "learning_rate": 5e-06,
+      "loss": 0.6213,
+      "step": 340
+    },
+    {
+      "epoch": 1.1363636363636362,
+      "grad_norm": 0.6169659732755709,
+      "learning_rate": 5e-06,
+      "loss": 0.6189,
+      "step": 350
+    },
+    {
+      "epoch": 1.1688311688311688,
+      "grad_norm": 0.6930896730291389,
+      "learning_rate": 5e-06,
+      "loss": 0.6179,
+      "step": 360
+    },
+    {
+      "epoch": 1.2012987012987013,
+      "grad_norm": 0.5888468229519391,
+      "learning_rate": 5e-06,
+      "loss": 0.6193,
+      "step": 370
+    },
+    {
+      "epoch": 1.2337662337662338,
+      "grad_norm": 0.5114807666495347,
+      "learning_rate": 5e-06,
+      "loss": 0.6205,
+      "step": 380
+    },
+    {
+      "epoch": 1.2662337662337662,
+      "grad_norm": 0.576480885597218,
+      "learning_rate": 5e-06,
+      "loss": 0.6143,
+      "step": 390
+    },
+    {
+      "epoch": 1.2987012987012987,
+      "grad_norm": 0.9781557440302872,
+      "learning_rate": 5e-06,
+      "loss": 0.616,
+      "step": 400
+    },
+    {
+      "epoch": 1.3311688311688312,
+      "grad_norm": 0.5493968761484528,
+      "learning_rate": 5e-06,
+      "loss": 0.6181,
+      "step": 410
+    },
+    {
+      "epoch": 1.3636363636363638,
+      "grad_norm": 0.8450188883114491,
+      "learning_rate": 5e-06,
+      "loss": 0.6186,
+      "step": 420
+    },
+    {
+      "epoch": 1.396103896103896,
+      "grad_norm": 0.6672141224772778,
+      "learning_rate": 5e-06,
+      "loss": 0.6182,
+      "step": 430
+    },
+    {
+      "epoch": 1.4285714285714286,
+      "grad_norm": 0.5436445484738832,
+      "learning_rate": 5e-06,
+      "loss": 0.6147,
+      "step": 440
+    },
+    {
+      "epoch": 1.4610389610389611,
+      "grad_norm": 0.5830504026660146,
+      "learning_rate": 5e-06,
+      "loss": 0.6179,
+      "step": 450
+    },
+    {
+      "epoch": 1.4935064935064934,
+      "grad_norm": 0.6473257236943104,
+      "learning_rate": 5e-06,
+      "loss": 0.6199,
+      "step": 460
+    },
+    {
+      "epoch": 1.525974025974026,
+      "grad_norm": 0.5427880278607804,
+      "learning_rate": 5e-06,
+      "loss": 0.6168,
+      "step": 470
+    },
+    {
+      "epoch": 1.5584415584415585,
+      "grad_norm": 0.5689580314401272,
+      "learning_rate": 5e-06,
+      "loss": 0.6159,
+      "step": 480
+    },
+    {
+      "epoch": 1.5909090909090908,
+      "grad_norm": 0.597927845953086,
+      "learning_rate": 5e-06,
+      "loss": 0.6175,
+      "step": 490
+    },
+    {
+      "epoch": 1.6233766233766234,
+      "grad_norm": 0.6128642707216239,
+      "learning_rate": 5e-06,
+      "loss": 0.6163,
+      "step": 500
+    },
+    {
+      "epoch": 1.655844155844156,
+      "grad_norm": 0.5455974938431143,
+      "learning_rate": 5e-06,
+      "loss": 0.6168,
+      "step": 510
+    },
+    {
+      "epoch": 1.6883116883116882,
+      "grad_norm": 0.5153120159264221,
+      "learning_rate": 5e-06,
+      "loss": 0.6204,
+      "step": 520
+    },
+    {
+      "epoch": 1.7207792207792207,
+      "grad_norm": 0.5767601324955324,
+      "learning_rate": 5e-06,
+      "loss": 0.619,
+      "step": 530
+    },
+    {
+      "epoch": 1.7532467532467533,
+      "grad_norm": 0.5856685996311523,
+      "learning_rate": 5e-06,
+      "loss": 0.6195,
+      "step": 540
+    },
+    {
+      "epoch": 1.7857142857142856,
+      "grad_norm": 0.5318505472371191,
+      "learning_rate": 5e-06,
+      "loss": 0.6142,
+      "step": 550
+    },
+    {
+      "epoch": 1.8181818181818183,
+      "grad_norm": 0.5867934386348821,
+      "learning_rate": 5e-06,
+      "loss": 0.6163,
+      "step": 560
+    },
+    {
+      "epoch": 1.8506493506493507,
+      "grad_norm": 0.5460100351131841,
+      "learning_rate": 5e-06,
+      "loss": 0.6209,
+      "step": 570
+    },
+    {
+      "epoch": 1.883116883116883,
+      "grad_norm": 0.5930045996717794,
+      "learning_rate": 5e-06,
+      "loss": 0.6173,
+      "step": 580
+    },
+    {
+      "epoch": 1.9155844155844157,
+      "grad_norm": 0.5210440244932204,
+      "learning_rate": 5e-06,
+      "loss": 0.6172,
+      "step": 590
+    },
+    {
+      "epoch": 1.948051948051948,
+      "grad_norm": 0.678263024145128,
+      "learning_rate": 5e-06,
+      "loss": 0.6219,
+      "step": 600
+    },
+    {
+      "epoch": 1.9805194805194806,
+      "grad_norm": 0.5193570456262979,
+      "learning_rate": 5e-06,
+      "loss": 0.6119,
+      "step": 610
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6595985293388367,
+      "eval_runtime": 30.7002,
+      "eval_samples_per_second": 269.835,
+      "eval_steps_per_second": 1.075,
+      "step": 616
+    },
+    {
+      "epoch": 2.012987012987013,
+      "grad_norm": 0.997715568070811,
+      "learning_rate": 5e-06,
+      "loss": 0.5894,
+      "step": 620
+    },
+    {
+      "epoch": 2.0454545454545454,
+      "grad_norm": 0.7592737794541236,
+      "learning_rate": 5e-06,
+      "loss": 0.5677,
+      "step": 630
+    },
+    {
+      "epoch": 2.0779220779220777,
+      "grad_norm": 0.6056584838864539,
+      "learning_rate": 5e-06,
+      "loss": 0.5669,
+      "step": 640
+    },
+    {
+      "epoch": 2.1103896103896105,
+      "grad_norm": 0.6211039916928865,
+      "learning_rate": 5e-06,
+      "loss": 0.5697,
+      "step": 650
+    },
+    {
+      "epoch": 2.142857142857143,
+      "grad_norm": 0.615106862254971,
+      "learning_rate": 5e-06,
+      "loss": 0.5693,
+      "step": 660
+    },
+    {
+      "epoch": 2.175324675324675,
+      "grad_norm": 0.566417720964845,
+      "learning_rate": 5e-06,
+      "loss": 0.577,
+      "step": 670
+    },
+    {
+      "epoch": 2.207792207792208,
+      "grad_norm": 0.5480748428783726,
+      "learning_rate": 5e-06,
+      "loss": 0.5724,
+      "step": 680
+    },
+    {
+      "epoch": 2.24025974025974,
+      "grad_norm": 0.6883572551516758,
+      "learning_rate": 5e-06,
+      "loss": 0.5663,
+      "step": 690
+    },
+    {
+      "epoch": 2.2727272727272725,
+      "grad_norm": 0.7624758724871575,
+      "learning_rate": 5e-06,
+      "loss": 0.5721,
+      "step": 700
+    },
+    {
+      "epoch": 2.3051948051948052,
+      "grad_norm": 0.5925041865618843,
+      "learning_rate": 5e-06,
+      "loss": 0.5718,
+      "step": 710
+    },
+    {
+      "epoch": 2.3376623376623376,
+      "grad_norm": 0.5423034645452969,
+      "learning_rate": 5e-06,
+      "loss": 0.5681,
+      "step": 720
+    },
+    {
+      "epoch": 2.3701298701298703,
+      "grad_norm": 0.5480316834860852,
+      "learning_rate": 5e-06,
+      "loss": 0.5722,
+      "step": 730
+    },
+    {
+      "epoch": 2.4025974025974026,
+      "grad_norm": 0.5169062030347897,
+      "learning_rate": 5e-06,
+      "loss": 0.578,
+      "step": 740
+    },
+    {
+      "epoch": 2.435064935064935,
+      "grad_norm": 0.5457808079840645,
+      "learning_rate": 5e-06,
+      "loss": 0.57,
+      "step": 750
+    },
+    {
+      "epoch": 2.4675324675324677,
+      "grad_norm": 0.5470205045138103,
+      "learning_rate": 5e-06,
+      "loss": 0.5726,
+      "step": 760
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.5125136364795218,
+      "learning_rate": 5e-06,
+      "loss": 0.5693,
+      "step": 770
+    },
+    {
+      "epoch": 2.5324675324675323,
+      "grad_norm": 0.5945664415971015,
+      "learning_rate": 5e-06,
+      "loss": 0.5714,
+      "step": 780
+    },
+    {
+      "epoch": 2.564935064935065,
+      "grad_norm": 0.5702694037641614,
+      "learning_rate": 5e-06,
+      "loss": 0.5689,
+      "step": 790
+    },
+    {
+      "epoch": 2.5974025974025974,
+      "grad_norm": 0.5441374726350022,
+      "learning_rate": 5e-06,
+      "loss": 0.5742,
+      "step": 800
+    },
+    {
+      "epoch": 2.62987012987013,
+      "grad_norm": 0.5674621294447999,
+      "learning_rate": 5e-06,
+      "loss": 0.5687,
+      "step": 810
+    },
+    {
+      "epoch": 2.6623376623376624,
+      "grad_norm": 0.5997098488587294,
+      "learning_rate": 5e-06,
+      "loss": 0.5763,
+      "step": 820
+    },
+    {
+      "epoch": 2.6948051948051948,
+      "grad_norm": 0.6199757649220302,
+      "learning_rate": 5e-06,
+      "loss": 0.5747,
+      "step": 830
+    },
+    {
+      "epoch": 2.7272727272727275,
+      "grad_norm": 0.6911213249901123,
+      "learning_rate": 5e-06,
+      "loss": 0.5711,
+      "step": 840
+    },
+    {
+      "epoch": 2.75974025974026,
+      "grad_norm": 0.5709123176208969,
+      "learning_rate": 5e-06,
+      "loss": 0.5701,
+      "step": 850
+    },
+    {
+      "epoch": 2.792207792207792,
+      "grad_norm": 0.6304517541226137,
+      "learning_rate": 5e-06,
+      "loss": 0.5673,
+      "step": 860
+    },
+    {
+      "epoch": 2.824675324675325,
+      "grad_norm": 0.6030037959776535,
+      "learning_rate": 5e-06,
+      "loss": 0.5713,
+      "step": 870
+    },
+    {
+      "epoch": 2.857142857142857,
+      "grad_norm": 0.5603204730571357,
+      "learning_rate": 5e-06,
+      "loss": 0.5749,
+      "step": 880
+    },
+    {
+      "epoch": 2.8896103896103895,
+      "grad_norm": 0.5148606934943276,
+      "learning_rate": 5e-06,
+      "loss": 0.5671,
+      "step": 890
+    },
+    {
+      "epoch": 2.9220779220779223,
+      "grad_norm": 0.770823574891512,
+      "learning_rate": 5e-06,
+      "loss": 0.5694,
+      "step": 900
+    },
+    {
+      "epoch": 2.9545454545454546,
+      "grad_norm": 0.6707592403791355,
+      "learning_rate": 5e-06,
+      "loss": 0.5691,
+      "step": 910
+    },
+    {
+      "epoch": 2.987012987012987,
+      "grad_norm": 0.7817460976590817,
+      "learning_rate": 5e-06,
+      "loss": 0.568,
+      "step": 920
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.6653555631637573,
+      "eval_runtime": 29.9562,
+      "eval_samples_per_second": 276.537,
+      "eval_steps_per_second": 1.102,
+      "step": 924
+    },
+    {
+      "epoch": 3.0,
+      "step": 924,
+      "total_flos": 1547734414786560.0,
+      "train_loss": 0.6307248511871735,
+      "train_runtime": 5890.0632,
+      "train_samples_per_second": 80.163,
+      "train_steps_per_second": 0.157
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 924,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1547734414786560.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed