End of training
- README.md +2 -1
- all_results.json +12 -0
- eval_results.json +7 -0
- train_results.json +8 -0
- trainer_state.json +696 -0
- training_eval_loss.png +0 -0
- training_loss.png +0 -0
README.md CHANGED
@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: oh_v1.3_camel_chemistry_x4
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # oh_v1.3_camel_chemistry_x4
 
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/oh_v1.3_camel_chemistry_x4 dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7374
 
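For reference, a checkpoint published from a run like this is normally loadable with the Transformers API. The sketch below is illustrative only: the repository id is an assumption inferred from the model name and the mlfoundations-dev namespace used for the dataset, and the dtype/device settings are ordinary defaults rather than anything stated in this commit.

```python
# Minimal sketch: loading the fine-tuned checkpoint with Hugging Face Transformers.
# The repo id is assumed from the model name shown above; adjust to the actual repository.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "mlfoundations-dev/oh_v1.3_camel_chemistry_x4"  # assumed repository id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype="auto",   # pick up the dtype stored in the checkpoint
    device_map="auto",    # requires `accelerate`; remove to load on CPU
)

prompt = "What is the molar mass of water?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```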
all_results.json ADDED
@@ -0,0 +1,12 @@
+{
+    "epoch": 2.9987628865979383,
+    "eval_loss": 0.737443208694458,
+    "eval_runtime": 323.8922,
+    "eval_samples_per_second": 25.218,
+    "eval_steps_per_second": 0.395,
+    "total_flos": 1522399476449280.0,
+    "train_loss": 0.7216839113644641,
+    "train_runtime": 53747.0627,
+    "train_samples_per_second": 8.662,
+    "train_steps_per_second": 0.017
+}
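A few secondary quantities follow directly from these numbers. The snippet below is a minimal sketch of deriving them, assuming all_results.json has been downloaded to the working directory; the approximate values in the comments are back-of-the-envelope, not reported by the run itself.

```python
# Minimal sketch: derive secondary quantities from all_results.json.
import json
import math

with open("all_results.json") as f:
    results = json.load(f)

# Approximate evaluation set size: runtime (s) x samples processed per second.
eval_samples = results["eval_runtime"] * results["eval_samples_per_second"]   # ~8,168 examples

# Token-level perplexity implied by the cross-entropy eval loss.
perplexity = math.exp(results["eval_loss"])                                   # ~2.09

# Approximate number of training examples processed over the whole 3-epoch run.
train_samples_seen = results["train_runtime"] * results["train_samples_per_second"]  # ~465,557

print(f"eval samples ≈ {eval_samples:.0f}")
print(f"perplexity ≈ {perplexity:.3f}")
print(f"training examples seen ≈ {train_samples_seen:.0f}")
```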
eval_results.json ADDED
@@ -0,0 +1,7 @@
+{
+    "epoch": 2.9987628865979383,
+    "eval_loss": 0.737443208694458,
+    "eval_runtime": 323.8922,
+    "eval_samples_per_second": 25.218,
+    "eval_steps_per_second": 0.395
+}
train_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 2.9987628865979383,
+    "total_flos": 1522399476449280.0,
+    "train_loss": 0.7216839113644641,
+    "train_runtime": 53747.0627,
+    "train_samples_per_second": 8.662,
+    "train_steps_per_second": 0.017
+}
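Combined with global_step = 909 and train_batch_size = 8 from trainer_state.json below, these throughput figures also imply an effective global batch of roughly 512 examples per optimizer step. The sketch below is only that arithmetic; how the global batch was split between data-parallel workers and gradient accumulation is not recorded in this commit and is left as an open assumption.

```python
# Minimal sketch: back out the effective global batch size from train_results.json.
# Assumes the throughput numbers describe the same run as global_step = 909 in trainer_state.json.
import json

with open("train_results.json") as f:
    r = json.load(f)

samples_seen = r["train_runtime"] * r["train_samples_per_second"]   # ~465,557 examples over 3 epochs
optimizer_steps = 909                                                 # global_step from trainer_state.json
effective_batch = samples_seen / optimizer_steps                      # ~512 examples per update

print(f"effective global batch size ≈ {effective_batch:.0f}")
# With the per-device batch size of 8 recorded in trainer_state.json, a global batch of ~512
# would correspond to 64-way data parallelism, gradient accumulation, or some mix of the two.
```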
trainer_state.json ADDED
@@ -0,0 +1,696 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.9987628865979383,
+  "eval_steps": 500,
+  "global_step": 909,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.032989690721649485, "grad_norm": 3.190283465974621, "learning_rate": 5e-06, "loss": 1.0361, "step": 10},
+    {"epoch": 0.06597938144329897, "grad_norm": 1.2412136578455273, "learning_rate": 5e-06, "loss": 0.9152, "step": 20},
+    {"epoch": 0.09896907216494845, "grad_norm": 0.9937983329085553, "learning_rate": 5e-06, "loss": 0.8771, "step": 30},
+    {"epoch": 0.13195876288659794, "grad_norm": 0.9565722646796444, "learning_rate": 5e-06, "loss": 0.855, "step": 40},
+    {"epoch": 0.16494845360824742, "grad_norm": 0.8422015410274669, "learning_rate": 5e-06, "loss": 0.834, "step": 50},
+    {"epoch": 0.1979381443298969, "grad_norm": 0.9927761059685116, "learning_rate": 5e-06, "loss": 0.8249, "step": 60},
+    {"epoch": 0.2309278350515464, "grad_norm": 1.0124625468461579, "learning_rate": 5e-06, "loss": 0.8092, "step": 70},
+    {"epoch": 0.2639175257731959, "grad_norm": 0.9162707322543721, "learning_rate": 5e-06, "loss": 0.8002, "step": 80},
+    {"epoch": 0.29690721649484536, "grad_norm": 0.7438936667406614, "learning_rate": 5e-06, "loss": 0.7953, "step": 90},
+    {"epoch": 0.32989690721649484, "grad_norm": 0.74943987165883, "learning_rate": 5e-06, "loss": 0.7912, "step": 100},
+    {"epoch": 0.3628865979381443, "grad_norm": 0.6393536784375358, "learning_rate": 5e-06, "loss": 0.7835, "step": 110},
+    {"epoch": 0.3958762886597938, "grad_norm": 0.6732184370236527, "learning_rate": 5e-06, "loss": 0.7846, "step": 120},
+    {"epoch": 0.4288659793814433, "grad_norm": 0.675715599863121, "learning_rate": 5e-06, "loss": 0.7835, "step": 130},
+    {"epoch": 0.4618556701030928, "grad_norm": 0.638472159519646, "learning_rate": 5e-06, "loss": 0.7797, "step": 140},
+    {"epoch": 0.4948453608247423, "grad_norm": 1.1212007213917934, "learning_rate": 5e-06, "loss": 0.7759, "step": 150},
+    {"epoch": 0.5278350515463918, "grad_norm": 0.5929058650136099, "learning_rate": 5e-06, "loss": 0.7758, "step": 160},
+    {"epoch": 0.5608247422680412, "grad_norm": 0.7325664578284696, "learning_rate": 5e-06, "loss": 0.7746, "step": 170},
+    {"epoch": 0.5938144329896907, "grad_norm": 0.7146407893100764, "learning_rate": 5e-06, "loss": 0.772, "step": 180},
+    {"epoch": 0.6268041237113402, "grad_norm": 0.6297828498939105, "learning_rate": 5e-06, "loss": 0.7686, "step": 190},
+    {"epoch": 0.6597938144329897, "grad_norm": 0.650337063259678, "learning_rate": 5e-06, "loss": 0.7668, "step": 200},
+    {"epoch": 0.6927835051546392, "grad_norm": 0.577352278155154, "learning_rate": 5e-06, "loss": 0.7633, "step": 210},
+    {"epoch": 0.7257731958762886, "grad_norm": 0.6351053699389445, "learning_rate": 5e-06, "loss": 0.7606, "step": 220},
+    {"epoch": 0.7587628865979381, "grad_norm": 0.6179445706530043, "learning_rate": 5e-06, "loss": 0.7644, "step": 230},
+    {"epoch": 0.7917525773195876, "grad_norm": 0.7772047208925177, "learning_rate": 5e-06, "loss": 0.7585, "step": 240},
+    {"epoch": 0.8247422680412371, "grad_norm": 0.6393400921262609, "learning_rate": 5e-06, "loss": 0.7597, "step": 250},
+    {"epoch": 0.8577319587628865, "grad_norm": 0.5366628092052804, "learning_rate": 5e-06, "loss": 0.7559, "step": 260},
+    {"epoch": 0.8907216494845361, "grad_norm": 0.6897451596502111, "learning_rate": 5e-06, "loss": 0.757, "step": 270},
+    {"epoch": 0.9237113402061856, "grad_norm": 0.683076031456689, "learning_rate": 5e-06, "loss": 0.7595, "step": 280},
+    {"epoch": 0.9567010309278351, "grad_norm": 0.6342586759859082, "learning_rate": 5e-06, "loss": 0.7576, "step": 290},
+    {"epoch": 0.9896907216494846, "grad_norm": 0.6339977397184522, "learning_rate": 5e-06, "loss": 0.7548, "step": 300},
+    {"epoch": 0.9995876288659794, "eval_loss": 0.7519278526306152, "eval_runtime": 322.6674, "eval_samples_per_second": 25.314, "eval_steps_per_second": 0.397, "step": 303},
+    {"epoch": 1.022680412371134, "grad_norm": 0.810990287818241, "learning_rate": 5e-06, "loss": 0.7934, "step": 310},
+    {"epoch": 1.0556701030927835, "grad_norm": 0.5989713675220099, "learning_rate": 5e-06, "loss": 0.7044, "step": 320},
+    {"epoch": 1.088659793814433, "grad_norm": 0.584782182855064, "learning_rate": 5e-06, "loss": 0.7115, "step": 330},
+    {"epoch": 1.1216494845360825, "grad_norm": 0.7858920415247334, "learning_rate": 5e-06, "loss": 0.7094, "step": 340},
+    {"epoch": 1.1546391752577319, "grad_norm": 0.9401995798606461, "learning_rate": 5e-06, "loss": 0.7079, "step": 350},
+    {"epoch": 1.1876288659793814, "grad_norm": 0.6150009311102699, "learning_rate": 5e-06, "loss": 0.7073, "step": 360},
+    {"epoch": 1.220618556701031, "grad_norm": 0.6009149100944755, "learning_rate": 5e-06, "loss": 0.7096, "step": 370},
+    {"epoch": 1.2536082474226804, "grad_norm": 0.6115518108906659, "learning_rate": 5e-06, "loss": 0.7066, "step": 380},
+    {"epoch": 1.2865979381443298, "grad_norm": 0.7496882281145417, "learning_rate": 5e-06, "loss": 0.7076, "step": 390},
+    {"epoch": 1.3195876288659794, "grad_norm": 0.6685224897984725, "learning_rate": 5e-06, "loss": 0.7062, "step": 400},
+    {"epoch": 1.352577319587629, "grad_norm": 0.641185927057492, "learning_rate": 5e-06, "loss": 0.7117, "step": 410},
+    {"epoch": 1.3855670103092783, "grad_norm": 0.5361388827305237, "learning_rate": 5e-06, "loss": 0.7094, "step": 420},
+    {"epoch": 1.418556701030928, "grad_norm": 1.002359631516242, "learning_rate": 5e-06, "loss": 0.7054, "step": 430},
+    {"epoch": 1.4515463917525773, "grad_norm": 0.8431450479727091, "learning_rate": 5e-06, "loss": 0.7075, "step": 440},
+    {"epoch": 1.4845360824742269, "grad_norm": 0.6447323729739957, "learning_rate": 5e-06, "loss": 0.7099, "step": 450},
+    {"epoch": 1.5175257731958762, "grad_norm": 0.8431314429320579, "learning_rate": 5e-06, "loss": 0.7018, "step": 460},
+    {"epoch": 1.5505154639175258, "grad_norm": 0.6273662519128372, "learning_rate": 5e-06, "loss": 0.7051, "step": 470},
+    {"epoch": 1.5835051546391754, "grad_norm": 0.8396735090007554, "learning_rate": 5e-06, "loss": 0.7106, "step": 480},
+    {"epoch": 1.6164948453608248, "grad_norm": 0.5802654475284174, "learning_rate": 5e-06, "loss": 0.7043, "step": 490},
+    {"epoch": 1.6494845360824741, "grad_norm": 0.6224806599884348, "learning_rate": 5e-06, "loss": 0.7086, "step": 500},
+    {"epoch": 1.6824742268041237, "grad_norm": 0.6154446076130442, "learning_rate": 5e-06, "loss": 0.7026, "step": 510},
+    {"epoch": 1.7154639175257733, "grad_norm": 0.5857753449684375, "learning_rate": 5e-06, "loss": 0.7037, "step": 520},
+    {"epoch": 1.7484536082474227, "grad_norm": 0.5716099691987403, "learning_rate": 5e-06, "loss": 0.7066, "step": 530},
+    {"epoch": 1.781443298969072, "grad_norm": 0.6774790897099987, "learning_rate": 5e-06, "loss": 0.707, "step": 540},
+    {"epoch": 1.8144329896907216, "grad_norm": 0.6117062221128381, "learning_rate": 5e-06, "loss": 0.6996, "step": 550},
+    {"epoch": 1.8474226804123712, "grad_norm": 0.5362825787566358, "learning_rate": 5e-06, "loss": 0.7036, "step": 560},
+    {"epoch": 1.8804123711340206, "grad_norm": 0.7851595485494056, "learning_rate": 5e-06, "loss": 0.7066, "step": 570},
+    {"epoch": 1.91340206185567, "grad_norm": 0.642752104749906, "learning_rate": 5e-06, "loss": 0.705, "step": 580},
+    {"epoch": 1.9463917525773196, "grad_norm": 0.6663997639727156, "learning_rate": 5e-06, "loss": 0.7051, "step": 590},
+    {"epoch": 1.9793814432989691, "grad_norm": 0.6435653630361237, "learning_rate": 5e-06, "loss": 0.7026, "step": 600},
+    {"epoch": 1.9991752577319588, "eval_loss": 0.7376570701599121, "eval_runtime": 321.676, "eval_samples_per_second": 25.392, "eval_steps_per_second": 0.398, "step": 606},
+    {"epoch": 2.0123711340206185, "grad_norm": 0.7966992322635892, "learning_rate": 5e-06, "loss": 0.7418, "step": 610},
+    {"epoch": 2.045360824742268, "grad_norm": 0.5919842716689093, "learning_rate": 5e-06, "loss": 0.6581, "step": 620},
+    {"epoch": 2.0783505154639177, "grad_norm": 0.6225831303900108, "learning_rate": 5e-06, "loss": 0.6494, "step": 630},
+    {"epoch": 2.111340206185567, "grad_norm": 0.5720666970317613, "learning_rate": 5e-06, "loss": 0.6557, "step": 640},
+    {"epoch": 2.1443298969072164, "grad_norm": 0.625621284764116, "learning_rate": 5e-06, "loss": 0.6562, "step": 650},
+    {"epoch": 2.177319587628866, "grad_norm": 0.722621026378947, "learning_rate": 5e-06, "loss": 0.6592, "step": 660},
+    {"epoch": 2.2103092783505156, "grad_norm": 0.6611874958125228, "learning_rate": 5e-06, "loss": 0.6576, "step": 670},
+    {"epoch": 2.243298969072165, "grad_norm": 0.624720046082098, "learning_rate": 5e-06, "loss": 0.6534, "step": 680},
+    {"epoch": 2.2762886597938143, "grad_norm": 0.6227890769590231, "learning_rate": 5e-06, "loss": 0.6561, "step": 690},
+    {"epoch": 2.3092783505154637, "grad_norm": 0.6353543358518403, "learning_rate": 5e-06, "loss": 0.6564, "step": 700},
+    {"epoch": 2.3422680412371135, "grad_norm": 0.616682251013517, "learning_rate": 5e-06, "loss": 0.6558, "step": 710},
+    {"epoch": 2.375257731958763, "grad_norm": 0.5785627398529801, "learning_rate": 5e-06, "loss": 0.6579, "step": 720},
+    {"epoch": 2.4082474226804123, "grad_norm": 0.7087632640527876, "learning_rate": 5e-06, "loss": 0.6578, "step": 730},
+    {"epoch": 2.441237113402062, "grad_norm": 0.7221097669514308, "learning_rate": 5e-06, "loss": 0.6555, "step": 740},
+    {"epoch": 2.4742268041237114, "grad_norm": 0.6845092133296887, "learning_rate": 5e-06, "loss": 0.6589, "step": 750},
+    {"epoch": 2.507216494845361, "grad_norm": 0.6131735355128494, "learning_rate": 5e-06, "loss": 0.6597, "step": 760},
+    {"epoch": 2.54020618556701, "grad_norm": 0.74499117668607, "learning_rate": 5e-06, "loss": 0.6604, "step": 770},
+    {"epoch": 2.5731958762886595, "grad_norm": 0.6953072761863929, "learning_rate": 5e-06, "loss": 0.6599, "step": 780},
+    {"epoch": 2.6061855670103093, "grad_norm": 0.7683634702318719, "learning_rate": 5e-06, "loss": 0.6584, "step": 790},
+    {"epoch": 2.6391752577319587, "grad_norm": 0.9202931242949187, "learning_rate": 5e-06, "loss": 0.6599, "step": 800},
+    {"epoch": 2.6721649484536085, "grad_norm": 0.6785534766587453, "learning_rate": 5e-06, "loss": 0.6605, "step": 810},
+    {"epoch": 2.705154639175258, "grad_norm": 0.9373759072613878, "learning_rate": 5e-06, "loss": 0.6633, "step": 820},
+    {"epoch": 2.7381443298969073, "grad_norm": 0.5316447851690145, "learning_rate": 5e-06, "loss": 0.6582, "step": 830},
+    {"epoch": 2.7711340206185566, "grad_norm": 0.7810499110998566, "learning_rate": 5e-06, "loss": 0.6638, "step": 840},
+    {"epoch": 2.804123711340206, "grad_norm": 0.5581690358208933, "learning_rate": 5e-06, "loss": 0.6572, "step": 850},
+    {"epoch": 2.837113402061856, "grad_norm": 0.5757480690524878, "learning_rate": 5e-06, "loss": 0.6629, "step": 860},
+    {"epoch": 2.870103092783505, "grad_norm": 0.6570394054126519, "learning_rate": 5e-06, "loss": 0.6603, "step": 870},
+    {"epoch": 2.9030927835051545, "grad_norm": 0.5532161107989387, "learning_rate": 5e-06, "loss": 0.6596, "step": 880},
+    {"epoch": 2.9360824742268044, "grad_norm": 0.6779485831959426, "learning_rate": 5e-06, "loss": 0.6615, "step": 890},
+    {"epoch": 2.9690721649484537, "grad_norm": 0.6105580266011457, "learning_rate": 5e-06, "loss": 0.6573, "step": 900},
+    {"epoch": 2.9987628865979383, "eval_loss": 0.737443208694458, "eval_runtime": 321.3627, "eval_samples_per_second": 25.417, "eval_steps_per_second": 0.398, "step": 909},
+    {"epoch": 2.9987628865979383, "step": 909, "total_flos": 1522399476449280.0, "train_loss": 0.7216839113644641, "train_runtime": 53747.0627, "train_samples_per_second": 8.662, "train_steps_per_second": 0.017}
+  ],
+  "logging_steps": 10,
+  "max_steps": 909,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1522399476449280.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}
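The loss curves committed as training_loss.png and training_eval_loss.png (added below) can be reproduced from the log_history above. The sketch below assumes matplotlib is installed and that trainer_state.json sits in the working directory; the output file name is illustrative.

```python
# Minimal sketch: re-plot the training and eval loss curves from trainer_state.json.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Training-loss entries carry a "loss" key; evaluation entries carry "eval_loss".
train_points = [(e["epoch"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_points = [(e["epoch"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

plt.figure()
plt.plot(*zip(*train_points), label="train loss")
plt.plot(*zip(*eval_points), marker="o", label="eval loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curves.png", dpi=150)
```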
training_eval_loss.png ADDED
[image: evaluation loss curve]
training_loss.png ADDED
[image: training loss curve]