sanchit-gandhi committed
Commit 7fdabdb · verified · 1 Parent(s): ab8790c

End of training

Files changed (5):
  1. README.md +6 -4
  2. all_results.json +16 -0
  3. eval_results.json +10 -0
  4. train_results.json +9 -0
  5. trainer_state.json +744 -0
README.md CHANGED
@@ -2,6 +2,8 @@
 license: cc-by-nc-4.0
 library_name: peft
 tags:
+ - text-to-audio
+ - sweet-dreambooths/black-eyed-peas-v1-piano-prompts
 - generated_from_trainer
 base_model: facebook/musicgen-melody-large
 model-index:
@@ -16,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
 [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/sanchit-gandhi/black-eyed-peas/runs/weg78tae)
 # black-eyed-peas-v1-crafted-variable-prompt-16-epochs-text-only-piano-prompts

- This model is a fine-tuned version of [facebook/musicgen-melody-large](https://huggingface.co/facebook/musicgen-melody-large) on an unknown dataset.
+ This model is a fine-tuned version of [facebook/musicgen-melody-large](https://huggingface.co/facebook/musicgen-melody-large) on the SWEET-DREAMBOOTHS/BLACK-EYED-PEAS-V1-PIANO-PROMPTS - DEFAULT dataset.
 It achieves the following results on the evaluation set:
- - Loss: 3.6373
- - Text Cosine Sim: 0.2610
- - Audio Cosine Sim: 0.5900
+ - Loss: 3.6533
+ - Text Cosine Sim: 0.2462
+ - Audio Cosine Sim: 0.5594

 ## Model description

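Since the README tags the repo as a PEFT model, the commit ships a LoRA-style adapter rather than full model weights. Below is a minimal inference sketch, not part of this commit: the `adapter_id` is assumed from the run name, and the prompt and generation settings are illustrative only.

```python
# Sketch: attach the adapter from this run to the base MusicGen Melody checkpoint.
# adapter_id is an assumption based on the run name; replace it with the actual repo id.
import torch
from peft import PeftModel
from transformers import AutoProcessor, MusicgenMelodyForConditionalGeneration

base_id = "facebook/musicgen-melody-large"
adapter_id = "sanchit-gandhi/black-eyed-peas-v1-crafted-variable-prompt-16-epochs-text-only-piano-prompts"  # assumed

processor = AutoProcessor.from_pretrained(base_id)
model = MusicgenMelodyForConditionalGeneration.from_pretrained(base_id)
model = PeftModel.from_pretrained(model, adapter_id)  # wraps the base model with the trained adapter
model.eval()

# Text-only conditioning, matching the "text-only piano prompts" setup of this run.
inputs = processor(text=["soft solo piano, slow ballad"], padding=True, return_tensors="pt")
with torch.no_grad():
    audio_values = model.generate(**inputs, do_sample=True, guidance_scale=3.0, max_new_tokens=256)
print(audio_values.shape)
```
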
all_results.json ADDED
@@ -0,0 +1,16 @@
+ {
+     "epoch": 15.835051546391753,
+     "eval_audio_cosine_sim": 0.5594081878662109,
+     "eval_loss": 3.6532680988311768,
+     "eval_runtime": 1064.4939,
+     "eval_samples": 12,
+     "eval_samples_per_second": 0.011,
+     "eval_steps_per_second": 0.011,
+     "eval_text_cosine_sim": 0.24617904424667358,
+     "total_flos": 1925884422732936.0,
+     "train_loss": 7.454789996147156,
+     "train_runtime": 13331.2439,
+     "train_samples": 97,
+     "train_samples_per_second": 0.116,
+     "train_steps_per_second": 0.007
+ }
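
The throughput fields above are just sample counts over runtimes: 12 eval clips in 1064.49 s is roughly 0.011 samples/s, and 97 training clips over the 16 configured epochs in 13331.24 s is roughly 0.116 samples/s. A small sketch that re-derives them, assuming `all_results.json` from this commit is in the working directory:

```python
# Sketch: re-derive the reported throughput from the committed all_results.json.
import json

with open("all_results.json") as f:
    metrics = json.load(f)

eval_sps = metrics["eval_samples"] / metrics["eval_runtime"]           # 12 / 1064.4939 ≈ 0.011
train_sps = 16 * metrics["train_samples"] / metrics["train_runtime"]   # 16 * 97 / 13331.2439 ≈ 0.116

print(f"eval samples/s:  {eval_sps:.3f}")
print(f"train samples/s: {train_sps:.3f}")
```
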
eval_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+     "epoch": 15.835051546391753,
+     "eval_audio_cosine_sim": 0.5594081878662109,
+     "eval_loss": 3.6532680988311768,
+     "eval_runtime": 1064.4939,
+     "eval_samples": 12,
+     "eval_samples_per_second": 0.011,
+     "eval_steps_per_second": 0.011,
+     "eval_text_cosine_sim": 0.24617904424667358
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 15.835051546391753,
+     "total_flos": 1925884422732936.0,
+     "train_loss": 7.454789996147156,
+     "train_runtime": 13331.2439,
+     "train_samples": 97,
+     "train_samples_per_second": 0.116,
+     "train_steps_per_second": 0.007
+ }
trainer_state.json ADDED
@@ -0,0 +1,744 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 15.835051546391753,
+   "eval_steps": 30,
+   "global_step": 96,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.16494845360824742,
+       "grad_norm": 1.6045022010803223,
+       "learning_rate": 2.9687500000000003e-05,
+       "loss": 10.1016,
+       "step": 1
+     },
+     {
+       "epoch": 0.32989690721649484,
+       "grad_norm": 1.7116177082061768,
+       "learning_rate": 2.9375e-05,
+       "loss": 10.2357,
+       "step": 2
+     },
+     {
+       "epoch": 0.4948453608247423,
+       "grad_norm": 1.7453874349594116,
+       "learning_rate": 2.90625e-05,
+       "loss": 10.0525,
+       "step": 3
+     },
+     {
+       "epoch": 0.6597938144329897,
+       "grad_norm": 2.173015594482422,
+       "learning_rate": 2.875e-05,
+       "loss": 10.2,
+       "step": 4
+     },
+     {
+       "epoch": 0.8247422680412371,
+       "grad_norm": 2.119373083114624,
+       "learning_rate": 2.84375e-05,
+       "loss": 10.0632,
+       "step": 5
+     },
+     {
+       "epoch": 0.9896907216494846,
+       "grad_norm": 2.180950403213501,
+       "learning_rate": 2.8125e-05,
+       "loss": 9.8459,
+       "step": 6
+     },
+     {
+       "epoch": 1.1546391752577319,
+       "grad_norm": 2.3363752365112305,
+       "learning_rate": 2.7812500000000002e-05,
+       "loss": 9.9087,
+       "step": 7
+     },
+     {
+       "epoch": 1.3195876288659794,
+       "grad_norm": 2.6667237281799316,
+       "learning_rate": 2.75e-05,
+       "loss": 9.8297,
+       "step": 8
+     },
+     {
+       "epoch": 1.4845360824742269,
+       "grad_norm": 3.121326446533203,
+       "learning_rate": 2.71875e-05,
+       "loss": 9.8469,
+       "step": 9
+     },
+     {
+       "epoch": 1.6494845360824741,
+       "grad_norm": 2.7169275283813477,
+       "learning_rate": 2.6875000000000003e-05,
+       "loss": 9.5758,
+       "step": 10
+     },
+     {
+       "epoch": 1.8144329896907216,
+       "grad_norm": 2.9351377487182617,
+       "learning_rate": 2.65625e-05,
+       "loss": 9.5564,
+       "step": 11
+     },
+     {
+       "epoch": 1.9793814432989691,
+       "grad_norm": 3.241955041885376,
+       "learning_rate": 2.625e-05,
+       "loss": 9.4493,
+       "step": 12
+     },
+     {
+       "epoch": 2.1443298969072164,
+       "grad_norm": 3.388002872467041,
+       "learning_rate": 2.59375e-05,
+       "loss": 9.3817,
+       "step": 13
+     },
+     {
+       "epoch": 2.3092783505154637,
+       "grad_norm": 3.465426445007324,
+       "learning_rate": 2.5625e-05,
+       "loss": 9.2322,
+       "step": 14
+     },
+     {
+       "epoch": 2.4742268041237114,
+       "grad_norm": 3.8056743144989014,
+       "learning_rate": 2.5312500000000002e-05,
+       "loss": 9.1842,
+       "step": 15
+     },
+     {
+       "epoch": 2.6391752577319587,
+       "grad_norm": 3.7609691619873047,
+       "learning_rate": 2.5e-05,
+       "loss": 9.0523,
+       "step": 16
+     },
+     {
+       "epoch": 2.804123711340206,
+       "grad_norm": 3.9078407287597656,
+       "learning_rate": 2.46875e-05,
+       "loss": 8.9138,
+       "step": 17
+     },
+     {
+       "epoch": 2.9690721649484537,
+       "grad_norm": 3.792387008666992,
+       "learning_rate": 2.4375e-05,
+       "loss": 8.829,
+       "step": 18
+     },
+     {
+       "epoch": 3.134020618556701,
+       "grad_norm": 3.535268783569336,
+       "learning_rate": 2.4062500000000002e-05,
+       "loss": 8.6985,
+       "step": 19
+     },
+     {
+       "epoch": 3.2989690721649483,
+       "grad_norm": 3.926476240158081,
+       "learning_rate": 2.3749999999999998e-05,
+       "loss": 8.5909,
+       "step": 20
+     },
+     {
+       "epoch": 3.463917525773196,
+       "grad_norm": 3.3404812812805176,
+       "learning_rate": 2.34375e-05,
+       "loss": 8.5582,
+       "step": 21
+     },
+     {
+       "epoch": 3.6288659793814433,
+       "grad_norm": 3.4405341148376465,
+       "learning_rate": 2.3125000000000003e-05,
+       "loss": 8.3595,
+       "step": 22
+     },
+     {
+       "epoch": 3.7938144329896906,
+       "grad_norm": 3.84586238861084,
+       "learning_rate": 2.28125e-05,
+       "loss": 8.2115,
+       "step": 23
+     },
+     {
+       "epoch": 3.9587628865979383,
+       "grad_norm": 3.3119468688964844,
+       "learning_rate": 2.25e-05,
+       "loss": 8.1774,
+       "step": 24
+     },
+     {
+       "epoch": 4.123711340206185,
+       "grad_norm": 3.2873129844665527,
+       "learning_rate": 2.21875e-05,
+       "loss": 8.1239,
+       "step": 25
+     },
+     {
+       "epoch": 4.288659793814433,
+       "grad_norm": 3.5876059532165527,
+       "learning_rate": 2.1875e-05,
+       "loss": 8.1053,
+       "step": 26
+     },
+     {
+       "epoch": 4.453608247422681,
+       "grad_norm": 3.688255786895752,
+       "learning_rate": 2.15625e-05,
+       "loss": 7.9861,
+       "step": 27
+     },
+     {
+       "epoch": 4.618556701030927,
+       "grad_norm": 4.39879846572876,
+       "learning_rate": 2.125e-05,
+       "loss": 7.8541,
+       "step": 28
+     },
+     {
+       "epoch": 4.783505154639175,
+       "grad_norm": 4.439304828643799,
+       "learning_rate": 2.09375e-05,
+       "loss": 7.9102,
+       "step": 29
+     },
+     {
+       "epoch": 4.948453608247423,
+       "grad_norm": 3.9502387046813965,
+       "learning_rate": 2.0625e-05,
+       "loss": 7.791,
+       "step": 30
+     },
+     {
+       "epoch": 4.948453608247423,
+       "eval_audio_cosine_sim": 0.5583739280700684,
+       "eval_loss": 3.1003806591033936,
+       "eval_runtime": 2123.8603,
+       "eval_samples_per_second": 0.006,
+       "eval_steps_per_second": 0.006,
+       "eval_text_cosine_sim": 0.376191645860672,
+       "step": 30
+     },
+     {
+       "epoch": 5.11340206185567,
+       "grad_norm": 4.569061756134033,
+       "learning_rate": 2.0312500000000002e-05,
+       "loss": 7.746,
+       "step": 31
+     },
+     {
+       "epoch": 5.278350515463917,
+       "grad_norm": 4.3288702964782715,
+       "learning_rate": 1.9999999999999998e-05,
+       "loss": 7.5649,
+       "step": 32
+     },
+     {
+       "epoch": 5.443298969072165,
+       "grad_norm": 4.021690368652344,
+       "learning_rate": 1.96875e-05,
+       "loss": 7.7969,
+       "step": 33
+     },
+     {
+       "epoch": 5.608247422680412,
+       "grad_norm": 3.7971837520599365,
+       "learning_rate": 1.9375e-05,
+       "loss": 7.5618,
+       "step": 34
+     },
+     {
+       "epoch": 5.77319587628866,
+       "grad_norm": 3.3054466247558594,
+       "learning_rate": 1.90625e-05,
+       "loss": 7.5207,
+       "step": 35
+     },
+     {
+       "epoch": 5.938144329896907,
+       "grad_norm": 3.0602829456329346,
+       "learning_rate": 1.8750000000000002e-05,
+       "loss": 7.3319,
+       "step": 36
+     },
+     {
+       "epoch": 6.103092783505154,
+       "grad_norm": 3.018862009048462,
+       "learning_rate": 1.84375e-05,
+       "loss": 7.2311,
+       "step": 37
+     },
+     {
+       "epoch": 6.268041237113402,
+       "grad_norm": 2.6457912921905518,
+       "learning_rate": 1.8125e-05,
+       "loss": 7.2177,
+       "step": 38
+     },
+     {
+       "epoch": 6.43298969072165,
+       "grad_norm": 2.6215384006500244,
+       "learning_rate": 1.78125e-05,
+       "loss": 7.2411,
+       "step": 39
+     },
+     {
+       "epoch": 6.597938144329897,
+       "grad_norm": 2.6344871520996094,
+       "learning_rate": 1.7500000000000002e-05,
+       "loss": 7.2866,
+       "step": 40
+     },
+     {
+       "epoch": 6.762886597938144,
+       "grad_norm": 2.753688335418701,
+       "learning_rate": 1.7187499999999998e-05,
+       "loss": 7.3974,
+       "step": 41
+     },
+     {
+       "epoch": 6.927835051546392,
+       "grad_norm": 2.540292501449585,
+       "learning_rate": 1.6875e-05,
+       "loss": 7.2237,
+       "step": 42
+     },
+     {
+       "epoch": 7.092783505154639,
+       "grad_norm": 2.3271636962890625,
+       "learning_rate": 1.6562500000000003e-05,
+       "loss": 7.0722,
+       "step": 43
+     },
+     {
+       "epoch": 7.257731958762887,
+       "grad_norm": 2.6378014087677,
+       "learning_rate": 1.625e-05,
+       "loss": 6.9758,
+       "step": 44
+     },
+     {
+       "epoch": 7.422680412371134,
+       "grad_norm": 2.6299326419830322,
+       "learning_rate": 1.59375e-05,
+       "loss": 7.2835,
+       "step": 45
+     },
+     {
+       "epoch": 7.587628865979381,
+       "grad_norm": 2.400949716567993,
+       "learning_rate": 1.5625e-05,
+       "loss": 7.1064,
+       "step": 46
+     },
+     {
+       "epoch": 7.752577319587629,
+       "grad_norm": 2.3601369857788086,
+       "learning_rate": 1.53125e-05,
+       "loss": 6.7368,
+       "step": 47
+     },
+     {
+       "epoch": 7.917525773195877,
+       "grad_norm": 2.3395187854766846,
+       "learning_rate": 1.5e-05,
+       "loss": 7.045,
+       "step": 48
+     },
+     {
+       "epoch": 8.082474226804123,
+       "grad_norm": 2.2277605533599854,
+       "learning_rate": 1.46875e-05,
+       "loss": 6.8241,
+       "step": 49
+     },
+     {
+       "epoch": 8.24742268041237,
+       "grad_norm": 2.162241220474243,
+       "learning_rate": 1.4375e-05,
+       "loss": 6.9879,
+       "step": 50
+     },
+     {
+       "epoch": 8.412371134020619,
+       "grad_norm": 2.038295030593872,
+       "learning_rate": 1.40625e-05,
+       "loss": 6.8768,
+       "step": 51
+     },
+     {
+       "epoch": 8.577319587628866,
+       "grad_norm": 1.980531930923462,
+       "learning_rate": 1.375e-05,
+       "loss": 6.7726,
+       "step": 52
+     },
+     {
+       "epoch": 8.742268041237114,
+       "grad_norm": 1.9837162494659424,
+       "learning_rate": 1.3437500000000001e-05,
+       "loss": 6.6858,
+       "step": 53
+     },
+     {
+       "epoch": 8.907216494845361,
+       "grad_norm": 2.0047826766967773,
+       "learning_rate": 1.3125e-05,
+       "loss": 6.8887,
+       "step": 54
+     },
+     {
+       "epoch": 9.072164948453608,
+       "grad_norm": 1.956314206123352,
+       "learning_rate": 1.28125e-05,
+       "loss": 6.7538,
+       "step": 55
+     },
+     {
+       "epoch": 9.237113402061855,
+       "grad_norm": 1.6746240854263306,
+       "learning_rate": 1.25e-05,
+       "loss": 6.6686,
+       "step": 56
+     },
+     {
+       "epoch": 9.402061855670103,
+       "grad_norm": 1.9487406015396118,
+       "learning_rate": 1.21875e-05,
+       "loss": 6.61,
+       "step": 57
+     },
+     {
+       "epoch": 9.56701030927835,
+       "grad_norm": 1.6522157192230225,
+       "learning_rate": 1.1874999999999999e-05,
+       "loss": 6.7586,
+       "step": 58
+     },
+     {
+       "epoch": 9.731958762886597,
+       "grad_norm": 1.807323694229126,
+       "learning_rate": 1.1562500000000002e-05,
+       "loss": 6.5441,
+       "step": 59
+     },
+     {
+       "epoch": 9.896907216494846,
+       "grad_norm": 1.6479687690734863,
+       "learning_rate": 1.125e-05,
+       "loss": 6.6699,
+       "step": 60
+     },
+     {
+       "epoch": 9.896907216494846,
+       "eval_audio_cosine_sim": 0.6575908064842224,
+       "eval_loss": 3.235611915588379,
+       "eval_runtime": 2160.2372,
+       "eval_samples_per_second": 0.006,
+       "eval_steps_per_second": 0.006,
+       "eval_text_cosine_sim": 0.3653325140476227,
+       "step": 60
+     },
+     {
+       "epoch": 10.061855670103093,
+       "grad_norm": 1.6155036687850952,
+       "learning_rate": 1.09375e-05,
+       "loss": 6.6719,
+       "step": 61
+     },
+     {
+       "epoch": 10.22680412371134,
+       "grad_norm": 1.5656815767288208,
+       "learning_rate": 1.0625e-05,
+       "loss": 6.6531,
+       "step": 62
+     },
+     {
+       "epoch": 10.391752577319588,
+       "grad_norm": 1.4706956148147583,
+       "learning_rate": 1.03125e-05,
+       "loss": 6.6168,
+       "step": 63
+     },
+     {
+       "epoch": 10.556701030927835,
+       "grad_norm": 1.5122315883636475,
+       "learning_rate": 9.999999999999999e-06,
+       "loss": 6.4704,
+       "step": 64
+     },
+     {
+       "epoch": 10.721649484536082,
+       "grad_norm": 1.3326685428619385,
+       "learning_rate": 9.6875e-06,
+       "loss": 6.5744,
+       "step": 65
+     },
+     {
+       "epoch": 10.88659793814433,
+       "grad_norm": 1.4799164533615112,
+       "learning_rate": 9.375000000000001e-06,
+       "loss": 6.5442,
+       "step": 66
+     },
+     {
+       "epoch": 11.051546391752577,
+       "grad_norm": 1.6707019805908203,
+       "learning_rate": 9.0625e-06,
+       "loss": 6.3603,
+       "step": 67
+     },
+     {
+       "epoch": 11.216494845360824,
+       "grad_norm": 1.5303987264633179,
+       "learning_rate": 8.750000000000001e-06,
+       "loss": 6.3026,
+       "step": 68
+     },
+     {
+       "epoch": 11.381443298969073,
+       "grad_norm": 1.4235777854919434,
+       "learning_rate": 8.4375e-06,
+       "loss": 6.3932,
+       "step": 69
+     },
+     {
+       "epoch": 11.54639175257732,
+       "grad_norm": 1.7168058156967163,
+       "learning_rate": 8.125e-06,
+       "loss": 6.6932,
+       "step": 70
+     },
+     {
+       "epoch": 11.711340206185566,
+       "grad_norm": 1.4212720394134521,
+       "learning_rate": 7.8125e-06,
+       "loss": 6.3352,
+       "step": 71
+     },
+     {
+       "epoch": 11.876288659793815,
+       "grad_norm": 1.3087238073349,
+       "learning_rate": 7.5e-06,
+       "loss": 6.5214,
+       "step": 72
+     },
+     {
+       "epoch": 12.041237113402062,
+       "grad_norm": 1.343590259552002,
+       "learning_rate": 7.1875e-06,
+       "loss": 6.5502,
+       "step": 73
+     },
+     {
+       "epoch": 12.206185567010309,
+       "grad_norm": 1.1480090618133545,
+       "learning_rate": 6.875e-06,
+       "loss": 6.3793,
+       "step": 74
+     },
+     {
+       "epoch": 12.371134020618557,
+       "grad_norm": 1.4936941862106323,
+       "learning_rate": 6.5625e-06,
+       "loss": 6.5373,
+       "step": 75
+     },
+     {
+       "epoch": 12.536082474226804,
+       "grad_norm": 1.130237102508545,
+       "learning_rate": 6.25e-06,
+       "loss": 6.4033,
+       "step": 76
+     },
+     {
+       "epoch": 12.70103092783505,
+       "grad_norm": 1.4036816358566284,
+       "learning_rate": 5.9374999999999995e-06,
+       "loss": 6.382,
+       "step": 77
+     },
+     {
+       "epoch": 12.8659793814433,
+       "grad_norm": 1.4101227521896362,
+       "learning_rate": 5.625e-06,
+       "loss": 6.5297,
+       "step": 78
+     },
+     {
+       "epoch": 13.030927835051546,
+       "grad_norm": 1.2091238498687744,
+       "learning_rate": 5.3125e-06,
+       "loss": 6.4248,
+       "step": 79
+     },
+     {
+       "epoch": 13.195876288659793,
+       "grad_norm": 1.0961215496063232,
+       "learning_rate": 4.9999999999999996e-06,
+       "loss": 6.4123,
+       "step": 80
+     },
+     {
+       "epoch": 13.360824742268042,
+       "grad_norm": 1.08103609085083,
+       "learning_rate": 4.6875000000000004e-06,
+       "loss": 6.3088,
+       "step": 81
+     },
+     {
+       "epoch": 13.525773195876289,
+       "grad_norm": 1.4263334274291992,
+       "learning_rate": 4.3750000000000005e-06,
+       "loss": 6.5992,
+       "step": 82
+     },
+     {
+       "epoch": 13.690721649484535,
+       "grad_norm": 1.2003765106201172,
+       "learning_rate": 4.0625e-06,
+       "loss": 6.3439,
+       "step": 83
+     },
+     {
+       "epoch": 13.855670103092784,
+       "grad_norm": 1.0816315412521362,
+       "learning_rate": 3.75e-06,
+       "loss": 6.4004,
+       "step": 84
+     },
+     {
+       "epoch": 14.02061855670103,
+       "grad_norm": 1.4043329954147339,
+       "learning_rate": 3.4375e-06,
+       "loss": 6.1602,
+       "step": 85
+     },
+     {
+       "epoch": 14.185567010309278,
+       "grad_norm": 1.262394666671753,
+       "learning_rate": 3.125e-06,
+       "loss": 6.2039,
+       "step": 86
+     },
+     {
+       "epoch": 14.350515463917526,
+       "grad_norm": 1.147353172302246,
+       "learning_rate": 2.8125e-06,
+       "loss": 6.4834,
+       "step": 87
+     },
+     {
+       "epoch": 14.515463917525773,
+       "grad_norm": 1.0577898025512695,
+       "learning_rate": 2.4999999999999998e-06,
+       "loss": 6.3941,
+       "step": 88
+     },
+     {
+       "epoch": 14.68041237113402,
+       "grad_norm": 1.0920854806900024,
+       "learning_rate": 2.1875000000000002e-06,
+       "loss": 6.4342,
+       "step": 89
+     },
+     {
+       "epoch": 14.845360824742269,
+       "grad_norm": 1.0742192268371582,
+       "learning_rate": 1.875e-06,
+       "loss": 6.4394,
+       "step": 90
+     },
+     {
+       "epoch": 14.845360824742269,
+       "eval_audio_cosine_sim": 0.5900315642356873,
+       "eval_loss": 3.637291669845581,
+       "eval_runtime": 1070.3012,
+       "eval_samples_per_second": 0.011,
+       "eval_steps_per_second": 0.011,
+       "eval_text_cosine_sim": 0.26104021072387695,
+       "step": 90
+     },
+     {
+       "epoch": 15.010309278350515,
+       "grad_norm": 1.2300424575805664,
+       "learning_rate": 1.5625e-06,
+       "loss": 6.2831,
+       "step": 91
+     },
+     {
+       "epoch": 15.175257731958762,
+       "grad_norm": 1.1345258951187134,
+       "learning_rate": 1.2499999999999999e-06,
+       "loss": 6.4745,
+       "step": 92
+     },
+     {
+       "epoch": 15.34020618556701,
+       "grad_norm": 1.109592318534851,
+       "learning_rate": 9.375e-07,
+       "loss": 6.249,
+       "step": 93
+     },
+     {
+       "epoch": 15.505154639175258,
+       "grad_norm": 1.1140637397766113,
+       "learning_rate": 6.249999999999999e-07,
+       "loss": 6.3171,
+       "step": 94
+     },
+     {
+       "epoch": 15.670103092783505,
+       "grad_norm": 1.1929951906204224,
+       "learning_rate": 3.1249999999999997e-07,
+       "loss": 6.2597,
+       "step": 95
+     },
+     {
+       "epoch": 15.835051546391753,
+       "grad_norm": 1.2717913389205933,
+       "learning_rate": 0.0,
+       "loss": 6.548,
+       "step": 96
+     },
+     {
+       "epoch": 15.835051546391753,
+       "step": 96,
+       "total_flos": 1925884422732936.0,
+       "train_loss": 7.454789996147156,
+       "train_runtime": 13331.2439,
+       "train_samples_per_second": 0.116,
+       "train_steps_per_second": 0.007
+     }
+   ],
+   "logging_steps": 1.0,
+   "max_steps": 96,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 16,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": false,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1925884422732936.0,
+   "train_batch_size": 1,
+   "trial_name": null,
+   "trial_params": null
+ }
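
The `log_history` above is easier to read programmatically than as a diff. A short sketch, assuming the `trainer_state.json` from this commit is available locally:

```python
# Sketch: summarise the run from trainer_state.json (per-step losses and the periodic evals).
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]       # 96 optimiser steps
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]   # evals at steps 30, 60, 90

print(f"steps: {len(train_logs)}, loss {train_logs[0]['loss']} -> {train_logs[-1]['loss']}")
for e in eval_logs:
    print(f"step {e['step']:>3}  eval_loss {e['eval_loss']:.4f}  "
          f"text_sim {e['eval_text_cosine_sim']:.4f}  audio_sim {e['eval_audio_cosine_sim']:.4f}")
```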