Upload 16 files

Browse files

Files changed (16) hide show

README.md +56 -3
all_results.json +8 -0
checkpoint-102000/config.json +55 -0
checkpoint-102000/model.safetensors +3 -0
checkpoint-102000/optimizer.pt +3 -0
checkpoint-102000/preprocessor_config.json +36 -0
checkpoint-102000/rng_state.pth +3 -0
checkpoint-102000/scheduler.pt +3 -0
checkpoint-102000/trainer_state.json +1449 -0
checkpoint-102000/training_args.bin +3 -0
config.json +55 -0
model.safetensors +3 -0
preprocessor_config.json +36 -0
train_results.json +8 -0
trainer_state.json +1458 -0
training_args.bin +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,56 @@
----
-license: unknown
----

+---
+license: apache-2.0
+base_model: google/vit-base-patch16-224-in21k
+tags:
+- image-classification
+- vision
+- generated_from_trainer
+datasets:
+- imagefolder
+model-index:
+- name: network
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# network
+This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 3.0
+### Training results
+### Framework versions
+- Transformers 4.40.2
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.1
+- Tokenizers 0.19.1

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 6.323969768236646e+19,
+    "train_loss": 0.0325347986571929,
+    "train_runtime": 10422.0944,
+    "train_samples_per_second": 78.295,
+    "train_steps_per_second": 9.787
+}

checkpoint-102000/config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "_name_or_path": "google/vit-base-patch16-224-in21k",
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "encoder_stride": 16,
+  "finetuning_task": "image-classification",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "bb",
+    "1": "bk",
+    "10": "wp",
+    "11": "wq",
+    "12": "wr",
+    "2": "bn",
+    "3": "bp",
+    "4": "bq",
+    "5": "br",
+    "6": "empty",
+    "7": "wb",
+    "8": "wk",
+    "9": "wn"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "bb": "0",
+    "bk": "1",
+    "bn": "2",
+    "bp": "3",
+    "bq": "4",
+    "br": "5",
+    "empty": "6",
+    "wb": "7",
+    "wk": "8",
+    "wn": "9",
+    "wp": "10",
+    "wq": "11",
+    "wr": "12"
+  },
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.2"
+}

checkpoint-102000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0fed5d30904017d84562df1d3f165e9382aa1a8c48564aa75d45eed1462f356e
+size 343257812

checkpoint-102000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b694b986d67cad49f8e40b98b5447aee2746498ecd58eadabab7f54e84231027
+size 686636474

checkpoint-102000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_valid_processor_keys": [
+    "images",
+    "do_resize",
+    "size",
+    "resample",
+    "do_rescale",
+    "rescale_factor",
+    "do_normalize",
+    "image_mean",
+    "image_std",
+    "return_tensors",
+    "data_format",
+    "input_data_format"
+  ],
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "ViTImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-102000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ee8a41d6942d75a9e923d4b9f35c51b16b26978b7849dbee21a63af2ec0308c
+size 14244

checkpoint-102000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a15cc9e02c1125fc853e1f62c577b38dbfb76d8a16bcf17f4c7a7f5ce95ee14
+size 1064

checkpoint-102000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1449 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 102000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.014705882352941176,
+      "grad_norm": 0.35731062293052673,
+      "learning_rate": 4.975490196078432e-05,
+      "loss": 0.5354,
+      "step": 500
+    },
+    {
+      "epoch": 0.029411764705882353,
+      "grad_norm": 2.1595866680145264,
+      "learning_rate": 4.9509803921568634e-05,
+      "loss": 0.1627,
+      "step": 1000
+    },
+    {
+      "epoch": 0.04411764705882353,
+      "grad_norm": 0.14825384318828583,
+      "learning_rate": 4.9264705882352944e-05,
+      "loss": 0.101,
+      "step": 1500
+    },
+    {
+      "epoch": 0.058823529411764705,
+      "grad_norm": 0.07121703773736954,
+      "learning_rate": 4.901960784313725e-05,
+      "loss": 0.0952,
+      "step": 2000
+    },
+    {
+      "epoch": 0.07352941176470588,
+      "grad_norm": 0.06089532747864723,
+      "learning_rate": 4.877450980392157e-05,
+      "loss": 0.0881,
+      "step": 2500
+    },
+    {
+      "epoch": 0.08823529411764706,
+      "grad_norm": 0.037034619599580765,
+      "learning_rate": 4.8529411764705885e-05,
+      "loss": 0.0717,
+      "step": 3000
+    },
+    {
+      "epoch": 0.10294117647058823,
+      "grad_norm": 0.01999847963452339,
+      "learning_rate": 4.82843137254902e-05,
+      "loss": 0.0848,
+      "step": 3500
+    },
+    {
+      "epoch": 0.11764705882352941,
+      "grad_norm": 0.018650399520993233,
+      "learning_rate": 4.803921568627452e-05,
+      "loss": 0.068,
+      "step": 4000
+    },
+    {
+      "epoch": 0.1323529411764706,
+      "grad_norm": 0.023023229092359543,
+      "learning_rate": 4.7794117647058826e-05,
+      "loss": 0.0626,
+      "step": 4500
+    },
+    {
+      "epoch": 0.14705882352941177,
+      "grad_norm": 0.018715515732765198,
+      "learning_rate": 4.7549019607843135e-05,
+      "loss": 0.0555,
+      "step": 5000
+    },
+    {
+      "epoch": 0.16176470588235295,
+      "grad_norm": 0.11842140555381775,
+      "learning_rate": 4.730392156862745e-05,
+      "loss": 0.0658,
+      "step": 5500
+    },
+    {
+      "epoch": 0.17647058823529413,
+      "grad_norm": 0.04816881939768791,
+      "learning_rate": 4.705882352941177e-05,
+      "loss": 0.0648,
+      "step": 6000
+    },
+    {
+      "epoch": 0.19117647058823528,
+      "grad_norm": 0.025240018963813782,
+      "learning_rate": 4.681372549019608e-05,
+      "loss": 0.0632,
+      "step": 6500
+    },
+    {
+      "epoch": 0.20588235294117646,
+      "grad_norm": 0.007390766404569149,
+      "learning_rate": 4.656862745098039e-05,
+      "loss": 0.0705,
+      "step": 7000
+    },
+    {
+      "epoch": 0.22058823529411764,
+      "grad_norm": 0.011664963327348232,
+      "learning_rate": 4.632352941176471e-05,
+      "loss": 0.0552,
+      "step": 7500
+    },
+    {
+      "epoch": 0.23529411764705882,
+      "grad_norm": 39.69175720214844,
+      "learning_rate": 4.607843137254902e-05,
+      "loss": 0.0686,
+      "step": 8000
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 0.007366931065917015,
+      "learning_rate": 4.5833333333333334e-05,
+      "loss": 0.061,
+      "step": 8500
+    },
+    {
+      "epoch": 0.2647058823529412,
+      "grad_norm": 0.003396671498194337,
+      "learning_rate": 4.558823529411765e-05,
+      "loss": 0.0464,
+      "step": 9000
+    },
+    {
+      "epoch": 0.27941176470588236,
+      "grad_norm": 0.045366521924734116,
+      "learning_rate": 4.5343137254901966e-05,
+      "loss": 0.053,
+      "step": 9500
+    },
+    {
+      "epoch": 0.29411764705882354,
+      "grad_norm": 1.137495756149292,
+      "learning_rate": 4.5098039215686275e-05,
+      "loss": 0.0484,
+      "step": 10000
+    },
+    {
+      "epoch": 0.3088235294117647,
+      "grad_norm": 0.0062417215667665005,
+      "learning_rate": 4.485294117647059e-05,
+      "loss": 0.0564,
+      "step": 10500
+    },
+    {
+      "epoch": 0.3235294117647059,
+      "grad_norm": 0.003990447614341974,
+      "learning_rate": 4.460784313725491e-05,
+      "loss": 0.0364,
+      "step": 11000
+    },
+    {
+      "epoch": 0.3382352941176471,
+      "grad_norm": 0.11569799482822418,
+      "learning_rate": 4.4362745098039216e-05,
+      "loss": 0.0436,
+      "step": 11500
+    },
+    {
+      "epoch": 0.35294117647058826,
+      "grad_norm": 0.01159907691180706,
+      "learning_rate": 4.411764705882353e-05,
+      "loss": 0.0456,
+      "step": 12000
+    },
+    {
+      "epoch": 0.36764705882352944,
+      "grad_norm": 10.926911354064941,
+      "learning_rate": 4.387254901960784e-05,
+      "loss": 0.0464,
+      "step": 12500
+    },
+    {
+      "epoch": 0.38235294117647056,
+      "grad_norm": 0.03444543853402138,
+      "learning_rate": 4.362745098039216e-05,
+      "loss": 0.0512,
+      "step": 13000
+    },
+    {
+      "epoch": 0.39705882352941174,
+      "grad_norm": 0.002395658055320382,
+      "learning_rate": 4.3382352941176474e-05,
+      "loss": 0.0586,
+      "step": 13500
+    },
+    {
+      "epoch": 0.4117647058823529,
+      "grad_norm": 0.010224021971225739,
+      "learning_rate": 4.313725490196079e-05,
+      "loss": 0.0407,
+      "step": 14000
+    },
+    {
+      "epoch": 0.4264705882352941,
+      "grad_norm": 0.010642035864293575,
+      "learning_rate": 4.28921568627451e-05,
+      "loss": 0.0536,
+      "step": 14500
+    },
+    {
+      "epoch": 0.4411764705882353,
+      "grad_norm": 0.00970557238906622,
+      "learning_rate": 4.2647058823529415e-05,
+      "loss": 0.0526,
+      "step": 15000
+    },
+    {
+      "epoch": 0.45588235294117646,
+      "grad_norm": 0.0030253385193645954,
+      "learning_rate": 4.2401960784313724e-05,
+      "loss": 0.0432,
+      "step": 15500
+    },
+    {
+      "epoch": 0.47058823529411764,
+      "grad_norm": 0.5423064827919006,
+      "learning_rate": 4.215686274509804e-05,
+      "loss": 0.0381,
+      "step": 16000
+    },
+    {
+      "epoch": 0.4852941176470588,
+      "grad_norm": 7.454125881195068,
+      "learning_rate": 4.1911764705882356e-05,
+      "loss": 0.0397,
+      "step": 16500
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 3.87744402885437,
+      "learning_rate": 4.166666666666667e-05,
+      "loss": 0.0564,
+      "step": 17000
+    },
+    {
+      "epoch": 0.5147058823529411,
+      "grad_norm": 33.39067840576172,
+      "learning_rate": 4.142156862745099e-05,
+      "loss": 0.0455,
+      "step": 17500
+    },
+    {
+      "epoch": 0.5294117647058824,
+      "grad_norm": 0.008246080949902534,
+      "learning_rate": 4.11764705882353e-05,
+      "loss": 0.0515,
+      "step": 18000
+    },
+    {
+      "epoch": 0.5441176470588235,
+      "grad_norm": 0.018734918907284737,
+      "learning_rate": 4.0931372549019607e-05,
+      "loss": 0.0453,
+      "step": 18500
+    },
+    {
+      "epoch": 0.5588235294117647,
+      "grad_norm": 0.007373027969151735,
+      "learning_rate": 4.068627450980392e-05,
+      "loss": 0.0485,
+      "step": 19000
+    },
+    {
+      "epoch": 0.5735294117647058,
+      "grad_norm": 0.0023267469368875027,
+      "learning_rate": 4.044117647058824e-05,
+      "loss": 0.0372,
+      "step": 19500
+    },
+    {
+      "epoch": 0.5882352941176471,
+      "grad_norm": 0.1311252862215042,
+      "learning_rate": 4.0196078431372555e-05,
+      "loss": 0.0442,
+      "step": 20000
+    },
+    {
+      "epoch": 0.6029411764705882,
+      "grad_norm": 0.003710985416546464,
+      "learning_rate": 3.9950980392156864e-05,
+      "loss": 0.0585,
+      "step": 20500
+    },
+    {
+      "epoch": 0.6176470588235294,
+      "grad_norm": 0.004861747846007347,
+      "learning_rate": 3.970588235294117e-05,
+      "loss": 0.0374,
+      "step": 21000
+    },
+    {
+      "epoch": 0.6323529411764706,
+      "grad_norm": 0.004351571202278137,
+      "learning_rate": 3.946078431372549e-05,
+      "loss": 0.0265,
+      "step": 21500
+    },
+    {
+      "epoch": 0.6470588235294118,
+      "grad_norm": 0.008851751685142517,
+      "learning_rate": 3.9215686274509805e-05,
+      "loss": 0.0314,
+      "step": 22000
+    },
+    {
+      "epoch": 0.6617647058823529,
+      "grad_norm": 0.0046307104639709,
+      "learning_rate": 3.897058823529412e-05,
+      "loss": 0.0445,
+      "step": 22500
+    },
+    {
+      "epoch": 0.6764705882352942,
+      "grad_norm": 0.002735880669206381,
+      "learning_rate": 3.872549019607844e-05,
+      "loss": 0.0433,
+      "step": 23000
+    },
+    {
+      "epoch": 0.6911764705882353,
+      "grad_norm": 0.0218490082770586,
+      "learning_rate": 3.8480392156862746e-05,
+      "loss": 0.0444,
+      "step": 23500
+    },
+    {
+      "epoch": 0.7058823529411765,
+      "grad_norm": 2.371448278427124,
+      "learning_rate": 3.8235294117647055e-05,
+      "loss": 0.033,
+      "step": 24000
+    },
+    {
+      "epoch": 0.7205882352941176,
+      "grad_norm": 0.005015780217945576,
+      "learning_rate": 3.799019607843137e-05,
+      "loss": 0.0422,
+      "step": 24500
+    },
+    {
+      "epoch": 0.7352941176470589,
+      "grad_norm": 1.3102850914001465,
+      "learning_rate": 3.774509803921569e-05,
+      "loss": 0.035,
+      "step": 25000
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.004522955510765314,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.0303,
+      "step": 25500
+    },
+    {
+      "epoch": 0.7647058823529411,
+      "grad_norm": 0.006077844649553299,
+      "learning_rate": 3.725490196078432e-05,
+      "loss": 0.0503,
+      "step": 26000
+    },
+    {
+      "epoch": 0.7794117647058824,
+      "grad_norm": 0.0026446939446032047,
+      "learning_rate": 3.700980392156863e-05,
+      "loss": 0.0264,
+      "step": 26500
+    },
+    {
+      "epoch": 0.7941176470588235,
+      "grad_norm": 0.004485867917537689,
+      "learning_rate": 3.6764705882352945e-05,
+      "loss": 0.0388,
+      "step": 27000
+    },
+    {
+      "epoch": 0.8088235294117647,
+      "grad_norm": 0.0073866695165634155,
+      "learning_rate": 3.6519607843137254e-05,
+      "loss": 0.0283,
+      "step": 27500
+    },
+    {
+      "epoch": 0.8235294117647058,
+      "grad_norm": 0.012984287925064564,
+      "learning_rate": 3.627450980392157e-05,
+      "loss": 0.0393,
+      "step": 28000
+    },
+    {
+      "epoch": 0.8382352941176471,
+      "grad_norm": 0.01751883700489998,
+      "learning_rate": 3.6029411764705886e-05,
+      "loss": 0.0388,
+      "step": 28500
+    },
+    {
+      "epoch": 0.8529411764705882,
+      "grad_norm": 0.0035843336954712868,
+      "learning_rate": 3.5784313725490195e-05,
+      "loss": 0.0384,
+      "step": 29000
+    },
+    {
+      "epoch": 0.8676470588235294,
+      "grad_norm": 0.0645672082901001,
+      "learning_rate": 3.553921568627451e-05,
+      "loss": 0.0378,
+      "step": 29500
+    },
+    {
+      "epoch": 0.8823529411764706,
+      "grad_norm": 0.008566264994442463,
+      "learning_rate": 3.529411764705883e-05,
+      "loss": 0.0315,
+      "step": 30000
+    },
+    {
+      "epoch": 0.8970588235294118,
+      "grad_norm": 0.010571740567684174,
+      "learning_rate": 3.5049019607843136e-05,
+      "loss": 0.0324,
+      "step": 30500
+    },
+    {
+      "epoch": 0.9117647058823529,
+      "grad_norm": 0.0022533361334353685,
+      "learning_rate": 3.480392156862745e-05,
+      "loss": 0.0292,
+      "step": 31000
+    },
+    {
+      "epoch": 0.9264705882352942,
+      "grad_norm": 0.006164130289107561,
+      "learning_rate": 3.455882352941177e-05,
+      "loss": 0.0407,
+      "step": 31500
+    },
+    {
+      "epoch": 0.9411764705882353,
+      "grad_norm": 0.007435985840857029,
+      "learning_rate": 3.431372549019608e-05,
+      "loss": 0.0449,
+      "step": 32000
+    },
+    {
+      "epoch": 0.9558823529411765,
+      "grad_norm": 0.003777585458010435,
+      "learning_rate": 3.4068627450980394e-05,
+      "loss": 0.0247,
+      "step": 32500
+    },
+    {
+      "epoch": 0.9705882352941176,
+      "grad_norm": 0.005975374951958656,
+      "learning_rate": 3.382352941176471e-05,
+      "loss": 0.0309,
+      "step": 33000
+    },
+    {
+      "epoch": 0.9852941176470589,
+      "grad_norm": 0.0016012012492865324,
+      "learning_rate": 3.357843137254902e-05,
+      "loss": 0.0294,
+      "step": 33500
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.020027656108140945,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.022,
+      "step": 34000
+    },
+    {
+      "epoch": 1.0147058823529411,
+      "grad_norm": 10.916418075561523,
+      "learning_rate": 3.308823529411765e-05,
+      "loss": 0.0264,
+      "step": 34500
+    },
+    {
+      "epoch": 1.0294117647058822,
+      "grad_norm": 0.013728056102991104,
+      "learning_rate": 3.284313725490196e-05,
+      "loss": 0.032,
+      "step": 35000
+    },
+    {
+      "epoch": 1.0441176470588236,
+      "grad_norm": 0.02546643279492855,
+      "learning_rate": 3.2598039215686276e-05,
+      "loss": 0.0404,
+      "step": 35500
+    },
+    {
+      "epoch": 1.0588235294117647,
+      "grad_norm": 0.004185052588582039,
+      "learning_rate": 3.235294117647059e-05,
+      "loss": 0.0306,
+      "step": 36000
+    },
+    {
+      "epoch": 1.0735294117647058,
+      "grad_norm": 0.016653403639793396,
+      "learning_rate": 3.210784313725491e-05,
+      "loss": 0.0217,
+      "step": 36500
+    },
+    {
+      "epoch": 1.088235294117647,
+      "grad_norm": 0.0028331661596894264,
+      "learning_rate": 3.186274509803922e-05,
+      "loss": 0.0265,
+      "step": 37000
+    },
+    {
+      "epoch": 1.1029411764705883,
+      "grad_norm": 0.013931985944509506,
+      "learning_rate": 3.161764705882353e-05,
+      "loss": 0.0399,
+      "step": 37500
+    },
+    {
+      "epoch": 1.1176470588235294,
+      "grad_norm": 0.001668413169682026,
+      "learning_rate": 3.137254901960784e-05,
+      "loss": 0.0202,
+      "step": 38000
+    },
+    {
+      "epoch": 1.1323529411764706,
+      "grad_norm": 0.00567347789183259,
+      "learning_rate": 3.112745098039216e-05,
+      "loss": 0.0336,
+      "step": 38500
+    },
+    {
+      "epoch": 1.1470588235294117,
+      "grad_norm": 0.003477458842098713,
+      "learning_rate": 3.0882352941176475e-05,
+      "loss": 0.0253,
+      "step": 39000
+    },
+    {
+      "epoch": 1.161764705882353,
+      "grad_norm": 0.21819466352462769,
+      "learning_rate": 3.063725490196079e-05,
+      "loss": 0.0293,
+      "step": 39500
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.004708552733063698,
+      "learning_rate": 3.0392156862745097e-05,
+      "loss": 0.0268,
+      "step": 40000
+    },
+    {
+      "epoch": 1.1911764705882353,
+      "grad_norm": 0.0018363581039011478,
+      "learning_rate": 3.0147058823529413e-05,
+      "loss": 0.0266,
+      "step": 40500
+    },
+    {
+      "epoch": 1.2058823529411764,
+      "grad_norm": 0.005581580102443695,
+      "learning_rate": 2.9901960784313725e-05,
+      "loss": 0.0361,
+      "step": 41000
+    },
+    {
+      "epoch": 1.2205882352941178,
+      "grad_norm": 0.0035322746261954308,
+      "learning_rate": 2.965686274509804e-05,
+      "loss": 0.0274,
+      "step": 41500
+    },
+    {
+      "epoch": 1.2352941176470589,
+      "grad_norm": 0.0038708180654793978,
+      "learning_rate": 2.9411764705882354e-05,
+      "loss": 0.0241,
+      "step": 42000
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.0019606975838541985,
+      "learning_rate": 2.916666666666667e-05,
+      "loss": 0.0219,
+      "step": 42500
+    },
+    {
+      "epoch": 1.2647058823529411,
+      "grad_norm": 0.00241417670622468,
+      "learning_rate": 2.8921568627450986e-05,
+      "loss": 0.0244,
+      "step": 43000
+    },
+    {
+      "epoch": 1.2794117647058822,
+      "grad_norm": 0.003593308152630925,
+      "learning_rate": 2.8676470588235295e-05,
+      "loss": 0.027,
+      "step": 43500
+    },
+    {
+      "epoch": 1.2941176470588236,
+      "grad_norm": 1.4003372192382812,
+      "learning_rate": 2.8431372549019608e-05,
+      "loss": 0.0392,
+      "step": 44000
+    },
+    {
+      "epoch": 1.3088235294117647,
+      "grad_norm": 0.41921645402908325,
+      "learning_rate": 2.8186274509803924e-05,
+      "loss": 0.0299,
+      "step": 44500
+    },
+    {
+      "epoch": 1.3235294117647058,
+      "grad_norm": 1.567896842956543,
+      "learning_rate": 2.7941176470588236e-05,
+      "loss": 0.0282,
+      "step": 45000
+    },
+    {
+      "epoch": 1.3382352941176472,
+      "grad_norm": 0.003143745008856058,
+      "learning_rate": 2.7696078431372552e-05,
+      "loss": 0.0197,
+      "step": 45500
+    },
+    {
+      "epoch": 1.3529411764705883,
+      "grad_norm": 0.0016428233357146382,
+      "learning_rate": 2.7450980392156865e-05,
+      "loss": 0.0344,
+      "step": 46000
+    },
+    {
+      "epoch": 1.3676470588235294,
+      "grad_norm": 0.005276167765259743,
+      "learning_rate": 2.7205882352941174e-05,
+      "loss": 0.0312,
+      "step": 46500
+    },
+    {
+      "epoch": 1.3823529411764706,
+      "grad_norm": 0.004461635369807482,
+      "learning_rate": 2.696078431372549e-05,
+      "loss": 0.025,
+      "step": 47000
+    },
+    {
+      "epoch": 1.3970588235294117,
+      "grad_norm": 0.003238542238250375,
+      "learning_rate": 2.6715686274509806e-05,
+      "loss": 0.0236,
+      "step": 47500
+    },
+    {
+      "epoch": 1.4117647058823528,
+      "grad_norm": 0.0017602238804101944,
+      "learning_rate": 2.647058823529412e-05,
+      "loss": 0.0234,
+      "step": 48000
+    },
+    {
+      "epoch": 1.4264705882352942,
+      "grad_norm": 0.002768098609521985,
+      "learning_rate": 2.6225490196078435e-05,
+      "loss": 0.026,
+      "step": 48500
+    },
+    {
+      "epoch": 1.4411764705882353,
+      "grad_norm": 0.002579926745966077,
+      "learning_rate": 2.5980392156862747e-05,
+      "loss": 0.0167,
+      "step": 49000
+    },
+    {
+      "epoch": 1.4558823529411764,
+      "grad_norm": 0.008888750337064266,
+      "learning_rate": 2.5735294117647057e-05,
+      "loss": 0.0288,
+      "step": 49500
+    },
+    {
+      "epoch": 1.4705882352941178,
+      "grad_norm": 0.0038515792693942785,
+      "learning_rate": 2.5490196078431373e-05,
+      "loss": 0.0252,
+      "step": 50000
+    },
+    {
+      "epoch": 1.4852941176470589,
+      "grad_norm": 4.428643226623535,
+      "learning_rate": 2.5245098039215685e-05,
+      "loss": 0.016,
+      "step": 50500
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 0.002629584399983287,
+      "learning_rate": 2.5e-05,
+      "loss": 0.0214,
+      "step": 51000
+    },
+    {
+      "epoch": 1.5147058823529411,
+      "grad_norm": 0.004097859375178814,
+      "learning_rate": 2.4754901960784317e-05,
+      "loss": 0.0183,
+      "step": 51500
+    },
+    {
+      "epoch": 1.5294117647058822,
+      "grad_norm": 0.00568019924685359,
+      "learning_rate": 2.4509803921568626e-05,
+      "loss": 0.0271,
+      "step": 52000
+    },
+    {
+      "epoch": 1.5441176470588234,
+      "grad_norm": 0.0020534582436084747,
+      "learning_rate": 2.4264705882352942e-05,
+      "loss": 0.0281,
+      "step": 52500
+    },
+    {
+      "epoch": 1.5588235294117647,
+      "grad_norm": 0.003300599753856659,
+      "learning_rate": 2.401960784313726e-05,
+      "loss": 0.0284,
+      "step": 53000
+    },
+    {
+      "epoch": 1.5735294117647058,
+      "grad_norm": 0.001472037984058261,
+      "learning_rate": 2.3774509803921568e-05,
+      "loss": 0.0205,
+      "step": 53500
+    },
+    {
+      "epoch": 1.5882352941176472,
+      "grad_norm": 0.002835978288203478,
+      "learning_rate": 2.3529411764705884e-05,
+      "loss": 0.0234,
+      "step": 54000
+    },
+    {
+      "epoch": 1.6029411764705883,
+      "grad_norm": 0.003979724366217852,
+      "learning_rate": 2.3284313725490196e-05,
+      "loss": 0.0233,
+      "step": 54500
+    },
+    {
+      "epoch": 1.6176470588235294,
+      "grad_norm": 0.0036057273391634226,
+      "learning_rate": 2.303921568627451e-05,
+      "loss": 0.0319,
+      "step": 55000
+    },
+    {
+      "epoch": 1.6323529411764706,
+      "grad_norm": 0.0024822901468724012,
+      "learning_rate": 2.2794117647058825e-05,
+      "loss": 0.0211,
+      "step": 55500
+    },
+    {
+      "epoch": 1.6470588235294117,
+      "grad_norm": 0.008930359967052937,
+      "learning_rate": 2.2549019607843138e-05,
+      "loss": 0.0309,
+      "step": 56000
+    },
+    {
+      "epoch": 1.6617647058823528,
+      "grad_norm": 1.3381928205490112,
+      "learning_rate": 2.2303921568627454e-05,
+      "loss": 0.019,
+      "step": 56500
+    },
+    {
+      "epoch": 1.6764705882352942,
+      "grad_norm": 0.004481327719986439,
+      "learning_rate": 2.2058823529411766e-05,
+      "loss": 0.0228,
+      "step": 57000
+    },
+    {
+      "epoch": 1.6911764705882353,
+      "grad_norm": 0.012597435154020786,
+      "learning_rate": 2.181372549019608e-05,
+      "loss": 0.0285,
+      "step": 57500
+    },
+    {
+      "epoch": 1.7058823529411766,
+      "grad_norm": 0.0034783107694238424,
+      "learning_rate": 2.1568627450980395e-05,
+      "loss": 0.0165,
+      "step": 58000
+    },
+    {
+      "epoch": 1.7205882352941178,
+      "grad_norm": 0.0032083301339298487,
+      "learning_rate": 2.1323529411764707e-05,
+      "loss": 0.0306,
+      "step": 58500
+    },
+    {
+      "epoch": 1.7352941176470589,
+      "grad_norm": 0.0039000194519758224,
+      "learning_rate": 2.107843137254902e-05,
+      "loss": 0.0199,
+      "step": 59000
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 0.0034919639583677053,
+      "learning_rate": 2.0833333333333336e-05,
+      "loss": 0.034,
+      "step": 59500
+    },
+    {
+      "epoch": 1.7647058823529411,
+      "grad_norm": 0.004928025882691145,
+      "learning_rate": 2.058823529411765e-05,
+      "loss": 0.0213,
+      "step": 60000
+    },
+    {
+      "epoch": 1.7794117647058822,
+      "grad_norm": 0.003211489412933588,
+      "learning_rate": 2.034313725490196e-05,
+      "loss": 0.0206,
+      "step": 60500
+    },
+    {
+      "epoch": 1.7941176470588234,
+      "grad_norm": 0.011239697225391865,
+      "learning_rate": 2.0098039215686277e-05,
+      "loss": 0.0201,
+      "step": 61000
+    },
+    {
+      "epoch": 1.8088235294117647,
+      "grad_norm": 0.0024609589017927647,
+      "learning_rate": 1.9852941176470586e-05,
+      "loss": 0.0286,
+      "step": 61500
+    },
+    {
+      "epoch": 1.8235294117647058,
+      "grad_norm": 0.0046806493774056435,
+      "learning_rate": 1.9607843137254903e-05,
+      "loss": 0.0216,
+      "step": 62000
+    },
+    {
+      "epoch": 1.8382352941176472,
+      "grad_norm": 2.30717396736145,
+      "learning_rate": 1.936274509803922e-05,
+      "loss": 0.0246,
+      "step": 62500
+    },
+    {
+      "epoch": 1.8529411764705883,
+      "grad_norm": 0.00428669573739171,
+      "learning_rate": 1.9117647058823528e-05,
+      "loss": 0.0166,
+      "step": 63000
+    },
+    {
+      "epoch": 1.8676470588235294,
+      "grad_norm": 0.011403021402657032,
+      "learning_rate": 1.8872549019607844e-05,
+      "loss": 0.0272,
+      "step": 63500
+    },
+    {
+      "epoch": 1.8823529411764706,
+      "grad_norm": 0.0065813250839710236,
+      "learning_rate": 1.862745098039216e-05,
+      "loss": 0.0217,
+      "step": 64000
+    },
+    {
+      "epoch": 1.8970588235294117,
+      "grad_norm": 0.0024323465768247843,
+      "learning_rate": 1.8382352941176472e-05,
+      "loss": 0.0176,
+      "step": 64500
+    },
+    {
+      "epoch": 1.9117647058823528,
+      "grad_norm": 0.001616469700820744,
+      "learning_rate": 1.8137254901960785e-05,
+      "loss": 0.0169,
+      "step": 65000
+    },
+    {
+      "epoch": 1.9264705882352942,
+      "grad_norm": 0.004322522785514593,
+      "learning_rate": 1.7892156862745098e-05,
+      "loss": 0.0216,
+      "step": 65500
+    },
+    {
+      "epoch": 1.9411764705882353,
+      "grad_norm": 0.0024695000611245632,
+      "learning_rate": 1.7647058823529414e-05,
+      "loss": 0.032,
+      "step": 66000
+    },
+    {
+      "epoch": 1.9558823529411766,
+      "grad_norm": 0.010675052180886269,
+      "learning_rate": 1.7401960784313726e-05,
+      "loss": 0.023,
+      "step": 66500
+    },
+    {
+      "epoch": 1.9705882352941178,
+      "grad_norm": 0.49467232823371887,
+      "learning_rate": 1.715686274509804e-05,
+      "loss": 0.0169,
+      "step": 67000
+    },
+    {
+      "epoch": 1.9852941176470589,
+      "grad_norm": 0.01075649168342352,
+      "learning_rate": 1.6911764705882355e-05,
+      "loss": 0.0177,
+      "step": 67500
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.026703685522079468,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 0.0259,
+      "step": 68000
+    },
+    {
+      "epoch": 2.014705882352941,
+      "grad_norm": 0.0023890878073871136,
+      "learning_rate": 1.642156862745098e-05,
+      "loss": 0.0128,
+      "step": 68500
+    },
+    {
+      "epoch": 2.0294117647058822,
+      "grad_norm": 0.0015898487763479352,
+      "learning_rate": 1.6176470588235296e-05,
+      "loss": 0.0112,
+      "step": 69000
+    },
+    {
+      "epoch": 2.0441176470588234,
+      "grad_norm": 0.0041648312471807,
+      "learning_rate": 1.593137254901961e-05,
+      "loss": 0.0155,
+      "step": 69500
+    },
+    {
+      "epoch": 2.0588235294117645,
+      "grad_norm": 0.10951696336269379,
+      "learning_rate": 1.568627450980392e-05,
+      "loss": 0.0225,
+      "step": 70000
+    },
+    {
+      "epoch": 2.073529411764706,
+      "grad_norm": 0.0021414640359580517,
+      "learning_rate": 1.5441176470588237e-05,
+      "loss": 0.0174,
+      "step": 70500
+    },
+    {
+      "epoch": 2.088235294117647,
+      "grad_norm": 0.001627126126550138,
+      "learning_rate": 1.5196078431372548e-05,
+      "loss": 0.0154,
+      "step": 71000
+    },
+    {
+      "epoch": 2.1029411764705883,
+      "grad_norm": 0.005821748171001673,
+      "learning_rate": 1.4950980392156863e-05,
+      "loss": 0.0255,
+      "step": 71500
+    },
+    {
+      "epoch": 2.1176470588235294,
+      "grad_norm": 0.0071876379661262035,
+      "learning_rate": 1.4705882352941177e-05,
+      "loss": 0.024,
+      "step": 72000
+    },
+    {
+      "epoch": 2.1323529411764706,
+      "grad_norm": 0.00294076488353312,
+      "learning_rate": 1.4460784313725493e-05,
+      "loss": 0.012,
+      "step": 72500
+    },
+    {
+      "epoch": 2.1470588235294117,
+      "grad_norm": 0.004260234069079161,
+      "learning_rate": 1.4215686274509804e-05,
+      "loss": 0.0202,
+      "step": 73000
+    },
+    {
+      "epoch": 2.161764705882353,
+      "grad_norm": 0.005722737871110439,
+      "learning_rate": 1.3970588235294118e-05,
+      "loss": 0.0283,
+      "step": 73500
+    },
+    {
+      "epoch": 2.176470588235294,
+      "grad_norm": 0.016162721440196037,
+      "learning_rate": 1.3725490196078432e-05,
+      "loss": 0.0115,
+      "step": 74000
+    },
+    {
+      "epoch": 2.1911764705882355,
+      "grad_norm": 0.005048350431025028,
+      "learning_rate": 1.3480392156862745e-05,
+      "loss": 0.023,
+      "step": 74500
+    },
+    {
+      "epoch": 2.2058823529411766,
+      "grad_norm": 0.004102902952581644,
+      "learning_rate": 1.323529411764706e-05,
+      "loss": 0.018,
+      "step": 75000
+    },
+    {
+      "epoch": 2.2205882352941178,
+      "grad_norm": 0.0021536860149353743,
+      "learning_rate": 1.2990196078431374e-05,
+      "loss": 0.0169,
+      "step": 75500
+    },
+    {
+      "epoch": 2.235294117647059,
+      "grad_norm": 0.001363063813187182,
+      "learning_rate": 1.2745098039215686e-05,
+      "loss": 0.0129,
+      "step": 76000
+    },
+    {
+      "epoch": 2.25,
+      "grad_norm": 0.0025944672524929047,
+      "learning_rate": 1.25e-05,
+      "loss": 0.0095,
+      "step": 76500
+    },
+    {
+      "epoch": 2.264705882352941,
+      "grad_norm": 0.001098868204280734,
+      "learning_rate": 1.2254901960784313e-05,
+      "loss": 0.0183,
+      "step": 77000
+    },
+    {
+      "epoch": 2.2794117647058822,
+      "grad_norm": 0.00421817135065794,
+      "learning_rate": 1.200980392156863e-05,
+      "loss": 0.0214,
+      "step": 77500
+    },
+    {
+      "epoch": 2.2941176470588234,
+      "grad_norm": 0.0016445108922198415,
+      "learning_rate": 1.1764705882352942e-05,
+      "loss": 0.009,
+      "step": 78000
+    },
+    {
+      "epoch": 2.3088235294117645,
+      "grad_norm": 0.002265740418806672,
+      "learning_rate": 1.1519607843137254e-05,
+      "loss": 0.0161,
+      "step": 78500
+    },
+    {
+      "epoch": 2.323529411764706,
+      "grad_norm": 0.007099386304616928,
+      "learning_rate": 1.1274509803921569e-05,
+      "loss": 0.0143,
+      "step": 79000
+    },
+    {
+      "epoch": 2.338235294117647,
+      "grad_norm": 0.003914414439350367,
+      "learning_rate": 1.1029411764705883e-05,
+      "loss": 0.0115,
+      "step": 79500
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.002513893647119403,
+      "learning_rate": 1.0784313725490197e-05,
+      "loss": 0.0136,
+      "step": 80000
+    },
+    {
+      "epoch": 2.3676470588235294,
+      "grad_norm": 0.0027356306090950966,
+      "learning_rate": 1.053921568627451e-05,
+      "loss": 0.0247,
+      "step": 80500
+    },
+    {
+      "epoch": 2.3823529411764706,
+      "grad_norm": 0.0817839726805687,
+      "learning_rate": 1.0294117647058824e-05,
+      "loss": 0.0169,
+      "step": 81000
+    },
+    {
+      "epoch": 2.3970588235294117,
+      "grad_norm": 0.08163878321647644,
+      "learning_rate": 1.0049019607843139e-05,
+      "loss": 0.0114,
+      "step": 81500
+    },
+    {
+      "epoch": 2.411764705882353,
+      "grad_norm": 0.002625884721055627,
+      "learning_rate": 9.803921568627451e-06,
+      "loss": 0.0147,
+      "step": 82000
+    },
+    {
+      "epoch": 2.426470588235294,
+      "grad_norm": 0.004531237296760082,
+      "learning_rate": 9.558823529411764e-06,
+      "loss": 0.0106,
+      "step": 82500
+    },
+    {
+      "epoch": 2.4411764705882355,
+      "grad_norm": 0.0032483581453561783,
+      "learning_rate": 9.31372549019608e-06,
+      "loss": 0.0183,
+      "step": 83000
+    },
+    {
+      "epoch": 2.4558823529411766,
+      "grad_norm": 0.0050782193429768085,
+      "learning_rate": 9.068627450980392e-06,
+      "loss": 0.0168,
+      "step": 83500
+    },
+    {
+      "epoch": 2.4705882352941178,
+      "grad_norm": 0.003839900717139244,
+      "learning_rate": 8.823529411764707e-06,
+      "loss": 0.0209,
+      "step": 84000
+    },
+    {
+      "epoch": 2.485294117647059,
+      "grad_norm": 0.008910595439374447,
+      "learning_rate": 8.57843137254902e-06,
+      "loss": 0.0271,
+      "step": 84500
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.008313077501952648,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 0.0175,
+      "step": 85000
+    },
+    {
+      "epoch": 2.514705882352941,
+      "grad_norm": 0.0029984668362885714,
+      "learning_rate": 8.088235294117648e-06,
+      "loss": 0.0084,
+      "step": 85500
+    },
+    {
+      "epoch": 2.5294117647058822,
+      "grad_norm": 0.004524989053606987,
+      "learning_rate": 7.84313725490196e-06,
+      "loss": 0.0156,
+      "step": 86000
+    },
+    {
+      "epoch": 2.5441176470588234,
+      "grad_norm": 0.0023315059952437878,
+      "learning_rate": 7.598039215686274e-06,
+      "loss": 0.0121,
+      "step": 86500
+    },
+    {
+      "epoch": 2.5588235294117645,
+      "grad_norm": 0.0010077670449391007,
+      "learning_rate": 7.3529411764705884e-06,
+      "loss": 0.0113,
+      "step": 87000
+    },
+    {
+      "epoch": 2.5735294117647056,
+      "grad_norm": 0.0012684785760939121,
+      "learning_rate": 7.107843137254902e-06,
+      "loss": 0.0204,
+      "step": 87500
+    },
+    {
+      "epoch": 2.588235294117647,
+      "grad_norm": 0.004918810911476612,
+      "learning_rate": 6.862745098039216e-06,
+      "loss": 0.0059,
+      "step": 88000
+    },
+    {
+      "epoch": 2.6029411764705883,
+      "grad_norm": 0.011489451862871647,
+      "learning_rate": 6.61764705882353e-06,
+      "loss": 0.0273,
+      "step": 88500
+    },
+    {
+      "epoch": 2.6176470588235294,
+      "grad_norm": 0.0028730102349072695,
+      "learning_rate": 6.372549019607843e-06,
+      "loss": 0.0277,
+      "step": 89000
+    },
+    {
+      "epoch": 2.6323529411764706,
+      "grad_norm": 0.005050596781075001,
+      "learning_rate": 6.127450980392157e-06,
+      "loss": 0.0148,
+      "step": 89500
+    },
+    {
+      "epoch": 2.6470588235294117,
+      "grad_norm": 0.0028206182178109884,
+      "learning_rate": 5.882352941176471e-06,
+      "loss": 0.0121,
+      "step": 90000
+    },
+    {
+      "epoch": 2.661764705882353,
+      "grad_norm": 0.0025763895828276873,
+      "learning_rate": 5.637254901960784e-06,
+      "loss": 0.0119,
+      "step": 90500
+    },
+    {
+      "epoch": 2.6764705882352944,
+      "grad_norm": 0.024479951709508896,
+      "learning_rate": 5.392156862745099e-06,
+      "loss": 0.0141,
+      "step": 91000
+    },
+    {
+      "epoch": 2.6911764705882355,
+      "grad_norm": 0.001322206575423479,
+      "learning_rate": 5.147058823529412e-06,
+      "loss": 0.0104,
+      "step": 91500
+    },
+    {
+      "epoch": 2.7058823529411766,
+      "grad_norm": 0.0011514847865328193,
+      "learning_rate": 4.901960784313726e-06,
+      "loss": 0.0192,
+      "step": 92000
+    },
+    {
+      "epoch": 2.7205882352941178,
+      "grad_norm": 0.002985934726893902,
+      "learning_rate": 4.65686274509804e-06,
+      "loss": 0.018,
+      "step": 92500
+    },
+    {
+      "epoch": 2.735294117647059,
+      "grad_norm": 0.0024135473649948835,
+      "learning_rate": 4.411764705882353e-06,
+      "loss": 0.009,
+      "step": 93000
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 0.001011635409668088,
+      "learning_rate": 4.166666666666667e-06,
+      "loss": 0.014,
+      "step": 93500
+    },
+    {
+      "epoch": 2.764705882352941,
+      "grad_norm": 1.9080002307891846,
+      "learning_rate": 3.92156862745098e-06,
+      "loss": 0.0191,
+      "step": 94000
+    },
+    {
+      "epoch": 2.7794117647058822,
+      "grad_norm": 0.004457990638911724,
+      "learning_rate": 3.6764705882352942e-06,
+      "loss": 0.0058,
+      "step": 94500
+    },
+    {
+      "epoch": 2.7941176470588234,
+      "grad_norm": 0.0034340699203312397,
+      "learning_rate": 3.431372549019608e-06,
+      "loss": 0.0124,
+      "step": 95000
+    },
+    {
+      "epoch": 2.8088235294117645,
+      "grad_norm": 0.0019747125916182995,
+      "learning_rate": 3.1862745098039216e-06,
+      "loss": 0.0244,
+      "step": 95500
+    },
+    {
+      "epoch": 2.8235294117647056,
+      "grad_norm": 0.0038515429478138685,
+      "learning_rate": 2.9411764705882355e-06,
+      "loss": 0.0163,
+      "step": 96000
+    },
+    {
+      "epoch": 2.838235294117647,
+      "grad_norm": 0.0029344563372433186,
+      "learning_rate": 2.6960784313725493e-06,
+      "loss": 0.0098,
+      "step": 96500
+    },
+    {
+      "epoch": 2.8529411764705883,
+      "grad_norm": 0.005202002823352814,
+      "learning_rate": 2.450980392156863e-06,
+      "loss": 0.0098,
+      "step": 97000
+    },
+    {
+      "epoch": 2.8676470588235294,
+      "grad_norm": 0.0045198979787528515,
+      "learning_rate": 2.2058823529411767e-06,
+      "loss": 0.0129,
+      "step": 97500
+    },
+    {
+      "epoch": 2.8823529411764706,
+      "grad_norm": 0.002258594846352935,
+      "learning_rate": 1.96078431372549e-06,
+      "loss": 0.0063,
+      "step": 98000
+    },
+    {
+      "epoch": 2.8970588235294117,
+      "grad_norm": 0.036095574498176575,
+      "learning_rate": 1.715686274509804e-06,
+      "loss": 0.0153,
+      "step": 98500
+    },
+    {
+      "epoch": 2.911764705882353,
+      "grad_norm": 0.003401304828003049,
+      "learning_rate": 1.4705882352941177e-06,
+      "loss": 0.0098,
+      "step": 99000
+    },
+    {
+      "epoch": 2.9264705882352944,
+      "grad_norm": 0.004960035905241966,
+      "learning_rate": 1.2254901960784314e-06,
+      "loss": 0.0072,
+      "step": 99500
+    },
+    {
+      "epoch": 2.9411764705882355,
+      "grad_norm": 0.004606081638485193,
+      "learning_rate": 9.80392156862745e-07,
+      "loss": 0.0107,
+      "step": 100000
+    },
+    {
+      "epoch": 2.9558823529411766,
+      "grad_norm": 0.0029280243907123804,
+      "learning_rate": 7.352941176470589e-07,
+      "loss": 0.0078,
+      "step": 100500
+    },
+    {
+      "epoch": 2.9705882352941178,
+      "grad_norm": 0.016250332817435265,
+      "learning_rate": 4.901960784313725e-07,
+      "loss": 0.0183,
+      "step": 101000
+    },
+    {
+      "epoch": 2.985294117647059,
+      "grad_norm": 0.002401071134954691,
+      "learning_rate": 2.4509803921568627e-07,
+      "loss": 0.0117,
+      "step": 101500
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.0015339870005846024,
+      "learning_rate": 0.0,
+      "loss": 0.0134,
+      "step": 102000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 102000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 6.323969768236646e+19,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-102000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd4d39b2c10c5bbc511dbb6611ec40b6fde59c4b12ef3b86a49c36eefffee464
+size 4984

config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "_name_or_path": "google/vit-base-patch16-224-in21k",
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "encoder_stride": 16,
+  "finetuning_task": "image-classification",
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "bb",
+    "1": "bk",
+    "10": "wp",
+    "11": "wq",
+    "12": "wr",
+    "2": "bn",
+    "3": "bp",
+    "4": "bq",
+    "5": "br",
+    "6": "empty",
+    "7": "wb",
+    "8": "wk",
+    "9": "wn"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "bb": "0",
+    "bk": "1",
+    "bn": "2",
+    "bp": "3",
+    "bq": "4",
+    "br": "5",
+    "empty": "6",
+    "wb": "7",
+    "wk": "8",
+    "wn": "9",
+    "wp": "10",
+    "wq": "11",
+    "wr": "12"
+  },
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.2"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0fed5d30904017d84562df1d3f165e9382aa1a8c48564aa75d45eed1462f356e
+size 343257812

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_valid_processor_keys": [
+    "images",
+    "do_resize",
+    "size",
+    "resample",
+    "do_rescale",
+    "rescale_factor",
+    "do_normalize",
+    "image_mean",
+    "image_std",
+    "return_tensors",
+    "data_format",
+    "input_data_format"
+  ],
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "ViTImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 6.323969768236646e+19,
+    "train_loss": 0.0325347986571929,
+    "train_runtime": 10422.0944,
+    "train_samples_per_second": 78.295,
+    "train_steps_per_second": 9.787
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1458 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 102000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.014705882352941176,
+      "grad_norm": 0.35731062293052673,
+      "learning_rate": 4.975490196078432e-05,
+      "loss": 0.5354,
+      "step": 500
+    },
+    {
+      "epoch": 0.029411764705882353,
+      "grad_norm": 2.1595866680145264,
+      "learning_rate": 4.9509803921568634e-05,
+      "loss": 0.1627,
+      "step": 1000
+    },
+    {
+      "epoch": 0.04411764705882353,
+      "grad_norm": 0.14825384318828583,
+      "learning_rate": 4.9264705882352944e-05,
+      "loss": 0.101,
+      "step": 1500
+    },
+    {
+      "epoch": 0.058823529411764705,
+      "grad_norm": 0.07121703773736954,
+      "learning_rate": 4.901960784313725e-05,
+      "loss": 0.0952,
+      "step": 2000
+    },
+    {
+      "epoch": 0.07352941176470588,
+      "grad_norm": 0.06089532747864723,
+      "learning_rate": 4.877450980392157e-05,
+      "loss": 0.0881,
+      "step": 2500
+    },
+    {
+      "epoch": 0.08823529411764706,
+      "grad_norm": 0.037034619599580765,
+      "learning_rate": 4.8529411764705885e-05,
+      "loss": 0.0717,
+      "step": 3000
+    },
+    {
+      "epoch": 0.10294117647058823,
+      "grad_norm": 0.01999847963452339,
+      "learning_rate": 4.82843137254902e-05,
+      "loss": 0.0848,
+      "step": 3500
+    },
+    {
+      "epoch": 0.11764705882352941,
+      "grad_norm": 0.018650399520993233,
+      "learning_rate": 4.803921568627452e-05,
+      "loss": 0.068,
+      "step": 4000
+    },
+    {
+      "epoch": 0.1323529411764706,
+      "grad_norm": 0.023023229092359543,
+      "learning_rate": 4.7794117647058826e-05,
+      "loss": 0.0626,
+      "step": 4500
+    },
+    {
+      "epoch": 0.14705882352941177,
+      "grad_norm": 0.018715515732765198,
+      "learning_rate": 4.7549019607843135e-05,
+      "loss": 0.0555,
+      "step": 5000
+    },
+    {
+      "epoch": 0.16176470588235295,
+      "grad_norm": 0.11842140555381775,
+      "learning_rate": 4.730392156862745e-05,
+      "loss": 0.0658,
+      "step": 5500
+    },
+    {
+      "epoch": 0.17647058823529413,
+      "grad_norm": 0.04816881939768791,
+      "learning_rate": 4.705882352941177e-05,
+      "loss": 0.0648,
+      "step": 6000
+    },
+    {
+      "epoch": 0.19117647058823528,
+      "grad_norm": 0.025240018963813782,
+      "learning_rate": 4.681372549019608e-05,
+      "loss": 0.0632,
+      "step": 6500
+    },
+    {
+      "epoch": 0.20588235294117646,
+      "grad_norm": 0.007390766404569149,
+      "learning_rate": 4.656862745098039e-05,
+      "loss": 0.0705,
+      "step": 7000
+    },
+    {
+      "epoch": 0.22058823529411764,
+      "grad_norm": 0.011664963327348232,
+      "learning_rate": 4.632352941176471e-05,
+      "loss": 0.0552,
+      "step": 7500
+    },
+    {
+      "epoch": 0.23529411764705882,
+      "grad_norm": 39.69175720214844,
+      "learning_rate": 4.607843137254902e-05,
+      "loss": 0.0686,
+      "step": 8000
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 0.007366931065917015,
+      "learning_rate": 4.5833333333333334e-05,
+      "loss": 0.061,
+      "step": 8500
+    },
+    {
+      "epoch": 0.2647058823529412,
+      "grad_norm": 0.003396671498194337,
+      "learning_rate": 4.558823529411765e-05,
+      "loss": 0.0464,
+      "step": 9000
+    },
+    {
+      "epoch": 0.27941176470588236,
+      "grad_norm": 0.045366521924734116,
+      "learning_rate": 4.5343137254901966e-05,
+      "loss": 0.053,
+      "step": 9500
+    },
+    {
+      "epoch": 0.29411764705882354,
+      "grad_norm": 1.137495756149292,
+      "learning_rate": 4.5098039215686275e-05,
+      "loss": 0.0484,
+      "step": 10000
+    },
+    {
+      "epoch": 0.3088235294117647,
+      "grad_norm": 0.0062417215667665005,
+      "learning_rate": 4.485294117647059e-05,
+      "loss": 0.0564,
+      "step": 10500
+    },
+    {
+      "epoch": 0.3235294117647059,
+      "grad_norm": 0.003990447614341974,
+      "learning_rate": 4.460784313725491e-05,
+      "loss": 0.0364,
+      "step": 11000
+    },
+    {
+      "epoch": 0.3382352941176471,
+      "grad_norm": 0.11569799482822418,
+      "learning_rate": 4.4362745098039216e-05,
+      "loss": 0.0436,
+      "step": 11500
+    },
+    {
+      "epoch": 0.35294117647058826,
+      "grad_norm": 0.01159907691180706,
+      "learning_rate": 4.411764705882353e-05,
+      "loss": 0.0456,
+      "step": 12000
+    },
+    {
+      "epoch": 0.36764705882352944,
+      "grad_norm": 10.926911354064941,
+      "learning_rate": 4.387254901960784e-05,
+      "loss": 0.0464,
+      "step": 12500
+    },
+    {
+      "epoch": 0.38235294117647056,
+      "grad_norm": 0.03444543853402138,
+      "learning_rate": 4.362745098039216e-05,
+      "loss": 0.0512,
+      "step": 13000
+    },
+    {
+      "epoch": 0.39705882352941174,
+      "grad_norm": 0.002395658055320382,
+      "learning_rate": 4.3382352941176474e-05,
+      "loss": 0.0586,
+      "step": 13500
+    },
+    {
+      "epoch": 0.4117647058823529,
+      "grad_norm": 0.010224021971225739,
+      "learning_rate": 4.313725490196079e-05,
+      "loss": 0.0407,
+      "step": 14000
+    },
+    {
+      "epoch": 0.4264705882352941,
+      "grad_norm": 0.010642035864293575,
+      "learning_rate": 4.28921568627451e-05,
+      "loss": 0.0536,
+      "step": 14500
+    },
+    {
+      "epoch": 0.4411764705882353,
+      "grad_norm": 0.00970557238906622,
+      "learning_rate": 4.2647058823529415e-05,
+      "loss": 0.0526,
+      "step": 15000
+    },
+    {
+      "epoch": 0.45588235294117646,
+      "grad_norm": 0.0030253385193645954,
+      "learning_rate": 4.2401960784313724e-05,
+      "loss": 0.0432,
+      "step": 15500
+    },
+    {
+      "epoch": 0.47058823529411764,
+      "grad_norm": 0.5423064827919006,
+      "learning_rate": 4.215686274509804e-05,
+      "loss": 0.0381,
+      "step": 16000
+    },
+    {
+      "epoch": 0.4852941176470588,
+      "grad_norm": 7.454125881195068,
+      "learning_rate": 4.1911764705882356e-05,
+      "loss": 0.0397,
+      "step": 16500
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 3.87744402885437,
+      "learning_rate": 4.166666666666667e-05,
+      "loss": 0.0564,
+      "step": 17000
+    },
+    {
+      "epoch": 0.5147058823529411,
+      "grad_norm": 33.39067840576172,
+      "learning_rate": 4.142156862745099e-05,
+      "loss": 0.0455,
+      "step": 17500
+    },
+    {
+      "epoch": 0.5294117647058824,
+      "grad_norm": 0.008246080949902534,
+      "learning_rate": 4.11764705882353e-05,
+      "loss": 0.0515,
+      "step": 18000
+    },
+    {
+      "epoch": 0.5441176470588235,
+      "grad_norm": 0.018734918907284737,
+      "learning_rate": 4.0931372549019607e-05,
+      "loss": 0.0453,
+      "step": 18500
+    },
+    {
+      "epoch": 0.5588235294117647,
+      "grad_norm": 0.007373027969151735,
+      "learning_rate": 4.068627450980392e-05,
+      "loss": 0.0485,
+      "step": 19000
+    },
+    {
+      "epoch": 0.5735294117647058,
+      "grad_norm": 0.0023267469368875027,
+      "learning_rate": 4.044117647058824e-05,
+      "loss": 0.0372,
+      "step": 19500
+    },
+    {
+      "epoch": 0.5882352941176471,
+      "grad_norm": 0.1311252862215042,
+      "learning_rate": 4.0196078431372555e-05,
+      "loss": 0.0442,
+      "step": 20000
+    },
+    {
+      "epoch": 0.6029411764705882,
+      "grad_norm": 0.003710985416546464,
+      "learning_rate": 3.9950980392156864e-05,
+      "loss": 0.0585,
+      "step": 20500
+    },
+    {
+      "epoch": 0.6176470588235294,
+      "grad_norm": 0.004861747846007347,
+      "learning_rate": 3.970588235294117e-05,
+      "loss": 0.0374,
+      "step": 21000
+    },
+    {
+      "epoch": 0.6323529411764706,
+      "grad_norm": 0.004351571202278137,
+      "learning_rate": 3.946078431372549e-05,
+      "loss": 0.0265,
+      "step": 21500
+    },
+    {
+      "epoch": 0.6470588235294118,
+      "grad_norm": 0.008851751685142517,
+      "learning_rate": 3.9215686274509805e-05,
+      "loss": 0.0314,
+      "step": 22000
+    },
+    {
+      "epoch": 0.6617647058823529,
+      "grad_norm": 0.0046307104639709,
+      "learning_rate": 3.897058823529412e-05,
+      "loss": 0.0445,
+      "step": 22500
+    },
+    {
+      "epoch": 0.6764705882352942,
+      "grad_norm": 0.002735880669206381,
+      "learning_rate": 3.872549019607844e-05,
+      "loss": 0.0433,
+      "step": 23000
+    },
+    {
+      "epoch": 0.6911764705882353,
+      "grad_norm": 0.0218490082770586,
+      "learning_rate": 3.8480392156862746e-05,
+      "loss": 0.0444,
+      "step": 23500
+    },
+    {
+      "epoch": 0.7058823529411765,
+      "grad_norm": 2.371448278427124,
+      "learning_rate": 3.8235294117647055e-05,
+      "loss": 0.033,
+      "step": 24000
+    },
+    {
+      "epoch": 0.7205882352941176,
+      "grad_norm": 0.005015780217945576,
+      "learning_rate": 3.799019607843137e-05,
+      "loss": 0.0422,
+      "step": 24500
+    },
+    {
+      "epoch": 0.7352941176470589,
+      "grad_norm": 1.3102850914001465,
+      "learning_rate": 3.774509803921569e-05,
+      "loss": 0.035,
+      "step": 25000
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 0.004522955510765314,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.0303,
+      "step": 25500
+    },
+    {
+      "epoch": 0.7647058823529411,
+      "grad_norm": 0.006077844649553299,
+      "learning_rate": 3.725490196078432e-05,
+      "loss": 0.0503,
+      "step": 26000
+    },
+    {
+      "epoch": 0.7794117647058824,
+      "grad_norm": 0.0026446939446032047,
+      "learning_rate": 3.700980392156863e-05,
+      "loss": 0.0264,
+      "step": 26500
+    },
+    {
+      "epoch": 0.7941176470588235,
+      "grad_norm": 0.004485867917537689,
+      "learning_rate": 3.6764705882352945e-05,
+      "loss": 0.0388,
+      "step": 27000
+    },
+    {
+      "epoch": 0.8088235294117647,
+      "grad_norm": 0.0073866695165634155,
+      "learning_rate": 3.6519607843137254e-05,
+      "loss": 0.0283,
+      "step": 27500
+    },
+    {
+      "epoch": 0.8235294117647058,
+      "grad_norm": 0.012984287925064564,
+      "learning_rate": 3.627450980392157e-05,
+      "loss": 0.0393,
+      "step": 28000
+    },
+    {
+      "epoch": 0.8382352941176471,
+      "grad_norm": 0.01751883700489998,
+      "learning_rate": 3.6029411764705886e-05,
+      "loss": 0.0388,
+      "step": 28500
+    },
+    {
+      "epoch": 0.8529411764705882,
+      "grad_norm": 0.0035843336954712868,
+      "learning_rate": 3.5784313725490195e-05,
+      "loss": 0.0384,
+      "step": 29000
+    },
+    {
+      "epoch": 0.8676470588235294,
+      "grad_norm": 0.0645672082901001,
+      "learning_rate": 3.553921568627451e-05,
+      "loss": 0.0378,
+      "step": 29500
+    },
+    {
+      "epoch": 0.8823529411764706,
+      "grad_norm": 0.008566264994442463,
+      "learning_rate": 3.529411764705883e-05,
+      "loss": 0.0315,
+      "step": 30000
+    },
+    {
+      "epoch": 0.8970588235294118,
+      "grad_norm": 0.010571740567684174,
+      "learning_rate": 3.5049019607843136e-05,
+      "loss": 0.0324,
+      "step": 30500
+    },
+    {
+      "epoch": 0.9117647058823529,
+      "grad_norm": 0.0022533361334353685,
+      "learning_rate": 3.480392156862745e-05,
+      "loss": 0.0292,
+      "step": 31000
+    },
+    {
+      "epoch": 0.9264705882352942,
+      "grad_norm": 0.006164130289107561,
+      "learning_rate": 3.455882352941177e-05,
+      "loss": 0.0407,
+      "step": 31500
+    },
+    {
+      "epoch": 0.9411764705882353,
+      "grad_norm": 0.007435985840857029,
+      "learning_rate": 3.431372549019608e-05,
+      "loss": 0.0449,
+      "step": 32000
+    },
+    {
+      "epoch": 0.9558823529411765,
+      "grad_norm": 0.003777585458010435,
+      "learning_rate": 3.4068627450980394e-05,
+      "loss": 0.0247,
+      "step": 32500
+    },
+    {
+      "epoch": 0.9705882352941176,
+      "grad_norm": 0.005975374951958656,
+      "learning_rate": 3.382352941176471e-05,
+      "loss": 0.0309,
+      "step": 33000
+    },
+    {
+      "epoch": 0.9852941176470589,
+      "grad_norm": 0.0016012012492865324,
+      "learning_rate": 3.357843137254902e-05,
+      "loss": 0.0294,
+      "step": 33500
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.020027656108140945,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.022,
+      "step": 34000
+    },
+    {
+      "epoch": 1.0147058823529411,
+      "grad_norm": 10.916418075561523,
+      "learning_rate": 3.308823529411765e-05,
+      "loss": 0.0264,
+      "step": 34500
+    },
+    {
+      "epoch": 1.0294117647058822,
+      "grad_norm": 0.013728056102991104,
+      "learning_rate": 3.284313725490196e-05,
+      "loss": 0.032,
+      "step": 35000
+    },
+    {
+      "epoch": 1.0441176470588236,
+      "grad_norm": 0.02546643279492855,
+      "learning_rate": 3.2598039215686276e-05,
+      "loss": 0.0404,
+      "step": 35500
+    },
+    {
+      "epoch": 1.0588235294117647,
+      "grad_norm": 0.004185052588582039,
+      "learning_rate": 3.235294117647059e-05,
+      "loss": 0.0306,
+      "step": 36000
+    },
+    {
+      "epoch": 1.0735294117647058,
+      "grad_norm": 0.016653403639793396,
+      "learning_rate": 3.210784313725491e-05,
+      "loss": 0.0217,
+      "step": 36500
+    },
+    {
+      "epoch": 1.088235294117647,
+      "grad_norm": 0.0028331661596894264,
+      "learning_rate": 3.186274509803922e-05,
+      "loss": 0.0265,
+      "step": 37000
+    },
+    {
+      "epoch": 1.1029411764705883,
+      "grad_norm": 0.013931985944509506,
+      "learning_rate": 3.161764705882353e-05,
+      "loss": 0.0399,
+      "step": 37500
+    },
+    {
+      "epoch": 1.1176470588235294,
+      "grad_norm": 0.001668413169682026,
+      "learning_rate": 3.137254901960784e-05,
+      "loss": 0.0202,
+      "step": 38000
+    },
+    {
+      "epoch": 1.1323529411764706,
+      "grad_norm": 0.00567347789183259,
+      "learning_rate": 3.112745098039216e-05,
+      "loss": 0.0336,
+      "step": 38500
+    },
+    {
+      "epoch": 1.1470588235294117,
+      "grad_norm": 0.003477458842098713,
+      "learning_rate": 3.0882352941176475e-05,
+      "loss": 0.0253,
+      "step": 39000
+    },
+    {
+      "epoch": 1.161764705882353,
+      "grad_norm": 0.21819466352462769,
+      "learning_rate": 3.063725490196079e-05,
+      "loss": 0.0293,
+      "step": 39500
+    },
+    {
+      "epoch": 1.1764705882352942,
+      "grad_norm": 0.004708552733063698,
+      "learning_rate": 3.0392156862745097e-05,
+      "loss": 0.0268,
+      "step": 40000
+    },
+    {
+      "epoch": 1.1911764705882353,
+      "grad_norm": 0.0018363581039011478,
+      "learning_rate": 3.0147058823529413e-05,
+      "loss": 0.0266,
+      "step": 40500
+    },
+    {
+      "epoch": 1.2058823529411764,
+      "grad_norm": 0.005581580102443695,
+      "learning_rate": 2.9901960784313725e-05,
+      "loss": 0.0361,
+      "step": 41000
+    },
+    {
+      "epoch": 1.2205882352941178,
+      "grad_norm": 0.0035322746261954308,
+      "learning_rate": 2.965686274509804e-05,
+      "loss": 0.0274,
+      "step": 41500
+    },
+    {
+      "epoch": 1.2352941176470589,
+      "grad_norm": 0.0038708180654793978,
+      "learning_rate": 2.9411764705882354e-05,
+      "loss": 0.0241,
+      "step": 42000
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.0019606975838541985,
+      "learning_rate": 2.916666666666667e-05,
+      "loss": 0.0219,
+      "step": 42500
+    },
+    {
+      "epoch": 1.2647058823529411,
+      "grad_norm": 0.00241417670622468,
+      "learning_rate": 2.8921568627450986e-05,
+      "loss": 0.0244,
+      "step": 43000
+    },
+    {
+      "epoch": 1.2794117647058822,
+      "grad_norm": 0.003593308152630925,
+      "learning_rate": 2.8676470588235295e-05,
+      "loss": 0.027,
+      "step": 43500
+    },
+    {
+      "epoch": 1.2941176470588236,
+      "grad_norm": 1.4003372192382812,
+      "learning_rate": 2.8431372549019608e-05,
+      "loss": 0.0392,
+      "step": 44000
+    },
+    {
+      "epoch": 1.3088235294117647,
+      "grad_norm": 0.41921645402908325,
+      "learning_rate": 2.8186274509803924e-05,
+      "loss": 0.0299,
+      "step": 44500
+    },
+    {
+      "epoch": 1.3235294117647058,
+      "grad_norm": 1.567896842956543,
+      "learning_rate": 2.7941176470588236e-05,
+      "loss": 0.0282,
+      "step": 45000
+    },
+    {
+      "epoch": 1.3382352941176472,
+      "grad_norm": 0.003143745008856058,
+      "learning_rate": 2.7696078431372552e-05,
+      "loss": 0.0197,
+      "step": 45500
+    },
+    {
+      "epoch": 1.3529411764705883,
+      "grad_norm": 0.0016428233357146382,
+      "learning_rate": 2.7450980392156865e-05,
+      "loss": 0.0344,
+      "step": 46000
+    },
+    {
+      "epoch": 1.3676470588235294,
+      "grad_norm": 0.005276167765259743,
+      "learning_rate": 2.7205882352941174e-05,
+      "loss": 0.0312,
+      "step": 46500
+    },
+    {
+      "epoch": 1.3823529411764706,
+      "grad_norm": 0.004461635369807482,
+      "learning_rate": 2.696078431372549e-05,
+      "loss": 0.025,
+      "step": 47000
+    },
+    {
+      "epoch": 1.3970588235294117,
+      "grad_norm": 0.003238542238250375,
+      "learning_rate": 2.6715686274509806e-05,
+      "loss": 0.0236,
+      "step": 47500
+    },
+    {
+      "epoch": 1.4117647058823528,
+      "grad_norm": 0.0017602238804101944,
+      "learning_rate": 2.647058823529412e-05,
+      "loss": 0.0234,
+      "step": 48000
+    },
+    {
+      "epoch": 1.4264705882352942,
+      "grad_norm": 0.002768098609521985,
+      "learning_rate": 2.6225490196078435e-05,
+      "loss": 0.026,
+      "step": 48500
+    },
+    {
+      "epoch": 1.4411764705882353,
+      "grad_norm": 0.002579926745966077,
+      "learning_rate": 2.5980392156862747e-05,
+      "loss": 0.0167,
+      "step": 49000
+    },
+    {
+      "epoch": 1.4558823529411764,
+      "grad_norm": 0.008888750337064266,
+      "learning_rate": 2.5735294117647057e-05,
+      "loss": 0.0288,
+      "step": 49500
+    },
+    {
+      "epoch": 1.4705882352941178,
+      "grad_norm": 0.0038515792693942785,
+      "learning_rate": 2.5490196078431373e-05,
+      "loss": 0.0252,
+      "step": 50000
+    },
+    {
+      "epoch": 1.4852941176470589,
+      "grad_norm": 4.428643226623535,
+      "learning_rate": 2.5245098039215685e-05,
+      "loss": 0.016,
+      "step": 50500
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 0.002629584399983287,
+      "learning_rate": 2.5e-05,
+      "loss": 0.0214,
+      "step": 51000
+    },
+    {
+      "epoch": 1.5147058823529411,
+      "grad_norm": 0.004097859375178814,
+      "learning_rate": 2.4754901960784317e-05,
+      "loss": 0.0183,
+      "step": 51500
+    },
+    {
+      "epoch": 1.5294117647058822,
+      "grad_norm": 0.00568019924685359,
+      "learning_rate": 2.4509803921568626e-05,
+      "loss": 0.0271,
+      "step": 52000
+    },
+    {
+      "epoch": 1.5441176470588234,
+      "grad_norm": 0.0020534582436084747,
+      "learning_rate": 2.4264705882352942e-05,
+      "loss": 0.0281,
+      "step": 52500
+    },
+    {
+      "epoch": 1.5588235294117647,
+      "grad_norm": 0.003300599753856659,
+      "learning_rate": 2.401960784313726e-05,
+      "loss": 0.0284,
+      "step": 53000
+    },
+    {
+      "epoch": 1.5735294117647058,
+      "grad_norm": 0.001472037984058261,
+      "learning_rate": 2.3774509803921568e-05,
+      "loss": 0.0205,
+      "step": 53500
+    },
+    {
+      "epoch": 1.5882352941176472,
+      "grad_norm": 0.002835978288203478,
+      "learning_rate": 2.3529411764705884e-05,
+      "loss": 0.0234,
+      "step": 54000
+    },
+    {
+      "epoch": 1.6029411764705883,
+      "grad_norm": 0.003979724366217852,
+      "learning_rate": 2.3284313725490196e-05,
+      "loss": 0.0233,
+      "step": 54500
+    },
+    {
+      "epoch": 1.6176470588235294,
+      "grad_norm": 0.0036057273391634226,
+      "learning_rate": 2.303921568627451e-05,
+      "loss": 0.0319,
+      "step": 55000
+    },
+    {
+      "epoch": 1.6323529411764706,
+      "grad_norm": 0.0024822901468724012,
+      "learning_rate": 2.2794117647058825e-05,
+      "loss": 0.0211,
+      "step": 55500
+    },
+    {
+      "epoch": 1.6470588235294117,
+      "grad_norm": 0.008930359967052937,
+      "learning_rate": 2.2549019607843138e-05,
+      "loss": 0.0309,
+      "step": 56000
+    },
+    {
+      "epoch": 1.6617647058823528,
+      "grad_norm": 1.3381928205490112,
+      "learning_rate": 2.2303921568627454e-05,
+      "loss": 0.019,
+      "step": 56500
+    },
+    {
+      "epoch": 1.6764705882352942,
+      "grad_norm": 0.004481327719986439,
+      "learning_rate": 2.2058823529411766e-05,
+      "loss": 0.0228,
+      "step": 57000
+    },
+    {
+      "epoch": 1.6911764705882353,
+      "grad_norm": 0.012597435154020786,
+      "learning_rate": 2.181372549019608e-05,
+      "loss": 0.0285,
+      "step": 57500
+    },
+    {
+      "epoch": 1.7058823529411766,
+      "grad_norm": 0.0034783107694238424,
+      "learning_rate": 2.1568627450980395e-05,
+      "loss": 0.0165,
+      "step": 58000
+    },
+    {
+      "epoch": 1.7205882352941178,
+      "grad_norm": 0.0032083301339298487,
+      "learning_rate": 2.1323529411764707e-05,
+      "loss": 0.0306,
+      "step": 58500
+    },
+    {
+      "epoch": 1.7352941176470589,
+      "grad_norm": 0.0039000194519758224,
+      "learning_rate": 2.107843137254902e-05,
+      "loss": 0.0199,
+      "step": 59000
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 0.0034919639583677053,
+      "learning_rate": 2.0833333333333336e-05,
+      "loss": 0.034,
+      "step": 59500
+    },
+    {
+      "epoch": 1.7647058823529411,
+      "grad_norm": 0.004928025882691145,
+      "learning_rate": 2.058823529411765e-05,
+      "loss": 0.0213,
+      "step": 60000
+    },
+    {
+      "epoch": 1.7794117647058822,
+      "grad_norm": 0.003211489412933588,
+      "learning_rate": 2.034313725490196e-05,
+      "loss": 0.0206,
+      "step": 60500
+    },
+    {
+      "epoch": 1.7941176470588234,
+      "grad_norm": 0.011239697225391865,
+      "learning_rate": 2.0098039215686277e-05,
+      "loss": 0.0201,
+      "step": 61000
+    },
+    {
+      "epoch": 1.8088235294117647,
+      "grad_norm": 0.0024609589017927647,
+      "learning_rate": 1.9852941176470586e-05,
+      "loss": 0.0286,
+      "step": 61500
+    },
+    {
+      "epoch": 1.8235294117647058,
+      "grad_norm": 0.0046806493774056435,
+      "learning_rate": 1.9607843137254903e-05,
+      "loss": 0.0216,
+      "step": 62000
+    },
+    {
+      "epoch": 1.8382352941176472,
+      "grad_norm": 2.30717396736145,
+      "learning_rate": 1.936274509803922e-05,
+      "loss": 0.0246,
+      "step": 62500
+    },
+    {
+      "epoch": 1.8529411764705883,
+      "grad_norm": 0.00428669573739171,
+      "learning_rate": 1.9117647058823528e-05,
+      "loss": 0.0166,
+      "step": 63000
+    },
+    {
+      "epoch": 1.8676470588235294,
+      "grad_norm": 0.011403021402657032,
+      "learning_rate": 1.8872549019607844e-05,
+      "loss": 0.0272,
+      "step": 63500
+    },
+    {
+      "epoch": 1.8823529411764706,
+      "grad_norm": 0.0065813250839710236,
+      "learning_rate": 1.862745098039216e-05,
+      "loss": 0.0217,
+      "step": 64000
+    },
+    {
+      "epoch": 1.8970588235294117,
+      "grad_norm": 0.0024323465768247843,
+      "learning_rate": 1.8382352941176472e-05,
+      "loss": 0.0176,
+      "step": 64500
+    },
+    {
+      "epoch": 1.9117647058823528,
+      "grad_norm": 0.001616469700820744,
+      "learning_rate": 1.8137254901960785e-05,
+      "loss": 0.0169,
+      "step": 65000
+    },
+    {
+      "epoch": 1.9264705882352942,
+      "grad_norm": 0.004322522785514593,
+      "learning_rate": 1.7892156862745098e-05,
+      "loss": 0.0216,
+      "step": 65500
+    },
+    {
+      "epoch": 1.9411764705882353,
+      "grad_norm": 0.0024695000611245632,
+      "learning_rate": 1.7647058823529414e-05,
+      "loss": 0.032,
+      "step": 66000
+    },
+    {
+      "epoch": 1.9558823529411766,
+      "grad_norm": 0.010675052180886269,
+      "learning_rate": 1.7401960784313726e-05,
+      "loss": 0.023,
+      "step": 66500
+    },
+    {
+      "epoch": 1.9705882352941178,
+      "grad_norm": 0.49467232823371887,
+      "learning_rate": 1.715686274509804e-05,
+      "loss": 0.0169,
+      "step": 67000
+    },
+    {
+      "epoch": 1.9852941176470589,
+      "grad_norm": 0.01075649168342352,
+      "learning_rate": 1.6911764705882355e-05,
+      "loss": 0.0177,
+      "step": 67500
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.026703685522079468,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 0.0259,
+      "step": 68000
+    },
+    {
+      "epoch": 2.014705882352941,
+      "grad_norm": 0.0023890878073871136,
+      "learning_rate": 1.642156862745098e-05,
+      "loss": 0.0128,
+      "step": 68500
+    },
+    {
+      "epoch": 2.0294117647058822,
+      "grad_norm": 0.0015898487763479352,
+      "learning_rate": 1.6176470588235296e-05,
+      "loss": 0.0112,
+      "step": 69000
+    },
+    {
+      "epoch": 2.0441176470588234,
+      "grad_norm": 0.0041648312471807,
+      "learning_rate": 1.593137254901961e-05,
+      "loss": 0.0155,
+      "step": 69500
+    },
+    {
+      "epoch": 2.0588235294117645,
+      "grad_norm": 0.10951696336269379,
+      "learning_rate": 1.568627450980392e-05,
+      "loss": 0.0225,
+      "step": 70000
+    },
+    {
+      "epoch": 2.073529411764706,
+      "grad_norm": 0.0021414640359580517,
+      "learning_rate": 1.5441176470588237e-05,
+      "loss": 0.0174,
+      "step": 70500
+    },
+    {
+      "epoch": 2.088235294117647,
+      "grad_norm": 0.001627126126550138,
+      "learning_rate": 1.5196078431372548e-05,
+      "loss": 0.0154,
+      "step": 71000
+    },
+    {
+      "epoch": 2.1029411764705883,
+      "grad_norm": 0.005821748171001673,
+      "learning_rate": 1.4950980392156863e-05,
+      "loss": 0.0255,
+      "step": 71500
+    },
+    {
+      "epoch": 2.1176470588235294,
+      "grad_norm": 0.0071876379661262035,
+      "learning_rate": 1.4705882352941177e-05,
+      "loss": 0.024,
+      "step": 72000
+    },
+    {
+      "epoch": 2.1323529411764706,
+      "grad_norm": 0.00294076488353312,
+      "learning_rate": 1.4460784313725493e-05,
+      "loss": 0.012,
+      "step": 72500
+    },
+    {
+      "epoch": 2.1470588235294117,
+      "grad_norm": 0.004260234069079161,
+      "learning_rate": 1.4215686274509804e-05,
+      "loss": 0.0202,
+      "step": 73000
+    },
+    {
+      "epoch": 2.161764705882353,
+      "grad_norm": 0.005722737871110439,
+      "learning_rate": 1.3970588235294118e-05,
+      "loss": 0.0283,
+      "step": 73500
+    },
+    {
+      "epoch": 2.176470588235294,
+      "grad_norm": 0.016162721440196037,
+      "learning_rate": 1.3725490196078432e-05,
+      "loss": 0.0115,
+      "step": 74000
+    },
+    {
+      "epoch": 2.1911764705882355,
+      "grad_norm": 0.005048350431025028,
+      "learning_rate": 1.3480392156862745e-05,
+      "loss": 0.023,
+      "step": 74500
+    },
+    {
+      "epoch": 2.2058823529411766,
+      "grad_norm": 0.004102902952581644,
+      "learning_rate": 1.323529411764706e-05,
+      "loss": 0.018,
+      "step": 75000
+    },
+    {
+      "epoch": 2.2205882352941178,
+      "grad_norm": 0.0021536860149353743,
+      "learning_rate": 1.2990196078431374e-05,
+      "loss": 0.0169,
+      "step": 75500
+    },
+    {
+      "epoch": 2.235294117647059,
+      "grad_norm": 0.001363063813187182,
+      "learning_rate": 1.2745098039215686e-05,
+      "loss": 0.0129,
+      "step": 76000
+    },
+    {
+      "epoch": 2.25,
+      "grad_norm": 0.0025944672524929047,
+      "learning_rate": 1.25e-05,
+      "loss": 0.0095,
+      "step": 76500
+    },
+    {
+      "epoch": 2.264705882352941,
+      "grad_norm": 0.001098868204280734,
+      "learning_rate": 1.2254901960784313e-05,
+      "loss": 0.0183,
+      "step": 77000
+    },
+    {
+      "epoch": 2.2794117647058822,
+      "grad_norm": 0.00421817135065794,
+      "learning_rate": 1.200980392156863e-05,
+      "loss": 0.0214,
+      "step": 77500
+    },
+    {
+      "epoch": 2.2941176470588234,
+      "grad_norm": 0.0016445108922198415,
+      "learning_rate": 1.1764705882352942e-05,
+      "loss": 0.009,
+      "step": 78000
+    },
+    {
+      "epoch": 2.3088235294117645,
+      "grad_norm": 0.002265740418806672,
+      "learning_rate": 1.1519607843137254e-05,
+      "loss": 0.0161,
+      "step": 78500
+    },
+    {
+      "epoch": 2.323529411764706,
+      "grad_norm": 0.007099386304616928,
+      "learning_rate": 1.1274509803921569e-05,
+      "loss": 0.0143,
+      "step": 79000
+    },
+    {
+      "epoch": 2.338235294117647,
+      "grad_norm": 0.003914414439350367,
+      "learning_rate": 1.1029411764705883e-05,
+      "loss": 0.0115,
+      "step": 79500
+    },
+    {
+      "epoch": 2.3529411764705883,
+      "grad_norm": 0.002513893647119403,
+      "learning_rate": 1.0784313725490197e-05,
+      "loss": 0.0136,
+      "step": 80000
+    },
+    {
+      "epoch": 2.3676470588235294,
+      "grad_norm": 0.0027356306090950966,
+      "learning_rate": 1.053921568627451e-05,
+      "loss": 0.0247,
+      "step": 80500
+    },
+    {
+      "epoch": 2.3823529411764706,
+      "grad_norm": 0.0817839726805687,
+      "learning_rate": 1.0294117647058824e-05,
+      "loss": 0.0169,
+      "step": 81000
+    },
+    {
+      "epoch": 2.3970588235294117,
+      "grad_norm": 0.08163878321647644,
+      "learning_rate": 1.0049019607843139e-05,
+      "loss": 0.0114,
+      "step": 81500
+    },
+    {
+      "epoch": 2.411764705882353,
+      "grad_norm": 0.002625884721055627,
+      "learning_rate": 9.803921568627451e-06,
+      "loss": 0.0147,
+      "step": 82000
+    },
+    {
+      "epoch": 2.426470588235294,
+      "grad_norm": 0.004531237296760082,
+      "learning_rate": 9.558823529411764e-06,
+      "loss": 0.0106,
+      "step": 82500
+    },
+    {
+      "epoch": 2.4411764705882355,
+      "grad_norm": 0.0032483581453561783,
+      "learning_rate": 9.31372549019608e-06,
+      "loss": 0.0183,
+      "step": 83000
+    },
+    {
+      "epoch": 2.4558823529411766,
+      "grad_norm": 0.0050782193429768085,
+      "learning_rate": 9.068627450980392e-06,
+      "loss": 0.0168,
+      "step": 83500
+    },
+    {
+      "epoch": 2.4705882352941178,
+      "grad_norm": 0.003839900717139244,
+      "learning_rate": 8.823529411764707e-06,
+      "loss": 0.0209,
+      "step": 84000
+    },
+    {
+      "epoch": 2.485294117647059,
+      "grad_norm": 0.008910595439374447,
+      "learning_rate": 8.57843137254902e-06,
+      "loss": 0.0271,
+      "step": 84500
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.008313077501952648,
+      "learning_rate": 8.333333333333334e-06,
+      "loss": 0.0175,
+      "step": 85000
+    },
+    {
+      "epoch": 2.514705882352941,
+      "grad_norm": 0.0029984668362885714,
+      "learning_rate": 8.088235294117648e-06,
+      "loss": 0.0084,
+      "step": 85500
+    },
+    {
+      "epoch": 2.5294117647058822,
+      "grad_norm": 0.004524989053606987,
+      "learning_rate": 7.84313725490196e-06,
+      "loss": 0.0156,
+      "step": 86000
+    },
+    {
+      "epoch": 2.5441176470588234,
+      "grad_norm": 0.0023315059952437878,
+      "learning_rate": 7.598039215686274e-06,
+      "loss": 0.0121,
+      "step": 86500
+    },
+    {
+      "epoch": 2.5588235294117645,
+      "grad_norm": 0.0010077670449391007,
+      "learning_rate": 7.3529411764705884e-06,
+      "loss": 0.0113,
+      "step": 87000
+    },
+    {
+      "epoch": 2.5735294117647056,
+      "grad_norm": 0.0012684785760939121,
+      "learning_rate": 7.107843137254902e-06,
+      "loss": 0.0204,
+      "step": 87500
+    },
+    {
+      "epoch": 2.588235294117647,
+      "grad_norm": 0.004918810911476612,
+      "learning_rate": 6.862745098039216e-06,
+      "loss": 0.0059,
+      "step": 88000
+    },
+    {
+      "epoch": 2.6029411764705883,
+      "grad_norm": 0.011489451862871647,
+      "learning_rate": 6.61764705882353e-06,
+      "loss": 0.0273,
+      "step": 88500
+    },
+    {
+      "epoch": 2.6176470588235294,
+      "grad_norm": 0.0028730102349072695,
+      "learning_rate": 6.372549019607843e-06,
+      "loss": 0.0277,
+      "step": 89000
+    },
+    {
+      "epoch": 2.6323529411764706,
+      "grad_norm": 0.005050596781075001,
+      "learning_rate": 6.127450980392157e-06,
+      "loss": 0.0148,
+      "step": 89500
+    },
+    {
+      "epoch": 2.6470588235294117,
+      "grad_norm": 0.0028206182178109884,
+      "learning_rate": 5.882352941176471e-06,
+      "loss": 0.0121,
+      "step": 90000
+    },
+    {
+      "epoch": 2.661764705882353,
+      "grad_norm": 0.0025763895828276873,
+      "learning_rate": 5.637254901960784e-06,
+      "loss": 0.0119,
+      "step": 90500
+    },
+    {
+      "epoch": 2.6764705882352944,
+      "grad_norm": 0.024479951709508896,
+      "learning_rate": 5.392156862745099e-06,
+      "loss": 0.0141,
+      "step": 91000
+    },
+    {
+      "epoch": 2.6911764705882355,
+      "grad_norm": 0.001322206575423479,
+      "learning_rate": 5.147058823529412e-06,
+      "loss": 0.0104,
+      "step": 91500
+    },
+    {
+      "epoch": 2.7058823529411766,
+      "grad_norm": 0.0011514847865328193,
+      "learning_rate": 4.901960784313726e-06,
+      "loss": 0.0192,
+      "step": 92000
+    },
+    {
+      "epoch": 2.7205882352941178,
+      "grad_norm": 0.002985934726893902,
+      "learning_rate": 4.65686274509804e-06,
+      "loss": 0.018,
+      "step": 92500
+    },
+    {
+      "epoch": 2.735294117647059,
+      "grad_norm": 0.0024135473649948835,
+      "learning_rate": 4.411764705882353e-06,
+      "loss": 0.009,
+      "step": 93000
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 0.001011635409668088,
+      "learning_rate": 4.166666666666667e-06,
+      "loss": 0.014,
+      "step": 93500
+    },
+    {
+      "epoch": 2.764705882352941,
+      "grad_norm": 1.9080002307891846,
+      "learning_rate": 3.92156862745098e-06,
+      "loss": 0.0191,
+      "step": 94000
+    },
+    {
+      "epoch": 2.7794117647058822,
+      "grad_norm": 0.004457990638911724,
+      "learning_rate": 3.6764705882352942e-06,
+      "loss": 0.0058,
+      "step": 94500
+    },
+    {
+      "epoch": 2.7941176470588234,
+      "grad_norm": 0.0034340699203312397,
+      "learning_rate": 3.431372549019608e-06,
+      "loss": 0.0124,
+      "step": 95000
+    },
+    {
+      "epoch": 2.8088235294117645,
+      "grad_norm": 0.0019747125916182995,
+      "learning_rate": 3.1862745098039216e-06,
+      "loss": 0.0244,
+      "step": 95500
+    },
+    {
+      "epoch": 2.8235294117647056,
+      "grad_norm": 0.0038515429478138685,
+      "learning_rate": 2.9411764705882355e-06,
+      "loss": 0.0163,
+      "step": 96000
+    },
+    {
+      "epoch": 2.838235294117647,
+      "grad_norm": 0.0029344563372433186,
+      "learning_rate": 2.6960784313725493e-06,
+      "loss": 0.0098,
+      "step": 96500
+    },
+    {
+      "epoch": 2.8529411764705883,
+      "grad_norm": 0.005202002823352814,
+      "learning_rate": 2.450980392156863e-06,
+      "loss": 0.0098,
+      "step": 97000
+    },
+    {
+      "epoch": 2.8676470588235294,
+      "grad_norm": 0.0045198979787528515,
+      "learning_rate": 2.2058823529411767e-06,
+      "loss": 0.0129,
+      "step": 97500
+    },
+    {
+      "epoch": 2.8823529411764706,
+      "grad_norm": 0.002258594846352935,
+      "learning_rate": 1.96078431372549e-06,
+      "loss": 0.0063,
+      "step": 98000
+    },
+    {
+      "epoch": 2.8970588235294117,
+      "grad_norm": 0.036095574498176575,
+      "learning_rate": 1.715686274509804e-06,
+      "loss": 0.0153,
+      "step": 98500
+    },
+    {
+      "epoch": 2.911764705882353,
+      "grad_norm": 0.003401304828003049,
+      "learning_rate": 1.4705882352941177e-06,
+      "loss": 0.0098,
+      "step": 99000
+    },
+    {
+      "epoch": 2.9264705882352944,
+      "grad_norm": 0.004960035905241966,
+      "learning_rate": 1.2254901960784314e-06,
+      "loss": 0.0072,
+      "step": 99500
+    },
+    {
+      "epoch": 2.9411764705882355,
+      "grad_norm": 0.004606081638485193,
+      "learning_rate": 9.80392156862745e-07,
+      "loss": 0.0107,
+      "step": 100000
+    },
+    {
+      "epoch": 2.9558823529411766,
+      "grad_norm": 0.0029280243907123804,
+      "learning_rate": 7.352941176470589e-07,
+      "loss": 0.0078,
+      "step": 100500
+    },
+    {
+      "epoch": 2.9705882352941178,
+      "grad_norm": 0.016250332817435265,
+      "learning_rate": 4.901960784313725e-07,
+      "loss": 0.0183,
+      "step": 101000
+    },
+    {
+      "epoch": 2.985294117647059,
+      "grad_norm": 0.002401071134954691,
+      "learning_rate": 2.4509803921568627e-07,
+      "loss": 0.0117,
+      "step": 101500
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.0015339870005846024,
+      "learning_rate": 0.0,
+      "loss": 0.0134,
+      "step": 102000
+    },
+    {
+      "epoch": 3.0,
+      "step": 102000,
+      "total_flos": 6.323969768236646e+19,
+      "train_loss": 0.0325347986571929,
+      "train_runtime": 10422.0944,
+      "train_samples_per_second": 78.295,
+      "train_steps_per_second": 9.787
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 102000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 6.323969768236646e+19,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd4d39b2c10c5bbc511dbb6611ec40b6fde59c4b12ef3b86a49c36eefffee464
+size 4984