Upload folder using huggingface_hub

Browse files

Files changed (15) hide show

README.md +42 -0
checkpoint-5181/config.json +51 -0
checkpoint-5181/model.safetensors +3 -0
checkpoint-5181/optimizer.pt +3 -0
checkpoint-5181/rng_state.pth +3 -0
checkpoint-5181/scheduler.pt +3 -0
checkpoint-5181/trainer_state.json +1524 -0
checkpoint-5181/training_args.bin +3 -0
config.json +51 -0
model.safetensors +3 -0
preprocessor_config.json +36 -0
runs/Apr15_18-44-28_496e75b93dc4/events.out.tfevents.1713206669.496e75b93dc4.24181.0 +2 -2
runs/Apr15_18-44-28_496e75b93dc4/events.out.tfevents.1713286725.496e75b93dc4.24181.1 +3 -0
training_args.bin +3 -0
training_params.json +29 -0

README.md ADDED Viewed

	@@ -0,0 +1,42 @@

+---
+tags:
+- autotrain
+- image-classification
+widget:
+- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
+  example_title: Tiger
+- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
+  example_title: Teapot
+- src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
+  example_title: Palace
+datasets:
+- xblock-large-patch2-224/autotrain-data
+---
+# Model Trained Using AutoTrain
+- Problem type: Image Classification
+## Validation Metrics
+loss: 0.4315283000469208
+f1_macro: 0.6149830093941424
+f1_micro: 0.8602430555555556
+f1_weighted: 0.8515059109185544
+precision_macro: 0.7610988679415244
+precision_micro: 0.8602430555555556
+precision_weighted: 0.8532444856848228
+recall_macro: 0.5527145295483504
+recall_micro: 0.8602430555555556
+recall_weighted: 0.8602430555555556
+accuracy: 0.8602430555555556

checkpoint-5181/config.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "_name_or_path": "google/vit-large-patch16-224",
+  "_num_labels": 11,
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "encoder_stride": 16,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "altright",
+    "1": "bluesky",
+    "2": "facebook",
+    "3": "fediverse",
+    "4": "instagram",
+    "5": "irrelevant",
+    "6": "ngl",
+    "7": "reddit",
+    "8": "threads",
+    "9": "tumblr",
+    "10": "twitter"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "altright": 0,
+    "bluesky": 1,
+    "facebook": 2,
+    "fediverse": 3,
+    "instagram": 4,
+    "irrelevant": 5,
+    "ngl": 6,
+    "reddit": 7,
+    "threads": 8,
+    "tumblr": 9,
+    "twitter": 10
+  },
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.3"
+}

checkpoint-5181/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb13c4b35ce597cb88a875a80db2847e13688ae876bf69bd6a658d707c9e1ca5
+size 1213298172

checkpoint-5181/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf6eda18a5094cea7273c05e2ccbac4d4efa7262fed33056a67c947675cbbdc3
+size 2426831274

checkpoint-5181/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:407c90e72710d8833691a0b520011b7262e79723b1f909d7c1ef5c77a957f5ae
+size 14244

checkpoint-5181/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:324fb18a51d85774ee66fbd2bc26b3ad6a7ee26f4e17cc706b54f6f9f7e49409
+size 1064

checkpoint-5181/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1524 @@

+{
+  "best_metric": 0.4315283000469208,
+  "best_model_checkpoint": "xblock-large-patch2-224/checkpoint-5181",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 5181,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01,
+      "grad_norm": 19.092004776000977,
+      "learning_rate": 2.2157996146435453e-06,
+      "loss": 2.5421,
+      "step": 25
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 11.274575233459473,
+      "learning_rate": 4.624277456647399e-06,
+      "loss": 1.9399,
+      "step": 50
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 10.260701179504395,
+      "learning_rate": 7.032755298651253e-06,
+      "loss": 1.5011,
+      "step": 75
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 9.719381332397461,
+      "learning_rate": 9.441233140655107e-06,
+      "loss": 1.3912,
+      "step": 100
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 10.234914779663086,
+      "learning_rate": 1.184971098265896e-05,
+      "loss": 1.2166,
+      "step": 125
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 13.722230911254883,
+      "learning_rate": 1.4258188824662813e-05,
+      "loss": 1.1751,
+      "step": 150
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 17.890995025634766,
+      "learning_rate": 1.6666666666666667e-05,
+      "loss": 1.1862,
+      "step": 175
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 13.72986125946045,
+      "learning_rate": 1.907514450867052e-05,
+      "loss": 0.8828,
+      "step": 200
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 12.903642654418945,
+      "learning_rate": 2.1483622350674377e-05,
+      "loss": 0.9541,
+      "step": 225
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 8.937955856323242,
+      "learning_rate": 2.3892100192678228e-05,
+      "loss": 0.9032,
+      "step": 250
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 10.743949890136719,
+      "learning_rate": 2.6300578034682083e-05,
+      "loss": 1.0891,
+      "step": 275
+    },
+    {
+      "epoch": 0.17,
+      "grad_norm": 9.766569137573242,
+      "learning_rate": 2.8709055876685937e-05,
+      "loss": 0.8951,
+      "step": 300
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 10.973127365112305,
+      "learning_rate": 3.111753371868979e-05,
+      "loss": 1.0709,
+      "step": 325
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 13.805391311645508,
+      "learning_rate": 3.352601156069364e-05,
+      "loss": 1.1141,
+      "step": 350
+    },
+    {
+      "epoch": 0.22,
+      "grad_norm": 7.355842113494873,
+      "learning_rate": 3.59344894026975e-05,
+      "loss": 1.0031,
+      "step": 375
+    },
+    {
+      "epoch": 0.23,
+      "grad_norm": 10.688377380371094,
+      "learning_rate": 3.834296724470135e-05,
+      "loss": 1.0344,
+      "step": 400
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 8.896137237548828,
+      "learning_rate": 4.07514450867052e-05,
+      "loss": 1.0991,
+      "step": 425
+    },
+    {
+      "epoch": 0.26,
+      "grad_norm": 11.027874946594238,
+      "learning_rate": 4.3159922928709055e-05,
+      "loss": 1.0221,
+      "step": 450
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 27.792613983154297,
+      "learning_rate": 4.556840077071291e-05,
+      "loss": 0.8256,
+      "step": 475
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 8.005478858947754,
+      "learning_rate": 4.7976878612716764e-05,
+      "loss": 0.9862,
+      "step": 500
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 11.962843894958496,
+      "learning_rate": 4.995709995709996e-05,
+      "loss": 1.0462,
+      "step": 525
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 7.288177013397217,
+      "learning_rate": 4.9688974688974696e-05,
+      "loss": 0.9999,
+      "step": 550
+    },
+    {
+      "epoch": 0.33,
+      "grad_norm": 3.769800901412964,
+      "learning_rate": 4.9420849420849425e-05,
+      "loss": 1.1262,
+      "step": 575
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 8.510008811950684,
+      "learning_rate": 4.9152724152724154e-05,
+      "loss": 0.944,
+      "step": 600
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 8.264263153076172,
+      "learning_rate": 4.888459888459889e-05,
+      "loss": 0.9652,
+      "step": 625
+    },
+    {
+      "epoch": 0.38,
+      "grad_norm": 10.604584693908691,
+      "learning_rate": 4.861647361647362e-05,
+      "loss": 0.9246,
+      "step": 650
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 5.483927249908447,
+      "learning_rate": 4.834834834834835e-05,
+      "loss": 1.0557,
+      "step": 675
+    },
+    {
+      "epoch": 0.41,
+      "grad_norm": 10.221104621887207,
+      "learning_rate": 4.808022308022308e-05,
+      "loss": 0.7906,
+      "step": 700
+    },
+    {
+      "epoch": 0.42,
+      "grad_norm": 5.725340843200684,
+      "learning_rate": 4.781209781209782e-05,
+      "loss": 0.9,
+      "step": 725
+    },
+    {
+      "epoch": 0.43,
+      "grad_norm": 7.057939052581787,
+      "learning_rate": 4.754397254397255e-05,
+      "loss": 0.8744,
+      "step": 750
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 9.354517936706543,
+      "learning_rate": 4.727584727584728e-05,
+      "loss": 1.0611,
+      "step": 775
+    },
+    {
+      "epoch": 0.46,
+      "grad_norm": 4.2830491065979,
+      "learning_rate": 4.700772200772201e-05,
+      "loss": 0.8894,
+      "step": 800
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 10.702705383300781,
+      "learning_rate": 4.673959673959674e-05,
+      "loss": 1.0246,
+      "step": 825
+    },
+    {
+      "epoch": 0.49,
+      "grad_norm": 4.7863359451293945,
+      "learning_rate": 4.647147147147147e-05,
+      "loss": 0.7967,
+      "step": 850
+    },
+    {
+      "epoch": 0.51,
+      "grad_norm": 7.841278553009033,
+      "learning_rate": 4.62033462033462e-05,
+      "loss": 0.8616,
+      "step": 875
+    },
+    {
+      "epoch": 0.52,
+      "grad_norm": 6.256266117095947,
+      "learning_rate": 4.593522093522094e-05,
+      "loss": 0.9735,
+      "step": 900
+    },
+    {
+      "epoch": 0.54,
+      "grad_norm": 6.177362442016602,
+      "learning_rate": 4.566709566709567e-05,
+      "loss": 0.8287,
+      "step": 925
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 6.448288917541504,
+      "learning_rate": 4.53989703989704e-05,
+      "loss": 1.0062,
+      "step": 950
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 7.912018299102783,
+      "learning_rate": 4.513084513084513e-05,
+      "loss": 1.0174,
+      "step": 975
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 9.055561065673828,
+      "learning_rate": 4.486271986271987e-05,
+      "loss": 0.9616,
+      "step": 1000
+    },
+    {
+      "epoch": 0.59,
+      "grad_norm": 7.433628559112549,
+      "learning_rate": 4.4594594594594596e-05,
+      "loss": 0.9309,
+      "step": 1025
+    },
+    {
+      "epoch": 0.61,
+      "grad_norm": 3.5334954261779785,
+      "learning_rate": 4.4326469326469325e-05,
+      "loss": 0.807,
+      "step": 1050
+    },
+    {
+      "epoch": 0.62,
+      "grad_norm": 5.620259761810303,
+      "learning_rate": 4.405834405834406e-05,
+      "loss": 0.8042,
+      "step": 1075
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 7.67726469039917,
+      "learning_rate": 4.379021879021879e-05,
+      "loss": 0.7394,
+      "step": 1100
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 9.430630683898926,
+      "learning_rate": 4.3522093522093526e-05,
+      "loss": 0.7895,
+      "step": 1125
+    },
+    {
+      "epoch": 0.67,
+      "grad_norm": 9.241034507751465,
+      "learning_rate": 4.3253968253968256e-05,
+      "loss": 0.8032,
+      "step": 1150
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 7.471988201141357,
+      "learning_rate": 4.298584298584299e-05,
+      "loss": 0.7669,
+      "step": 1175
+    },
+    {
+      "epoch": 0.69,
+      "grad_norm": 3.161353588104248,
+      "learning_rate": 4.271771771771772e-05,
+      "loss": 0.8795,
+      "step": 1200
+    },
+    {
+      "epoch": 0.71,
+      "grad_norm": 6.6813201904296875,
+      "learning_rate": 4.244959244959245e-05,
+      "loss": 0.819,
+      "step": 1225
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 16.08786392211914,
+      "learning_rate": 4.2181467181467186e-05,
+      "loss": 0.8779,
+      "step": 1250
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 3.753849506378174,
+      "learning_rate": 4.1913341913341915e-05,
+      "loss": 0.9255,
+      "step": 1275
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 5.4661431312561035,
+      "learning_rate": 4.1645216645216644e-05,
+      "loss": 0.8028,
+      "step": 1300
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 6.559650897979736,
+      "learning_rate": 4.137709137709138e-05,
+      "loss": 0.7556,
+      "step": 1325
+    },
+    {
+      "epoch": 0.78,
+      "grad_norm": 5.7179341316223145,
+      "learning_rate": 4.1108966108966116e-05,
+      "loss": 0.7076,
+      "step": 1350
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 12.687734603881836,
+      "learning_rate": 4.0840840840840845e-05,
+      "loss": 0.8583,
+      "step": 1375
+    },
+    {
+      "epoch": 0.81,
+      "grad_norm": 5.1677398681640625,
+      "learning_rate": 4.0572715572715574e-05,
+      "loss": 0.8944,
+      "step": 1400
+    },
+    {
+      "epoch": 0.83,
+      "grad_norm": 11.766656875610352,
+      "learning_rate": 4.03045903045903e-05,
+      "loss": 0.5638,
+      "step": 1425
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 4.12522554397583,
+      "learning_rate": 4.003646503646504e-05,
+      "loss": 0.6883,
+      "step": 1450
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 2.586186408996582,
+      "learning_rate": 3.976833976833977e-05,
+      "loss": 0.8784,
+      "step": 1475
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 4.075995445251465,
+      "learning_rate": 3.95002145002145e-05,
+      "loss": 0.7596,
+      "step": 1500
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 12.722098350524902,
+      "learning_rate": 3.923208923208923e-05,
+      "loss": 0.6316,
+      "step": 1525
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 12.86962890625,
+      "learning_rate": 3.896396396396397e-05,
+      "loss": 0.7721,
+      "step": 1550
+    },
+    {
+      "epoch": 0.91,
+      "grad_norm": 10.640520095825195,
+      "learning_rate": 3.86958386958387e-05,
+      "loss": 0.711,
+      "step": 1575
+    },
+    {
+      "epoch": 0.93,
+      "grad_norm": 7.080173015594482,
+      "learning_rate": 3.842771342771343e-05,
+      "loss": 0.8145,
+      "step": 1600
+    },
+    {
+      "epoch": 0.94,
+      "grad_norm": 2.814232587814331,
+      "learning_rate": 3.815958815958816e-05,
+      "loss": 0.8509,
+      "step": 1625
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 12.209416389465332,
+      "learning_rate": 3.789146289146289e-05,
+      "loss": 0.8037,
+      "step": 1650
+    },
+    {
+      "epoch": 0.97,
+      "grad_norm": 3.5646421909332275,
+      "learning_rate": 3.762333762333762e-05,
+      "loss": 0.8347,
+      "step": 1675
+    },
+    {
+      "epoch": 0.98,
+      "grad_norm": 8.00243854522705,
+      "learning_rate": 3.735521235521236e-05,
+      "loss": 0.7534,
+      "step": 1700
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 5.955112934112549,
+      "learning_rate": 3.708708708708709e-05,
+      "loss": 0.9062,
+      "step": 1725
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7815393518518519,
+      "eval_f1_macro": 0.36763187534206965,
+      "eval_f1_micro": 0.7815393518518519,
+      "eval_f1_weighted": 0.7649698649865229,
+      "eval_loss": 0.6848556399345398,
+      "eval_precision_macro": 0.4527657259795588,
+      "eval_precision_micro": 0.7815393518518519,
+      "eval_precision_weighted": 0.7691072277887989,
+      "eval_recall_macro": 0.3815255183458625,
+      "eval_recall_micro": 0.7815393518518519,
+      "eval_recall_weighted": 0.7815393518518519,
+      "eval_runtime": 3330.8434,
+      "eval_samples_per_second": 1.038,
+      "eval_steps_per_second": 0.065,
+      "step": 1727
+    },
+    {
+      "epoch": 1.01,
+      "grad_norm": 3.4409847259521484,
+      "learning_rate": 3.681896181896182e-05,
+      "loss": 0.8201,
+      "step": 1750
+    },
+    {
+      "epoch": 1.03,
+      "grad_norm": 7.564268589019775,
+      "learning_rate": 3.655083655083655e-05,
+      "loss": 0.548,
+      "step": 1775
+    },
+    {
+      "epoch": 1.04,
+      "grad_norm": 10.243828773498535,
+      "learning_rate": 3.628271128271129e-05,
+      "loss": 0.6348,
+      "step": 1800
+    },
+    {
+      "epoch": 1.06,
+      "grad_norm": 14.57152271270752,
+      "learning_rate": 3.6014586014586017e-05,
+      "loss": 0.66,
+      "step": 1825
+    },
+    {
+      "epoch": 1.07,
+      "grad_norm": 4.4091973304748535,
+      "learning_rate": 3.5746460746460746e-05,
+      "loss": 0.8169,
+      "step": 1850
+    },
+    {
+      "epoch": 1.09,
+      "grad_norm": 5.423861503601074,
+      "learning_rate": 3.547833547833548e-05,
+      "loss": 0.7353,
+      "step": 1875
+    },
+    {
+      "epoch": 1.1,
+      "grad_norm": 6.510718822479248,
+      "learning_rate": 3.521021021021021e-05,
+      "loss": 0.7008,
+      "step": 1900
+    },
+    {
+      "epoch": 1.11,
+      "grad_norm": 2.5035436153411865,
+      "learning_rate": 3.4942084942084947e-05,
+      "loss": 0.5975,
+      "step": 1925
+    },
+    {
+      "epoch": 1.13,
+      "grad_norm": 15.364286422729492,
+      "learning_rate": 3.4673959673959676e-05,
+      "loss": 0.8198,
+      "step": 1950
+    },
+    {
+      "epoch": 1.14,
+      "grad_norm": 16.03240966796875,
+      "learning_rate": 3.440583440583441e-05,
+      "loss": 0.7089,
+      "step": 1975
+    },
+    {
+      "epoch": 1.16,
+      "grad_norm": 8.521039962768555,
+      "learning_rate": 3.413770913770914e-05,
+      "loss": 0.6514,
+      "step": 2000
+    },
+    {
+      "epoch": 1.17,
+      "grad_norm": 5.206024169921875,
+      "learning_rate": 3.386958386958387e-05,
+      "loss": 0.7545,
+      "step": 2025
+    },
+    {
+      "epoch": 1.19,
+      "grad_norm": 7.756472110748291,
+      "learning_rate": 3.36014586014586e-05,
+      "loss": 0.8055,
+      "step": 2050
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 17.274944305419922,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.7212,
+      "step": 2075
+    },
+    {
+      "epoch": 1.22,
+      "grad_norm": 5.303420543670654,
+      "learning_rate": 3.3065208065208064e-05,
+      "loss": 0.7299,
+      "step": 2100
+    },
+    {
+      "epoch": 1.23,
+      "grad_norm": 5.07370138168335,
+      "learning_rate": 3.27970827970828e-05,
+      "loss": 0.5858,
+      "step": 2125
+    },
+    {
+      "epoch": 1.24,
+      "grad_norm": 6.2755937576293945,
+      "learning_rate": 3.252895752895753e-05,
+      "loss": 0.7687,
+      "step": 2150
+    },
+    {
+      "epoch": 1.26,
+      "grad_norm": 12.160276412963867,
+      "learning_rate": 3.227155727155727e-05,
+      "loss": 0.727,
+      "step": 2175
+    },
+    {
+      "epoch": 1.27,
+      "grad_norm": 3.9984447956085205,
+      "learning_rate": 3.2003432003432e-05,
+      "loss": 0.6697,
+      "step": 2200
+    },
+    {
+      "epoch": 1.29,
+      "grad_norm": 5.756568908691406,
+      "learning_rate": 3.173530673530674e-05,
+      "loss": 0.7912,
+      "step": 2225
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 8.783411026000977,
+      "learning_rate": 3.1467181467181466e-05,
+      "loss": 0.7035,
+      "step": 2250
+    },
+    {
+      "epoch": 1.32,
+      "grad_norm": 5.451704502105713,
+      "learning_rate": 3.1199056199056196e-05,
+      "loss": 0.7144,
+      "step": 2275
+    },
+    {
+      "epoch": 1.33,
+      "grad_norm": 5.039503574371338,
+      "learning_rate": 3.093093093093093e-05,
+      "loss": 0.7893,
+      "step": 2300
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 12.280179023742676,
+      "learning_rate": 3.066280566280567e-05,
+      "loss": 0.5903,
+      "step": 2325
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 6.999240398406982,
+      "learning_rate": 3.0394680394680397e-05,
+      "loss": 0.651,
+      "step": 2350
+    },
+    {
+      "epoch": 1.38,
+      "grad_norm": 7.355953216552734,
+      "learning_rate": 3.012655512655513e-05,
+      "loss": 0.878,
+      "step": 2375
+    },
+    {
+      "epoch": 1.39,
+      "grad_norm": 12.737029075622559,
+      "learning_rate": 2.9858429858429858e-05,
+      "loss": 0.731,
+      "step": 2400
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 4.184784889221191,
+      "learning_rate": 2.959030459030459e-05,
+      "loss": 0.8775,
+      "step": 2425
+    },
+    {
+      "epoch": 1.42,
+      "grad_norm": 9.188583374023438,
+      "learning_rate": 2.9322179322179323e-05,
+      "loss": 0.7259,
+      "step": 2450
+    },
+    {
+      "epoch": 1.43,
+      "grad_norm": 12.98018741607666,
+      "learning_rate": 2.906477906477907e-05,
+      "loss": 0.6219,
+      "step": 2475
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 12.084989547729492,
+      "learning_rate": 2.87966537966538e-05,
+      "loss": 0.5074,
+      "step": 2500
+    },
+    {
+      "epoch": 1.46,
+      "grad_norm": 5.283312797546387,
+      "learning_rate": 2.852852852852853e-05,
+      "loss": 0.6606,
+      "step": 2525
+    },
+    {
+      "epoch": 1.48,
+      "grad_norm": 19.32860565185547,
+      "learning_rate": 2.826040326040326e-05,
+      "loss": 0.7651,
+      "step": 2550
+    },
+    {
+      "epoch": 1.49,
+      "grad_norm": 4.0794901847839355,
+      "learning_rate": 2.7992277992277993e-05,
+      "loss": 0.6737,
+      "step": 2575
+    },
+    {
+      "epoch": 1.51,
+      "grad_norm": 33.1405029296875,
+      "learning_rate": 2.7724152724152726e-05,
+      "loss": 0.654,
+      "step": 2600
+    },
+    {
+      "epoch": 1.52,
+      "grad_norm": 4.921344757080078,
+      "learning_rate": 2.7456027456027455e-05,
+      "loss": 0.7194,
+      "step": 2625
+    },
+    {
+      "epoch": 1.53,
+      "grad_norm": 5.453707695007324,
+      "learning_rate": 2.7187902187902187e-05,
+      "loss": 0.6851,
+      "step": 2650
+    },
+    {
+      "epoch": 1.55,
+      "grad_norm": 8.766169548034668,
+      "learning_rate": 2.6919776919776923e-05,
+      "loss": 0.8228,
+      "step": 2675
+    },
+    {
+      "epoch": 1.56,
+      "grad_norm": 8.957389831542969,
+      "learning_rate": 2.6651651651651656e-05,
+      "loss": 0.6645,
+      "step": 2700
+    },
+    {
+      "epoch": 1.58,
+      "grad_norm": 5.715158939361572,
+      "learning_rate": 2.6383526383526385e-05,
+      "loss": 0.6905,
+      "step": 2725
+    },
+    {
+      "epoch": 1.59,
+      "grad_norm": 6.306962490081787,
+      "learning_rate": 2.6115401115401117e-05,
+      "loss": 0.6924,
+      "step": 2750
+    },
+    {
+      "epoch": 1.61,
+      "grad_norm": 8.548517227172852,
+      "learning_rate": 2.5847275847275846e-05,
+      "loss": 0.8402,
+      "step": 2775
+    },
+    {
+      "epoch": 1.62,
+      "grad_norm": 7.5719895362854,
+      "learning_rate": 2.557915057915058e-05,
+      "loss": 0.7758,
+      "step": 2800
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 8.910326957702637,
+      "learning_rate": 2.531102531102531e-05,
+      "loss": 0.5645,
+      "step": 2825
+    },
+    {
+      "epoch": 1.65,
+      "grad_norm": 8.304277420043945,
+      "learning_rate": 2.504290004290004e-05,
+      "loss": 0.7066,
+      "step": 2850
+    },
+    {
+      "epoch": 1.66,
+      "grad_norm": 13.99254035949707,
+      "learning_rate": 2.4774774774774777e-05,
+      "loss": 0.7396,
+      "step": 2875
+    },
+    {
+      "epoch": 1.68,
+      "grad_norm": 3.806931257247925,
+      "learning_rate": 2.4506649506649506e-05,
+      "loss": 0.6334,
+      "step": 2900
+    },
+    {
+      "epoch": 1.69,
+      "grad_norm": 6.550988674163818,
+      "learning_rate": 2.423852423852424e-05,
+      "loss": 0.9251,
+      "step": 2925
+    },
+    {
+      "epoch": 1.71,
+      "grad_norm": 3.1442198753356934,
+      "learning_rate": 2.397039897039897e-05,
+      "loss": 0.546,
+      "step": 2950
+    },
+    {
+      "epoch": 1.72,
+      "grad_norm": 5.249305248260498,
+      "learning_rate": 2.3702273702273703e-05,
+      "loss": 0.6419,
+      "step": 2975
+    },
+    {
+      "epoch": 1.74,
+      "grad_norm": 5.300810813903809,
+      "learning_rate": 2.3434148434148436e-05,
+      "loss": 0.535,
+      "step": 3000
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 8.079426765441895,
+      "learning_rate": 2.3166023166023168e-05,
+      "loss": 0.8142,
+      "step": 3025
+    },
+    {
+      "epoch": 1.77,
+      "grad_norm": 6.737719535827637,
+      "learning_rate": 2.28978978978979e-05,
+      "loss": 0.5974,
+      "step": 3050
+    },
+    {
+      "epoch": 1.78,
+      "grad_norm": 5.037626266479492,
+      "learning_rate": 2.262977262977263e-05,
+      "loss": 0.7068,
+      "step": 3075
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 3.4523470401763916,
+      "learning_rate": 2.2361647361647362e-05,
+      "loss": 0.5756,
+      "step": 3100
+    },
+    {
+      "epoch": 1.81,
+      "grad_norm": 2.2966675758361816,
+      "learning_rate": 2.2093522093522095e-05,
+      "loss": 0.4941,
+      "step": 3125
+    },
+    {
+      "epoch": 1.82,
+      "grad_norm": 11.497820854187012,
+      "learning_rate": 2.1825396825396827e-05,
+      "loss": 0.8353,
+      "step": 3150
+    },
+    {
+      "epoch": 1.84,
+      "grad_norm": 11.813599586486816,
+      "learning_rate": 2.1557271557271557e-05,
+      "loss": 0.8303,
+      "step": 3175
+    },
+    {
+      "epoch": 1.85,
+      "grad_norm": 2.504293203353882,
+      "learning_rate": 2.128914628914629e-05,
+      "loss": 0.5574,
+      "step": 3200
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 8.983193397521973,
+      "learning_rate": 2.102102102102102e-05,
+      "loss": 0.6033,
+      "step": 3225
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 7.629824161529541,
+      "learning_rate": 2.0752895752895754e-05,
+      "loss": 0.6305,
+      "step": 3250
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 10.86919116973877,
+      "learning_rate": 2.0484770484770487e-05,
+      "loss": 0.6045,
+      "step": 3275
+    },
+    {
+      "epoch": 1.91,
+      "grad_norm": 6.0854387283325195,
+      "learning_rate": 2.0216645216645216e-05,
+      "loss": 0.6208,
+      "step": 3300
+    },
+    {
+      "epoch": 1.93,
+      "grad_norm": 6.228011131286621,
+      "learning_rate": 1.994851994851995e-05,
+      "loss": 0.6249,
+      "step": 3325
+    },
+    {
+      "epoch": 1.94,
+      "grad_norm": 7.076812744140625,
+      "learning_rate": 1.968039468039468e-05,
+      "loss": 0.6176,
+      "step": 3350
+    },
+    {
+      "epoch": 1.95,
+      "grad_norm": 7.893978595733643,
+      "learning_rate": 1.9412269412269413e-05,
+      "loss": 0.7779,
+      "step": 3375
+    },
+    {
+      "epoch": 1.97,
+      "grad_norm": 7.72683048248291,
+      "learning_rate": 1.9144144144144142e-05,
+      "loss": 0.669,
+      "step": 3400
+    },
+    {
+      "epoch": 1.98,
+      "grad_norm": 3.820025682449341,
+      "learning_rate": 1.887601887601888e-05,
+      "loss": 0.6182,
+      "step": 3425
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 5.642152309417725,
+      "learning_rate": 1.8607893607893607e-05,
+      "loss": 0.6453,
+      "step": 3450
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8454861111111112,
+      "eval_f1_macro": 0.5038698715266201,
+      "eval_f1_micro": 0.845486111111111,
+      "eval_f1_weighted": 0.830657390454042,
+      "eval_loss": 0.4813511371612549,
+      "eval_precision_macro": 0.7578927995388053,
+      "eval_precision_micro": 0.8454861111111112,
+      "eval_precision_weighted": 0.8347843050246918,
+      "eval_recall_macro": 0.456096431265928,
+      "eval_recall_micro": 0.8454861111111112,
+      "eval_recall_weighted": 0.8454861111111112,
+      "eval_runtime": 3281.605,
+      "eval_samples_per_second": 1.053,
+      "eval_steps_per_second": 0.066,
+      "step": 3454
+    },
+    {
+      "epoch": 2.01,
+      "grad_norm": 8.240702629089355,
+      "learning_rate": 1.833976833976834e-05,
+      "loss": 0.4854,
+      "step": 3475
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 7.087810039520264,
+      "learning_rate": 1.8071643071643072e-05,
+      "loss": 0.5459,
+      "step": 3500
+    },
+    {
+      "epoch": 2.04,
+      "grad_norm": 0.7334815859794617,
+      "learning_rate": 1.7803517803517805e-05,
+      "loss": 0.5212,
+      "step": 3525
+    },
+    {
+      "epoch": 2.06,
+      "grad_norm": 6.095980167388916,
+      "learning_rate": 1.7535392535392538e-05,
+      "loss": 0.5191,
+      "step": 3550
+    },
+    {
+      "epoch": 2.07,
+      "grad_norm": 10.246546745300293,
+      "learning_rate": 1.7267267267267267e-05,
+      "loss": 0.6645,
+      "step": 3575
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 3.6809566020965576,
+      "learning_rate": 1.6999141999142e-05,
+      "loss": 0.5629,
+      "step": 3600
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 13.609752655029297,
+      "learning_rate": 1.673101673101673e-05,
+      "loss": 0.6598,
+      "step": 3625
+    },
+    {
+      "epoch": 2.11,
+      "grad_norm": 5.554472923278809,
+      "learning_rate": 1.6462891462891464e-05,
+      "loss": 0.4935,
+      "step": 3650
+    },
+    {
+      "epoch": 2.13,
+      "grad_norm": 16.72881317138672,
+      "learning_rate": 1.6205491205491204e-05,
+      "loss": 0.379,
+      "step": 3675
+    },
+    {
+      "epoch": 2.14,
+      "grad_norm": 9.573266983032227,
+      "learning_rate": 1.593736593736594e-05,
+      "loss": 0.7141,
+      "step": 3700
+    },
+    {
+      "epoch": 2.16,
+      "grad_norm": 5.245655059814453,
+      "learning_rate": 1.566924066924067e-05,
+      "loss": 0.7518,
+      "step": 3725
+    },
+    {
+      "epoch": 2.17,
+      "grad_norm": 13.11945915222168,
+      "learning_rate": 1.54011154011154e-05,
+      "loss": 0.6588,
+      "step": 3750
+    },
+    {
+      "epoch": 2.19,
+      "grad_norm": 6.219137191772461,
+      "learning_rate": 1.5132990132990132e-05,
+      "loss": 0.5884,
+      "step": 3775
+    },
+    {
+      "epoch": 2.2,
+      "grad_norm": 6.516097545623779,
+      "learning_rate": 1.4864864864864867e-05,
+      "loss": 0.4572,
+      "step": 3800
+    },
+    {
+      "epoch": 2.21,
+      "grad_norm": 4.23282527923584,
+      "learning_rate": 1.4596739596739597e-05,
+      "loss": 0.6028,
+      "step": 3825
+    },
+    {
+      "epoch": 2.23,
+      "grad_norm": 2.6169815063476562,
+      "learning_rate": 1.4328614328614328e-05,
+      "loss": 0.6198,
+      "step": 3850
+    },
+    {
+      "epoch": 2.24,
+      "grad_norm": 4.720090389251709,
+      "learning_rate": 1.4060489060489059e-05,
+      "loss": 0.556,
+      "step": 3875
+    },
+    {
+      "epoch": 2.26,
+      "grad_norm": 7.367048263549805,
+      "learning_rate": 1.3792363792363793e-05,
+      "loss": 0.5812,
+      "step": 3900
+    },
+    {
+      "epoch": 2.27,
+      "grad_norm": 7.3934173583984375,
+      "learning_rate": 1.3524238524238526e-05,
+      "loss": 0.6141,
+      "step": 3925
+    },
+    {
+      "epoch": 2.29,
+      "grad_norm": 10.528743743896484,
+      "learning_rate": 1.3256113256113257e-05,
+      "loss": 0.6047,
+      "step": 3950
+    },
+    {
+      "epoch": 2.3,
+      "grad_norm": 7.282771110534668,
+      "learning_rate": 1.2987987987987987e-05,
+      "loss": 0.6281,
+      "step": 3975
+    },
+    {
+      "epoch": 2.32,
+      "grad_norm": 11.970826148986816,
+      "learning_rate": 1.2719862719862722e-05,
+      "loss": 0.4103,
+      "step": 4000
+    },
+    {
+      "epoch": 2.33,
+      "grad_norm": 6.620480537414551,
+      "learning_rate": 1.2451737451737452e-05,
+      "loss": 0.5386,
+      "step": 4025
+    },
+    {
+      "epoch": 2.35,
+      "grad_norm": 5.699476718902588,
+      "learning_rate": 1.2183612183612183e-05,
+      "loss": 0.6507,
+      "step": 4050
+    },
+    {
+      "epoch": 2.36,
+      "grad_norm": 2.477766275405884,
+      "learning_rate": 1.1915486915486916e-05,
+      "loss": 0.524,
+      "step": 4075
+    },
+    {
+      "epoch": 2.37,
+      "grad_norm": 6.517852306365967,
+      "learning_rate": 1.1647361647361647e-05,
+      "loss": 0.6979,
+      "step": 4100
+    },
+    {
+      "epoch": 2.39,
+      "grad_norm": 4.506691932678223,
+      "learning_rate": 1.1379236379236379e-05,
+      "loss": 0.4651,
+      "step": 4125
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 6.522432804107666,
+      "learning_rate": 1.1111111111111112e-05,
+      "loss": 0.6845,
+      "step": 4150
+    },
+    {
+      "epoch": 2.42,
+      "grad_norm": 12.015291213989258,
+      "learning_rate": 1.0842985842985844e-05,
+      "loss": 0.5348,
+      "step": 4175
+    },
+    {
+      "epoch": 2.43,
+      "grad_norm": 7.297937393188477,
+      "learning_rate": 1.0574860574860575e-05,
+      "loss": 0.5412,
+      "step": 4200
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 13.665657997131348,
+      "learning_rate": 1.0306735306735307e-05,
+      "loss": 0.5137,
+      "step": 4225
+    },
+    {
+      "epoch": 2.46,
+      "grad_norm": 11.743260383605957,
+      "learning_rate": 1.0038610038610038e-05,
+      "loss": 0.5738,
+      "step": 4250
+    },
+    {
+      "epoch": 2.48,
+      "grad_norm": 10.24691104888916,
+      "learning_rate": 9.77048477048477e-06,
+      "loss": 0.5134,
+      "step": 4275
+    },
+    {
+      "epoch": 2.49,
+      "grad_norm": 4.543239116668701,
+      "learning_rate": 9.502359502359502e-06,
+      "loss": 0.6055,
+      "step": 4300
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 7.915064811706543,
+      "learning_rate": 9.234234234234234e-06,
+      "loss": 0.5153,
+      "step": 4325
+    },
+    {
+      "epoch": 2.52,
+      "grad_norm": 8.37210750579834,
+      "learning_rate": 8.966108966108967e-06,
+      "loss": 0.4754,
+      "step": 4350
+    },
+    {
+      "epoch": 2.53,
+      "grad_norm": 7.3417487144470215,
+      "learning_rate": 8.6979836979837e-06,
+      "loss": 0.6285,
+      "step": 4375
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 9.027023315429688,
+      "learning_rate": 8.42985842985843e-06,
+      "loss": 0.4925,
+      "step": 4400
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 7.813179016113281,
+      "learning_rate": 8.161733161733163e-06,
+      "loss": 0.5089,
+      "step": 4425
+    },
+    {
+      "epoch": 2.58,
+      "grad_norm": 13.45531940460205,
+      "learning_rate": 7.893607893607893e-06,
+      "loss": 0.4717,
+      "step": 4450
+    },
+    {
+      "epoch": 2.59,
+      "grad_norm": 7.09887170791626,
+      "learning_rate": 7.625482625482626e-06,
+      "loss": 0.6506,
+      "step": 4475
+    },
+    {
+      "epoch": 2.61,
+      "grad_norm": 4.6297383308410645,
+      "learning_rate": 7.357357357357357e-06,
+      "loss": 0.4828,
+      "step": 4500
+    },
+    {
+      "epoch": 2.62,
+      "grad_norm": 8.980986595153809,
+      "learning_rate": 7.089232089232089e-06,
+      "loss": 0.4233,
+      "step": 4525
+    },
+    {
+      "epoch": 2.63,
+      "grad_norm": 26.76249885559082,
+      "learning_rate": 6.821106821106821e-06,
+      "loss": 0.5748,
+      "step": 4550
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 8.72842788696289,
+      "learning_rate": 6.552981552981553e-06,
+      "loss": 0.6565,
+      "step": 4575
+    },
+    {
+      "epoch": 2.66,
+      "grad_norm": 9.191315650939941,
+      "learning_rate": 6.284856284856284e-06,
+      "loss": 0.5332,
+      "step": 4600
+    },
+    {
+      "epoch": 2.68,
+      "grad_norm": 7.631181240081787,
+      "learning_rate": 6.016731016731017e-06,
+      "loss": 0.4692,
+      "step": 4625
+    },
+    {
+      "epoch": 2.69,
+      "grad_norm": 7.811351299285889,
+      "learning_rate": 5.748605748605749e-06,
+      "loss": 0.6485,
+      "step": 4650
+    },
+    {
+      "epoch": 2.71,
+      "grad_norm": 8.997116088867188,
+      "learning_rate": 5.480480480480481e-06,
+      "loss": 0.4207,
+      "step": 4675
+    },
+    {
+      "epoch": 2.72,
+      "grad_norm": 9.758033752441406,
+      "learning_rate": 5.212355212355213e-06,
+      "loss": 0.5205,
+      "step": 4700
+    },
+    {
+      "epoch": 2.74,
+      "grad_norm": 5.98590612411499,
+      "learning_rate": 4.944229944229944e-06,
+      "loss": 0.6115,
+      "step": 4725
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 11.506319046020508,
+      "learning_rate": 4.676104676104676e-06,
+      "loss": 0.4449,
+      "step": 4750
+    },
+    {
+      "epoch": 2.76,
+      "grad_norm": 7.969517230987549,
+      "learning_rate": 4.4079794079794084e-06,
+      "loss": 0.5384,
+      "step": 4775
+    },
+    {
+      "epoch": 2.78,
+      "grad_norm": 0.8463253974914551,
+      "learning_rate": 4.13985413985414e-06,
+      "loss": 0.4981,
+      "step": 4800
+    },
+    {
+      "epoch": 2.79,
+      "grad_norm": 12.771890640258789,
+      "learning_rate": 3.871728871728872e-06,
+      "loss": 0.4786,
+      "step": 4825
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 0.6047688126564026,
+      "learning_rate": 3.603603603603604e-06,
+      "loss": 0.4913,
+      "step": 4850
+    },
+    {
+      "epoch": 2.82,
+      "grad_norm": 7.117040157318115,
+      "learning_rate": 3.3354783354783355e-06,
+      "loss": 0.5386,
+      "step": 4875
+    },
+    {
+      "epoch": 2.84,
+      "grad_norm": 5.262890338897705,
+      "learning_rate": 3.0673530673530676e-06,
+      "loss": 0.6115,
+      "step": 4900
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 0.5500399470329285,
+      "learning_rate": 2.7992277992277993e-06,
+      "loss": 0.5285,
+      "step": 4925
+    },
+    {
+      "epoch": 2.87,
+      "grad_norm": 2.5653154850006104,
+      "learning_rate": 2.531102531102531e-06,
+      "loss": 0.3621,
+      "step": 4950
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 5.71751594543457,
+      "learning_rate": 2.262977262977263e-06,
+      "loss": 0.6007,
+      "step": 4975
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 12.138904571533203,
+      "learning_rate": 1.9948519948519947e-06,
+      "loss": 0.5797,
+      "step": 5000
+    },
+    {
+      "epoch": 2.91,
+      "grad_norm": 8.795024871826172,
+      "learning_rate": 1.7267267267267268e-06,
+      "loss": 0.5585,
+      "step": 5025
+    },
+    {
+      "epoch": 2.92,
+      "grad_norm": 3.7619569301605225,
+      "learning_rate": 1.4586014586014587e-06,
+      "loss": 0.3951,
+      "step": 5050
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 2.763073205947876,
+      "learning_rate": 1.1904761904761904e-06,
+      "loss": 0.287,
+      "step": 5075
+    },
+    {
+      "epoch": 2.95,
+      "grad_norm": 7.337412357330322,
+      "learning_rate": 9.223509223509224e-07,
+      "loss": 0.4416,
+      "step": 5100
+    },
+    {
+      "epoch": 2.97,
+      "grad_norm": 4.266438961029053,
+      "learning_rate": 6.542256542256542e-07,
+      "loss": 0.5275,
+      "step": 5125
+    },
+    {
+      "epoch": 2.98,
+      "grad_norm": 8.12879753112793,
+      "learning_rate": 3.8610038610038613e-07,
+      "loss": 0.4297,
+      "step": 5150
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 6.199108123779297,
+      "learning_rate": 1.1797511797511798e-07,
+      "loss": 0.4389,
+      "step": 5175
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.8602430555555556,
+      "eval_f1_macro": 0.6149830093941424,
+      "eval_f1_micro": 0.8602430555555556,
+      "eval_f1_weighted": 0.8515059109185544,
+      "eval_loss": 0.4315283000469208,
+      "eval_precision_macro": 0.7610988679415244,
+      "eval_precision_micro": 0.8602430555555556,
+      "eval_precision_weighted": 0.8532444856848228,
+      "eval_recall_macro": 0.5527145295483504,
+      "eval_recall_micro": 0.8602430555555556,
+      "eval_recall_weighted": 0.8602430555555556,
+      "eval_runtime": 3373.7409,
+      "eval_samples_per_second": 1.024,
+      "eval_steps_per_second": 0.064,
+      "step": 5181
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5181,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 1.135272556528692e+19,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-5181/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8cf9cc5217dd984b88cf855ba871f8174f809b8ece0ada12155d52dd6c77308
+size 4920

config.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "_name_or_path": "google/vit-large-patch16-224",
+  "_num_labels": 11,
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "encoder_stride": 16,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "altright",
+    "1": "bluesky",
+    "2": "facebook",
+    "3": "fediverse",
+    "4": "instagram",
+    "5": "irrelevant",
+    "6": "ngl",
+    "7": "reddit",
+    "8": "threads",
+    "9": "tumblr",
+    "10": "twitter"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "altright": 0,
+    "bluesky": 1,
+    "facebook": 2,
+    "fediverse": 3,
+    "instagram": 4,
+    "irrelevant": 5,
+    "ngl": 6,
+    "reddit": 7,
+    "threads": 8,
+    "tumblr": 9,
+    "twitter": 10
+  },
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.3"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb13c4b35ce597cb88a875a80db2847e13688ae876bf69bd6a658d707c9e1ca5
+size 1213298172

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_valid_processor_keys": [
+    "images",
+    "do_resize",
+    "size",
+    "resample",
+    "do_rescale",
+    "rescale_factor",
+    "do_normalize",
+    "image_mean",
+    "image_std",
+    "return_tensors",
+    "data_format",
+    "input_data_format"
+  ],
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "ViTImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

runs/Apr15_18-44-28_496e75b93dc4/events.out.tfevents.1713206669.496e75b93dc4.24181.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fbdf9df3502357d1e23696f1a0c279cfd071be7410b7166f70444cfd62cafe3
-size 50102

 version https://git-lfs.github.com/spec/v1
+oid sha256:a79c786e9cbeaa254f4714037c28e6a5a55e26eedd0415b62564706adf2f0489
+size 51500

runs/Apr15_18-44-28_496e75b93dc4/events.out.tfevents.1713286725.496e75b93dc4.24181.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bfd8794b75e04ecf6d2dc53d79f89032f56d4a943065f1ac8ca1666c98414586
+size 921

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8cf9cc5217dd984b88cf855ba871f8174f809b8ece0ada12155d52dd6c77308
+size 4920

training_params.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "data_path": "xblock-large-patch2-224/autotrain-data",
+    "model": "google/vit-large-patch16-224",
+    "username": "howdyaendra",
+    "lr": 5e-05,
+    "epochs": 3,
+    "batch_size": 8,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "validation",
+    "logging_steps": -1,
+    "project_name": "xblock-large-patch2-224",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "save_strategy": "epoch",
+    "push_to_hub": true,
+    "repo_id": "howdyaendra/xblock-large-patch2-224",
+    "evaluation_strategy": "epoch",
+    "image_column": "autotrain_image",
+    "target_column": "autotrain_label",
+    "log": "tensorboard"
+}