abhishek HF staff committed on Apr 29

Commit

039932b

•

1 Parent(s): 01560f0

Upload folder using huggingface_hub

Browse files

Files changed (22) hide show

README.md +27 -0
added_tokens.json +3 -0
checkpoint-5193/config.json +42 -0
checkpoint-5193/model.safetensors +3 -0
checkpoint-5193/optimizer.pt +3 -0
checkpoint-5193/rng_state_0.pth +3 -0
checkpoint-5193/rng_state_1.pth +3 -0
checkpoint-5193/rng_state_2.pth +3 -0
checkpoint-5193/rng_state_3.pth +3 -0
checkpoint-5193/scheduler.pt +3 -0
checkpoint-5193/trainer_state.json +1509 -0
checkpoint-5193/training_args.bin +3 -0
config.json +42 -0
model.safetensors +3 -0
runs/Apr29_14-53-13_r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p/events.out.tfevents.1714402394.r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p.464.0 +2 -2
runs/Apr29_14-53-13_r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p/events.out.tfevents.1714407850.r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p.464.1 +3 -0
special_tokens_map.json +15 -0
spm.model +3 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
training_args.bin +3 -0
training_params.json +28 -0

README.md ADDED Viewed

	@@ -0,0 +1,27 @@

+---
+tags:
+- autotrain
+- text-regression
+widget:
+- text: "I love AutoTrain"
+datasets:
+- autotrain-m96nh-snymb/autotrain-data
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Regression
+## Validation Metrics
+loss: 0.282262921333313
+mse: 0.2820460796356201
+mae: 0.4189736545085907
+r2: 0.74436353679844
+rmse: 0.5310801267623901
+explained_variance: 0.7570163011550903

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

checkpoint-5193/config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-large",
+  "_num_labels": 1,
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "target"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "target": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

checkpoint-5193/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d68d3238d7e01448edfe7b1983dcf19cc58c6bfbb7628f33a97a5690b6e914d
+size 1740300340

checkpoint-5193/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6f82c9fa96d35c5b639fda51433bc915f1712871b20849a7d91b7c30ad3c8545
+size 3480832048

checkpoint-5193/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9692e0df6d5b78451d0faa94b4317f3c3cd1a1bdea676ec7954514c02b9cfcdc
+size 15024

checkpoint-5193/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e60d67b3848a732be46492193f28d5c098caad9f51a52ca97768faf56dca6421
+size 15024

checkpoint-5193/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02d7e3cf0198b07ae0e4e7c0e7b2fd5824805aaf7c0ec58feda8b2ef7a83c7ef
+size 15024

checkpoint-5193/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5c52c0b9880b9a6b76baf795175159ea7ee4f04a9d7ffb3cd483a38f6311db7
+size 15024

checkpoint-5193/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5ccb544db0a45b77c836df5bb36152996c97ff1a522be11d72b179afc3d5ba75
+size 1064

checkpoint-5193/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1509 @@

+{
+  "best_metric": 0.282262921333313,
+  "best_model_checkpoint": "autotrain-m96nh-snymb/checkpoint-5193",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 5193,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.014442518775274409,
+      "grad_norm": 212.5823516845703,
+      "learning_rate": 7.692307692307692e-08,
+      "loss": 10.235,
+      "step": 25
+    },
+    {
+      "epoch": 0.028885037550548817,
+      "grad_norm": 81.24781799316406,
+      "learning_rate": 1.7307692307692305e-07,
+      "loss": 10.1291,
+      "step": 50
+    },
+    {
+      "epoch": 0.043327556325823226,
+      "grad_norm": 302.68389892578125,
+      "learning_rate": 2.692307692307692e-07,
+      "loss": 9.6898,
+      "step": 75
+    },
+    {
+      "epoch": 0.057770075101097634,
+      "grad_norm": 327.724853515625,
+      "learning_rate": 3.615384615384615e-07,
+      "loss": 10.4787,
+      "step": 100
+    },
+    {
+      "epoch": 0.07221259387637204,
+      "grad_norm": 135.6590576171875,
+      "learning_rate": 4.538461538461538e-07,
+      "loss": 9.5916,
+      "step": 125
+    },
+    {
+      "epoch": 0.08665511265164645,
+      "grad_norm": 108.32626342773438,
+      "learning_rate": 5.5e-07,
+      "loss": 8.8043,
+      "step": 150
+    },
+    {
+      "epoch": 0.10109763142692085,
+      "grad_norm": 73.20294189453125,
+      "learning_rate": 6.461538461538462e-07,
+      "loss": 7.1787,
+      "step": 175
+    },
+    {
+      "epoch": 0.11554015020219527,
+      "grad_norm": 73.47607421875,
+      "learning_rate": 7.423076923076923e-07,
+      "loss": 5.024,
+      "step": 200
+    },
+    {
+      "epoch": 0.12998266897746968,
+      "grad_norm": 73.16763305664062,
+      "learning_rate": 8.384615384615384e-07,
+      "loss": 2.586,
+      "step": 225
+    },
+    {
+      "epoch": 0.14442518775274407,
+      "grad_norm": 42.77627182006836,
+      "learning_rate": 9.346153846153846e-07,
+      "loss": 0.9176,
+      "step": 250
+    },
+    {
+      "epoch": 0.1588677065280185,
+      "grad_norm": 73.87055969238281,
+      "learning_rate": 1.0307692307692306e-06,
+      "loss": 1.0465,
+      "step": 275
+    },
+    {
+      "epoch": 0.1733102253032929,
+      "grad_norm": 29.49479103088379,
+      "learning_rate": 1.1269230769230768e-06,
+      "loss": 0.6761,
+      "step": 300
+    },
+    {
+      "epoch": 0.1877527440785673,
+      "grad_norm": 78.30584716796875,
+      "learning_rate": 1.2230769230769231e-06,
+      "loss": 0.5363,
+      "step": 325
+    },
+    {
+      "epoch": 0.2021952628538417,
+      "grad_norm": 23.788848876953125,
+      "learning_rate": 1.3192307692307692e-06,
+      "loss": 0.437,
+      "step": 350
+    },
+    {
+      "epoch": 0.21663778162911612,
+      "grad_norm": 5.925709247589111,
+      "learning_rate": 1.4153846153846155e-06,
+      "loss": 0.5172,
+      "step": 375
+    },
+    {
+      "epoch": 0.23108030040439054,
+      "grad_norm": 32.11520767211914,
+      "learning_rate": 1.5115384615384615e-06,
+      "loss": 0.509,
+      "step": 400
+    },
+    {
+      "epoch": 0.24552281917966493,
+      "grad_norm": 14.887007713317871,
+      "learning_rate": 1.6076923076923076e-06,
+      "loss": 0.6094,
+      "step": 425
+    },
+    {
+      "epoch": 0.25996533795493937,
+      "grad_norm": 31.848352432250977,
+      "learning_rate": 1.7038461538461536e-06,
+      "loss": 0.5056,
+      "step": 450
+    },
+    {
+      "epoch": 0.27440785673021373,
+      "grad_norm": 72.35002136230469,
+      "learning_rate": 1.8e-06,
+      "loss": 0.4752,
+      "step": 475
+    },
+    {
+      "epoch": 0.28885037550548814,
+      "grad_norm": 12.196418762207031,
+      "learning_rate": 1.896153846153846e-06,
+      "loss": 0.375,
+      "step": 500
+    },
+    {
+      "epoch": 0.30329289428076256,
+      "grad_norm": 25.701213836669922,
+      "learning_rate": 1.9923076923076923e-06,
+      "loss": 0.4961,
+      "step": 525
+    },
+    {
+      "epoch": 0.317735413056037,
+      "grad_norm": 41.281166076660156,
+      "learning_rate": 1.9901562165632354e-06,
+      "loss": 0.4455,
+      "step": 550
+    },
+    {
+      "epoch": 0.3321779318313114,
+      "grad_norm": 19.428579330444336,
+      "learning_rate": 1.979456451958057e-06,
+      "loss": 0.6306,
+      "step": 575
+    },
+    {
+      "epoch": 0.3466204506065858,
+      "grad_norm": 18.564756393432617,
+      "learning_rate": 1.9687566873528784e-06,
+      "loss": 0.399,
+      "step": 600
+    },
+    {
+      "epoch": 0.3610629693818602,
+      "grad_norm": 41.66643524169922,
+      "learning_rate": 1.9580569227476994e-06,
+      "loss": 0.4581,
+      "step": 625
+    },
+    {
+      "epoch": 0.3755054881571346,
+      "grad_norm": 41.7965202331543,
+      "learning_rate": 1.947357158142521e-06,
+      "loss": 0.4799,
+      "step": 650
+    },
+    {
+      "epoch": 0.389948006932409,
+      "grad_norm": 9.624334335327148,
+      "learning_rate": 1.936657393537342e-06,
+      "loss": 0.4632,
+      "step": 675
+    },
+    {
+      "epoch": 0.4043905257076834,
+      "grad_norm": 11.227400779724121,
+      "learning_rate": 1.9259576289321635e-06,
+      "loss": 0.3521,
+      "step": 700
+    },
+    {
+      "epoch": 0.41883304448295783,
+      "grad_norm": 22.89958953857422,
+      "learning_rate": 1.915257864326985e-06,
+      "loss": 0.4414,
+      "step": 725
+    },
+    {
+      "epoch": 0.43327556325823224,
+      "grad_norm": 26.13202476501465,
+      "learning_rate": 1.904558099721806e-06,
+      "loss": 0.3836,
+      "step": 750
+    },
+    {
+      "epoch": 0.44771808203350666,
+      "grad_norm": 55.14106750488281,
+      "learning_rate": 1.8938583351166273e-06,
+      "loss": 0.5554,
+      "step": 775
+    },
+    {
+      "epoch": 0.4621606008087811,
+      "grad_norm": 32.7313346862793,
+      "learning_rate": 1.8831585705114485e-06,
+      "loss": 0.4613,
+      "step": 800
+    },
+    {
+      "epoch": 0.47660311958405543,
+      "grad_norm": 14.63770866394043,
+      "learning_rate": 1.87245880590627e-06,
+      "loss": 0.3672,
+      "step": 825
+    },
+    {
+      "epoch": 0.49104563835932985,
+      "grad_norm": 39.93565368652344,
+      "learning_rate": 1.8617590413010913e-06,
+      "loss": 0.495,
+      "step": 850
+    },
+    {
+      "epoch": 0.5054881571346043,
+      "grad_norm": 13.94717025756836,
+      "learning_rate": 1.8510592766959126e-06,
+      "loss": 0.3883,
+      "step": 875
+    },
+    {
+      "epoch": 0.5199306759098787,
+      "grad_norm": 19.33357810974121,
+      "learning_rate": 1.8403595120907338e-06,
+      "loss": 0.3607,
+      "step": 900
+    },
+    {
+      "epoch": 0.5343731946851531,
+      "grad_norm": 9.50666618347168,
+      "learning_rate": 1.8296597474855553e-06,
+      "loss": 0.4309,
+      "step": 925
+    },
+    {
+      "epoch": 0.5488157134604275,
+      "grad_norm": 99.26518249511719,
+      "learning_rate": 1.8189599828803766e-06,
+      "loss": 0.4734,
+      "step": 950
+    },
+    {
+      "epoch": 0.5632582322357019,
+      "grad_norm": 25.336233139038086,
+      "learning_rate": 1.8082602182751978e-06,
+      "loss": 0.5486,
+      "step": 975
+    },
+    {
+      "epoch": 0.5777007510109763,
+      "grad_norm": 41.74341583251953,
+      "learning_rate": 1.7975604536700191e-06,
+      "loss": 0.378,
+      "step": 1000
+    },
+    {
+      "epoch": 0.5921432697862508,
+      "grad_norm": 37.5980224609375,
+      "learning_rate": 1.7868606890648406e-06,
+      "loss": 0.3274,
+      "step": 1025
+    },
+    {
+      "epoch": 0.6065857885615251,
+      "grad_norm": 46.92686080932617,
+      "learning_rate": 1.7761609244596619e-06,
+      "loss": 0.3263,
+      "step": 1050
+    },
+    {
+      "epoch": 0.6210283073367996,
+      "grad_norm": 20.776403427124023,
+      "learning_rate": 1.7654611598544831e-06,
+      "loss": 0.3563,
+      "step": 1075
+    },
+    {
+      "epoch": 0.635470826112074,
+      "grad_norm": 26.412818908691406,
+      "learning_rate": 1.7547613952493044e-06,
+      "loss": 0.354,
+      "step": 1100
+    },
+    {
+      "epoch": 0.6499133448873483,
+      "grad_norm": 35.234580993652344,
+      "learning_rate": 1.7440616306441259e-06,
+      "loss": 0.4225,
+      "step": 1125
+    },
+    {
+      "epoch": 0.6643558636626228,
+      "grad_norm": 38.81793212890625,
+      "learning_rate": 1.7333618660389472e-06,
+      "loss": 0.3189,
+      "step": 1150
+    },
+    {
+      "epoch": 0.6787983824378971,
+      "grad_norm": 26.322595596313477,
+      "learning_rate": 1.7226621014337684e-06,
+      "loss": 0.3566,
+      "step": 1175
+    },
+    {
+      "epoch": 0.6932409012131716,
+      "grad_norm": 19.16035270690918,
+      "learning_rate": 1.7119623368285897e-06,
+      "loss": 0.3212,
+      "step": 1200
+    },
+    {
+      "epoch": 0.707683419988446,
+      "grad_norm": 26.820486068725586,
+      "learning_rate": 1.7012625722234112e-06,
+      "loss": 0.3318,
+      "step": 1225
+    },
+    {
+      "epoch": 0.7221259387637204,
+      "grad_norm": 9.583172798156738,
+      "learning_rate": 1.6905628076182322e-06,
+      "loss": 0.3306,
+      "step": 1250
+    },
+    {
+      "epoch": 0.7365684575389948,
+      "grad_norm": 67.81623840332031,
+      "learning_rate": 1.6798630430130535e-06,
+      "loss": 0.3503,
+      "step": 1275
+    },
+    {
+      "epoch": 0.7510109763142692,
+      "grad_norm": 26.901622772216797,
+      "learning_rate": 1.6691632784078748e-06,
+      "loss": 0.3112,
+      "step": 1300
+    },
+    {
+      "epoch": 0.7654534950895436,
+      "grad_norm": 19.61265754699707,
+      "learning_rate": 1.6584635138026963e-06,
+      "loss": 0.3354,
+      "step": 1325
+    },
+    {
+      "epoch": 0.779896013864818,
+      "grad_norm": 26.634790420532227,
+      "learning_rate": 1.6477637491975175e-06,
+      "loss": 0.4247,
+      "step": 1350
+    },
+    {
+      "epoch": 0.7943385326400925,
+      "grad_norm": 12.382301330566406,
+      "learning_rate": 1.6370639845923388e-06,
+      "loss": 0.3341,
+      "step": 1375
+    },
+    {
+      "epoch": 0.8087810514153668,
+      "grad_norm": 17.31427574157715,
+      "learning_rate": 1.62636421998716e-06,
+      "loss": 0.3274,
+      "step": 1400
+    },
+    {
+      "epoch": 0.8232235701906413,
+      "grad_norm": 11.828612327575684,
+      "learning_rate": 1.6156644553819815e-06,
+      "loss": 0.3575,
+      "step": 1425
+    },
+    {
+      "epoch": 0.8376660889659157,
+      "grad_norm": 12.682332038879395,
+      "learning_rate": 1.6049646907768028e-06,
+      "loss": 0.2944,
+      "step": 1450
+    },
+    {
+      "epoch": 0.85210860774119,
+      "grad_norm": 14.496199607849121,
+      "learning_rate": 1.594264926171624e-06,
+      "loss": 0.3447,
+      "step": 1475
+    },
+    {
+      "epoch": 0.8665511265164645,
+      "grad_norm": 20.546905517578125,
+      "learning_rate": 1.5835651615664453e-06,
+      "loss": 0.3162,
+      "step": 1500
+    },
+    {
+      "epoch": 0.8809936452917388,
+      "grad_norm": 12.139562606811523,
+      "learning_rate": 1.5728653969612668e-06,
+      "loss": 0.3553,
+      "step": 1525
+    },
+    {
+      "epoch": 0.8954361640670133,
+      "grad_norm": 38.39575958251953,
+      "learning_rate": 1.562165632356088e-06,
+      "loss": 0.3929,
+      "step": 1550
+    },
+    {
+      "epoch": 0.9098786828422877,
+      "grad_norm": 9.470243453979492,
+      "learning_rate": 1.5514658677509094e-06,
+      "loss": 0.2997,
+      "step": 1575
+    },
+    {
+      "epoch": 0.9243212016175621,
+      "grad_norm": 14.675395965576172,
+      "learning_rate": 1.5407661031457306e-06,
+      "loss": 0.3275,
+      "step": 1600
+    },
+    {
+      "epoch": 0.9387637203928365,
+      "grad_norm": 3.3243517875671387,
+      "learning_rate": 1.5300663385405521e-06,
+      "loss": 0.426,
+      "step": 1625
+    },
+    {
+      "epoch": 0.9532062391681109,
+      "grad_norm": 12.924434661865234,
+      "learning_rate": 1.5193665739353734e-06,
+      "loss": 0.2976,
+      "step": 1650
+    },
+    {
+      "epoch": 0.9676487579433853,
+      "grad_norm": 46.80762481689453,
+      "learning_rate": 1.5086668093301947e-06,
+      "loss": 0.3499,
+      "step": 1675
+    },
+    {
+      "epoch": 0.9820912767186597,
+      "grad_norm": 15.062408447265625,
+      "learning_rate": 1.497967044725016e-06,
+      "loss": 0.3225,
+      "step": 1700
+    },
+    {
+      "epoch": 0.9965337954939342,
+      "grad_norm": 22.281009674072266,
+      "learning_rate": 1.4872672801198374e-06,
+      "loss": 0.4113,
+      "step": 1725
+    },
+    {
+      "epoch": 1.0,
+      "eval_explained_variance": 0.7363581657409668,
+      "eval_loss": 0.3119768798351288,
+      "eval_mae": 0.4405648112297058,
+      "eval_mse": 0.31180134415626526,
+      "eval_r2": 0.7173944470629221,
+      "eval_rmse": 0.5583917498588562,
+      "eval_runtime": 80.5336,
+      "eval_samples_per_second": 42.988,
+      "eval_steps_per_second": 2.695,
+      "step": 1731
+    },
+    {
+      "epoch": 1.0109763142692085,
+      "grad_norm": 27.233137130737305,
+      "learning_rate": 1.4765675155146587e-06,
+      "loss": 0.3579,
+      "step": 1750
+    },
+    {
+      "epoch": 1.025418833044483,
+      "grad_norm": 23.662677764892578,
+      "learning_rate": 1.46586775090948e-06,
+      "loss": 0.3746,
+      "step": 1775
+    },
+    {
+      "epoch": 1.0398613518197575,
+      "grad_norm": 27.650678634643555,
+      "learning_rate": 1.4551679863043012e-06,
+      "loss": 0.3723,
+      "step": 1800
+    },
+    {
+      "epoch": 1.0543038705950318,
+      "grad_norm": 38.274314880371094,
+      "learning_rate": 1.4444682216991227e-06,
+      "loss": 0.3705,
+      "step": 1825
+    },
+    {
+      "epoch": 1.0687463893703062,
+      "grad_norm": 43.43634796142578,
+      "learning_rate": 1.433768457093944e-06,
+      "loss": 0.2841,
+      "step": 1850
+    },
+    {
+      "epoch": 1.0831889081455806,
+      "grad_norm": 19.07291030883789,
+      "learning_rate": 1.4230686924887652e-06,
+      "loss": 0.4436,
+      "step": 1875
+    },
+    {
+      "epoch": 1.097631426920855,
+      "grad_norm": 36.660614013671875,
+      "learning_rate": 1.4123689278835865e-06,
+      "loss": 0.3031,
+      "step": 1900
+    },
+    {
+      "epoch": 1.1120739456961295,
+      "grad_norm": 10.8666353225708,
+      "learning_rate": 1.401669163278408e-06,
+      "loss": 0.2895,
+      "step": 1925
+    },
+    {
+      "epoch": 1.1265164644714039,
+      "grad_norm": 34.650394439697266,
+      "learning_rate": 1.3909693986732293e-06,
+      "loss": 0.3506,
+      "step": 1950
+    },
+    {
+      "epoch": 1.1409589832466782,
+      "grad_norm": 43.247623443603516,
+      "learning_rate": 1.3802696340680503e-06,
+      "loss": 0.3555,
+      "step": 1975
+    },
+    {
+      "epoch": 1.1554015020219526,
+      "grad_norm": 18.716602325439453,
+      "learning_rate": 1.3695698694628716e-06,
+      "loss": 0.3931,
+      "step": 2000
+    },
+    {
+      "epoch": 1.169844020797227,
+      "grad_norm": 31.356761932373047,
+      "learning_rate": 1.358870104857693e-06,
+      "loss": 0.2976,
+      "step": 2025
+    },
+    {
+      "epoch": 1.1842865395725015,
+      "grad_norm": 18.609111785888672,
+      "learning_rate": 1.3481703402525143e-06,
+      "loss": 0.3163,
+      "step": 2050
+    },
+    {
+      "epoch": 1.1987290583477759,
+      "grad_norm": 31.023008346557617,
+      "learning_rate": 1.3374705756473356e-06,
+      "loss": 0.3454,
+      "step": 2075
+    },
+    {
+      "epoch": 1.2131715771230502,
+      "grad_norm": 27.93479347229004,
+      "learning_rate": 1.3267708110421569e-06,
+      "loss": 0.3452,
+      "step": 2100
+    },
+    {
+      "epoch": 1.2276140958983246,
+      "grad_norm": 23.254547119140625,
+      "learning_rate": 1.3160710464369784e-06,
+      "loss": 0.3486,
+      "step": 2125
+    },
+    {
+      "epoch": 1.242056614673599,
+      "grad_norm": 45.776458740234375,
+      "learning_rate": 1.3053712818317996e-06,
+      "loss": 0.3586,
+      "step": 2150
+    },
+    {
+      "epoch": 1.2564991334488735,
+      "grad_norm": 14.92525863647461,
+      "learning_rate": 1.294671517226621e-06,
+      "loss": 0.3338,
+      "step": 2175
+    },
+    {
+      "epoch": 1.270941652224148,
+      "grad_norm": 20.12270736694336,
+      "learning_rate": 1.2839717526214422e-06,
+      "loss": 0.3437,
+      "step": 2200
+    },
+    {
+      "epoch": 1.2853841709994223,
+      "grad_norm": 41.65699005126953,
+      "learning_rate": 1.2732719880162636e-06,
+      "loss": 0.3264,
+      "step": 2225
+    },
+    {
+      "epoch": 1.2998266897746968,
+      "grad_norm": 32.03495788574219,
+      "learning_rate": 1.262572223411085e-06,
+      "loss": 0.3404,
+      "step": 2250
+    },
+    {
+      "epoch": 1.314269208549971,
+      "grad_norm": 4.864631175994873,
+      "learning_rate": 1.2518724588059062e-06,
+      "loss": 0.296,
+      "step": 2275
+    },
+    {
+      "epoch": 1.3287117273252456,
+      "grad_norm": 10.562322616577148,
+      "learning_rate": 1.2411726942007275e-06,
+      "loss": 0.3442,
+      "step": 2300
+    },
+    {
+      "epoch": 1.34315424610052,
+      "grad_norm": 33.48724365234375,
+      "learning_rate": 1.230472929595549e-06,
+      "loss": 0.257,
+      "step": 2325
+    },
+    {
+      "epoch": 1.3575967648757943,
+      "grad_norm": 19.912137985229492,
+      "learning_rate": 1.2197731649903702e-06,
+      "loss": 0.2968,
+      "step": 2350
+    },
+    {
+      "epoch": 1.3720392836510689,
+      "grad_norm": 22.246639251708984,
+      "learning_rate": 1.2090734003851915e-06,
+      "loss": 0.2793,
+      "step": 2375
+    },
+    {
+      "epoch": 1.3864818024263432,
+      "grad_norm": 18.22015380859375,
+      "learning_rate": 1.1983736357800127e-06,
+      "loss": 0.3079,
+      "step": 2400
+    },
+    {
+      "epoch": 1.4009243212016176,
+      "grad_norm": 15.965062141418457,
+      "learning_rate": 1.1876738711748342e-06,
+      "loss": 0.335,
+      "step": 2425
+    },
+    {
+      "epoch": 1.415366839976892,
+      "grad_norm": 20.45452117919922,
+      "learning_rate": 1.1769741065696555e-06,
+      "loss": 0.3061,
+      "step": 2450
+    },
+    {
+      "epoch": 1.4298093587521663,
+      "grad_norm": 13.89696216583252,
+      "learning_rate": 1.1662743419644768e-06,
+      "loss": 0.275,
+      "step": 2475
+    },
+    {
+      "epoch": 1.4442518775274409,
+      "grad_norm": 35.64567947387695,
+      "learning_rate": 1.155574577359298e-06,
+      "loss": 0.3471,
+      "step": 2500
+    },
+    {
+      "epoch": 1.4586943963027152,
+      "grad_norm": 14.65186882019043,
+      "learning_rate": 1.1448748127541195e-06,
+      "loss": 0.314,
+      "step": 2525
+    },
+    {
+      "epoch": 1.4731369150779896,
+      "grad_norm": 14.541102409362793,
+      "learning_rate": 1.1341750481489408e-06,
+      "loss": 0.2916,
+      "step": 2550
+    },
+    {
+      "epoch": 1.487579433853264,
+      "grad_norm": 37.96781539916992,
+      "learning_rate": 1.123475283543762e-06,
+      "loss": 0.3849,
+      "step": 2575
+    },
+    {
+      "epoch": 1.5020219526285383,
+      "grad_norm": 16.675336837768555,
+      "learning_rate": 1.1127755189385833e-06,
+      "loss": 0.2672,
+      "step": 2600
+    },
+    {
+      "epoch": 1.516464471403813,
+      "grad_norm": 28.15886116027832,
+      "learning_rate": 1.1020757543334048e-06,
+      "loss": 0.3031,
+      "step": 2625
+    },
+    {
+      "epoch": 1.5309069901790873,
+      "grad_norm": 28.914554595947266,
+      "learning_rate": 1.091375989728226e-06,
+      "loss": 0.2585,
+      "step": 2650
+    },
+    {
+      "epoch": 1.5453495089543616,
+      "grad_norm": 31.952404022216797,
+      "learning_rate": 1.0806762251230471e-06,
+      "loss": 0.3405,
+      "step": 2675
+    },
+    {
+      "epoch": 1.5597920277296362,
+      "grad_norm": 62.01006317138672,
+      "learning_rate": 1.0699764605178684e-06,
+      "loss": 0.2892,
+      "step": 2700
+    },
+    {
+      "epoch": 1.5742345465049103,
+      "grad_norm": 15.054553985595703,
+      "learning_rate": 1.0592766959126899e-06,
+      "loss": 0.2902,
+      "step": 2725
+    },
+    {
+      "epoch": 1.588677065280185,
+      "grad_norm": 39.178443908691406,
+      "learning_rate": 1.0485769313075112e-06,
+      "loss": 0.2743,
+      "step": 2750
+    },
+    {
+      "epoch": 1.6031195840554593,
+      "grad_norm": 43.06193923950195,
+      "learning_rate": 1.0378771667023324e-06,
+      "loss": 0.2982,
+      "step": 2775
+    },
+    {
+      "epoch": 1.6175621028307337,
+      "grad_norm": 43.87297821044922,
+      "learning_rate": 1.0271774020971537e-06,
+      "loss": 0.2902,
+      "step": 2800
+    },
+    {
+      "epoch": 1.6320046216060082,
+      "grad_norm": 21.78912925720215,
+      "learning_rate": 1.0164776374919752e-06,
+      "loss": 0.3465,
+      "step": 2825
+    },
+    {
+      "epoch": 1.6464471403812824,
+      "grad_norm": 15.053204536437988,
+      "learning_rate": 1.0057778728867964e-06,
+      "loss": 0.3213,
+      "step": 2850
+    },
+    {
+      "epoch": 1.660889659156557,
+      "grad_norm": 21.79863166809082,
+      "learning_rate": 9.950781082816177e-07,
+      "loss": 0.3278,
+      "step": 2875
+    },
+    {
+      "epoch": 1.6753321779318313,
+      "grad_norm": 58.025299072265625,
+      "learning_rate": 9.843783436764392e-07,
+      "loss": 0.2828,
+      "step": 2900
+    },
+    {
+      "epoch": 1.6897746967071057,
+      "grad_norm": 22.137096405029297,
+      "learning_rate": 9.736785790712605e-07,
+      "loss": 0.3023,
+      "step": 2925
+    },
+    {
+      "epoch": 1.7042172154823803,
+      "grad_norm": 19.531232833862305,
+      "learning_rate": 9.629788144660817e-07,
+      "loss": 0.3039,
+      "step": 2950
+    },
+    {
+      "epoch": 1.7186597342576544,
+      "grad_norm": 59.77436065673828,
+      "learning_rate": 9.52279049860903e-07,
+      "loss": 0.3376,
+      "step": 2975
+    },
+    {
+      "epoch": 1.733102253032929,
+      "grad_norm": 27.803564071655273,
+      "learning_rate": 9.415792852557243e-07,
+      "loss": 0.2839,
+      "step": 3000
+    },
+    {
+      "epoch": 1.7475447718082033,
+      "grad_norm": 21.773244857788086,
+      "learning_rate": 9.308795206505456e-07,
+      "loss": 0.3372,
+      "step": 3025
+    },
+    {
+      "epoch": 1.7619872905834777,
+      "grad_norm": 11.421875953674316,
+      "learning_rate": 9.201797560453669e-07,
+      "loss": 0.3754,
+      "step": 3050
+    },
+    {
+      "epoch": 1.7764298093587523,
+      "grad_norm": 14.211411476135254,
+      "learning_rate": 9.094799914401883e-07,
+      "loss": 0.3214,
+      "step": 3075
+    },
+    {
+      "epoch": 1.7908723281340264,
+      "grad_norm": 43.777278900146484,
+      "learning_rate": 8.987802268350096e-07,
+      "loss": 0.3508,
+      "step": 3100
+    },
+    {
+      "epoch": 1.805314846909301,
+      "grad_norm": 38.14100646972656,
+      "learning_rate": 8.880804622298309e-07,
+      "loss": 0.2535,
+      "step": 3125
+    },
+    {
+      "epoch": 1.8197573656845754,
+      "grad_norm": 15.347945213317871,
+      "learning_rate": 8.773806976246522e-07,
+      "loss": 0.3121,
+      "step": 3150
+    },
+    {
+      "epoch": 1.8341998844598497,
+      "grad_norm": 8.05485725402832,
+      "learning_rate": 8.666809330194736e-07,
+      "loss": 0.3227,
+      "step": 3175
+    },
+    {
+      "epoch": 1.8486424032351243,
+      "grad_norm": 11.664706230163574,
+      "learning_rate": 8.559811684142948e-07,
+      "loss": 0.3061,
+      "step": 3200
+    },
+    {
+      "epoch": 1.8630849220103987,
+      "grad_norm": 7.515502452850342,
+      "learning_rate": 8.452814038091161e-07,
+      "loss": 0.2753,
+      "step": 3225
+    },
+    {
+      "epoch": 1.877527440785673,
+      "grad_norm": 30.233638763427734,
+      "learning_rate": 8.345816392039374e-07,
+      "loss": 0.3518,
+      "step": 3250
+    },
+    {
+      "epoch": 1.8919699595609474,
+      "grad_norm": 16.609712600708008,
+      "learning_rate": 8.238818745987588e-07,
+      "loss": 0.3087,
+      "step": 3275
+    },
+    {
+      "epoch": 1.9064124783362217,
+      "grad_norm": 12.235444068908691,
+      "learning_rate": 8.1318210999358e-07,
+      "loss": 0.3224,
+      "step": 3300
+    },
+    {
+      "epoch": 1.9208549971114963,
+      "grad_norm": 36.453224182128906,
+      "learning_rate": 8.024823453884014e-07,
+      "loss": 0.3311,
+      "step": 3325
+    },
+    {
+      "epoch": 1.9352975158867707,
+      "grad_norm": 21.512168884277344,
+      "learning_rate": 7.917825807832227e-07,
+      "loss": 0.2857,
+      "step": 3350
+    },
+    {
+      "epoch": 1.949740034662045,
+      "grad_norm": 9.703317642211914,
+      "learning_rate": 7.81082816178044e-07,
+      "loss": 0.2662,
+      "step": 3375
+    },
+    {
+      "epoch": 1.9641825534373196,
+      "grad_norm": 17.714481353759766,
+      "learning_rate": 7.703830515728653e-07,
+      "loss": 0.291,
+      "step": 3400
+    },
+    {
+      "epoch": 1.9786250722125938,
+      "grad_norm": 22.379777908325195,
+      "learning_rate": 7.596832869676867e-07,
+      "loss": 0.3267,
+      "step": 3425
+    },
+    {
+      "epoch": 1.9930675909878683,
+      "grad_norm": 8.563464164733887,
+      "learning_rate": 7.48983522362508e-07,
+      "loss": 0.2939,
+      "step": 3450
+    },
+    {
+      "epoch": 2.0,
+      "eval_explained_variance": 0.7469815611839294,
+      "eval_loss": 0.31648534536361694,
+      "eval_mae": 0.4420657455921173,
+      "eval_mse": 0.3162277936935425,
+      "eval_r2": 0.7133824489512686,
+      "eval_rmse": 0.5623413324356079,
+      "eval_runtime": 80.5259,
+      "eval_samples_per_second": 42.992,
+      "eval_steps_per_second": 2.695,
+      "step": 3462
+    },
+    {
+      "epoch": 2.0075101097631425,
+      "grad_norm": 31.694887161254883,
+      "learning_rate": 7.382837577573293e-07,
+      "loss": 0.2527,
+      "step": 3475
+    },
+    {
+      "epoch": 2.021952628538417,
+      "grad_norm": 24.721397399902344,
+      "learning_rate": 7.275839931521506e-07,
+      "loss": 0.2815,
+      "step": 3500
+    },
+    {
+      "epoch": 2.0363951473136916,
+      "grad_norm": 33.44636917114258,
+      "learning_rate": 7.16884228546972e-07,
+      "loss": 0.3586,
+      "step": 3525
+    },
+    {
+      "epoch": 2.050837666088966,
+      "grad_norm": 44.72824478149414,
+      "learning_rate": 7.061844639417933e-07,
+      "loss": 0.3163,
+      "step": 3550
+    },
+    {
+      "epoch": 2.0652801848642404,
+      "grad_norm": 22.65967559814453,
+      "learning_rate": 6.954846993366146e-07,
+      "loss": 0.2523,
+      "step": 3575
+    },
+    {
+      "epoch": 2.079722703639515,
+      "grad_norm": 9.611360549926758,
+      "learning_rate": 6.847849347314358e-07,
+      "loss": 0.2609,
+      "step": 3600
+    },
+    {
+      "epoch": 2.094165222414789,
+      "grad_norm": 19.328899383544922,
+      "learning_rate": 6.740851701262572e-07,
+      "loss": 0.3327,
+      "step": 3625
+    },
+    {
+      "epoch": 2.1086077411900637,
+      "grad_norm": 17.564197540283203,
+      "learning_rate": 6.633854055210784e-07,
+      "loss": 0.2777,
+      "step": 3650
+    },
+    {
+      "epoch": 2.123050259965338,
+      "grad_norm": 35.05995178222656,
+      "learning_rate": 6.526856409158998e-07,
+      "loss": 0.2516,
+      "step": 3675
+    },
+    {
+      "epoch": 2.1374927787406124,
+      "grad_norm": 17.389116287231445,
+      "learning_rate": 6.419858763107211e-07,
+      "loss": 0.2387,
+      "step": 3700
+    },
+    {
+      "epoch": 2.151935297515887,
+      "grad_norm": 18.017724990844727,
+      "learning_rate": 6.312861117055425e-07,
+      "loss": 0.2764,
+      "step": 3725
+    },
+    {
+      "epoch": 2.166377816291161,
+      "grad_norm": 13.583812713623047,
+      "learning_rate": 6.205863471003637e-07,
+      "loss": 0.2717,
+      "step": 3750
+    },
+    {
+      "epoch": 2.1808203350664357,
+      "grad_norm": 18.499242782592773,
+      "learning_rate": 6.098865824951851e-07,
+      "loss": 0.2439,
+      "step": 3775
+    },
+    {
+      "epoch": 2.19526285384171,
+      "grad_norm": 17.863845825195312,
+      "learning_rate": 5.991868178900064e-07,
+      "loss": 0.2498,
+      "step": 3800
+    },
+    {
+      "epoch": 2.2097053726169844,
+      "grad_norm": 42.79360580444336,
+      "learning_rate": 5.884870532848277e-07,
+      "loss": 0.2986,
+      "step": 3825
+    },
+    {
+      "epoch": 2.224147891392259,
+      "grad_norm": 18.10019302368164,
+      "learning_rate": 5.77787288679649e-07,
+      "loss": 0.2692,
+      "step": 3850
+    },
+    {
+      "epoch": 2.238590410167533,
+      "grad_norm": 52.769935607910156,
+      "learning_rate": 5.670875240744704e-07,
+      "loss": 0.3265,
+      "step": 3875
+    },
+    {
+      "epoch": 2.2530329289428077,
+      "grad_norm": 42.038516998291016,
+      "learning_rate": 5.563877594692917e-07,
+      "loss": 0.3196,
+      "step": 3900
+    },
+    {
+      "epoch": 2.267475447718082,
+      "grad_norm": 14.1666898727417,
+      "learning_rate": 5.45687994864113e-07,
+      "loss": 0.2888,
+      "step": 3925
+    },
+    {
+      "epoch": 2.2819179664933564,
+      "grad_norm": 16.471778869628906,
+      "learning_rate": 5.349882302589342e-07,
+      "loss": 0.2782,
+      "step": 3950
+    },
+    {
+      "epoch": 2.296360485268631,
+      "grad_norm": 9.197157859802246,
+      "learning_rate": 5.242884656537556e-07,
+      "loss": 0.3127,
+      "step": 3975
+    },
+    {
+      "epoch": 2.310803004043905,
+      "grad_norm": 19.208568572998047,
+      "learning_rate": 5.135887010485768e-07,
+      "loss": 0.2572,
+      "step": 4000
+    },
+    {
+      "epoch": 2.3252455228191797,
+      "grad_norm": 5.966078758239746,
+      "learning_rate": 5.028889364433982e-07,
+      "loss": 0.2631,
+      "step": 4025
+    },
+    {
+      "epoch": 2.339688041594454,
+      "grad_norm": 27.037731170654297,
+      "learning_rate": 4.921891718382196e-07,
+      "loss": 0.2794,
+      "step": 4050
+    },
+    {
+      "epoch": 2.3541305603697285,
+      "grad_norm": 39.20252990722656,
+      "learning_rate": 4.814894072330409e-07,
+      "loss": 0.2656,
+      "step": 4075
+    },
+    {
+      "epoch": 2.368573079145003,
+      "grad_norm": 32.399147033691406,
+      "learning_rate": 4.7078964262786213e-07,
+      "loss": 0.2654,
+      "step": 4100
+    },
+    {
+      "epoch": 2.383015597920277,
+      "grad_norm": 23.706451416015625,
+      "learning_rate": 4.6008987802268346e-07,
+      "loss": 0.2887,
+      "step": 4125
+    },
+    {
+      "epoch": 2.3974581166955518,
+      "grad_norm": 15.86970043182373,
+      "learning_rate": 4.493901134175048e-07,
+      "loss": 0.2568,
+      "step": 4150
+    },
+    {
+      "epoch": 2.4119006354708263,
+      "grad_norm": 27.933916091918945,
+      "learning_rate": 4.386903488123261e-07,
+      "loss": 0.2536,
+      "step": 4175
+    },
+    {
+      "epoch": 2.4263431542461005,
+      "grad_norm": 16.812334060668945,
+      "learning_rate": 4.279905842071474e-07,
+      "loss": 0.2542,
+      "step": 4200
+    },
+    {
+      "epoch": 2.440785673021375,
+      "grad_norm": 38.82505416870117,
+      "learning_rate": 4.172908196019687e-07,
+      "loss": 0.2872,
+      "step": 4225
+    },
+    {
+      "epoch": 2.455228191796649,
+      "grad_norm": 23.149492263793945,
+      "learning_rate": 4.0659105499679e-07,
+      "loss": 0.2818,
+      "step": 4250
+    },
+    {
+      "epoch": 2.4696707105719238,
+      "grad_norm": 43.19930648803711,
+      "learning_rate": 3.9589129039161134e-07,
+      "loss": 0.213,
+      "step": 4275
+    },
+    {
+      "epoch": 2.484113229347198,
+      "grad_norm": 23.671152114868164,
+      "learning_rate": 3.8519152578643266e-07,
+      "loss": 0.3698,
+      "step": 4300
+    },
+    {
+      "epoch": 2.4985557481224725,
+      "grad_norm": 88.69607543945312,
+      "learning_rate": 3.74491761181254e-07,
+      "loss": 0.2968,
+      "step": 4325
+    },
+    {
+      "epoch": 2.512998266897747,
+      "grad_norm": 9.653864860534668,
+      "learning_rate": 3.637919965760753e-07,
+      "loss": 0.2777,
+      "step": 4350
+    },
+    {
+      "epoch": 2.527440785673021,
+      "grad_norm": 11.768026351928711,
+      "learning_rate": 3.5309223197089663e-07,
+      "loss": 0.3236,
+      "step": 4375
+    },
+    {
+      "epoch": 2.541883304448296,
+      "grad_norm": 15.171217918395996,
+      "learning_rate": 3.423924673657179e-07,
+      "loss": 0.3077,
+      "step": 4400
+    },
+    {
+      "epoch": 2.5563258232235704,
+      "grad_norm": 9.879386901855469,
+      "learning_rate": 3.316927027605392e-07,
+      "loss": 0.2684,
+      "step": 4425
+    },
+    {
+      "epoch": 2.5707683419988445,
+      "grad_norm": 9.355985641479492,
+      "learning_rate": 3.2099293815536054e-07,
+      "loss": 0.2526,
+      "step": 4450
+    },
+    {
+      "epoch": 2.585210860774119,
+      "grad_norm": 4.87063455581665,
+      "learning_rate": 3.1029317355018186e-07,
+      "loss": 0.2689,
+      "step": 4475
+    },
+    {
+      "epoch": 2.5996533795493937,
+      "grad_norm": 20.083267211914062,
+      "learning_rate": 2.995934089450032e-07,
+      "loss": 0.259,
+      "step": 4500
+    },
+    {
+      "epoch": 2.614095898324668,
+      "grad_norm": 12.317808151245117,
+      "learning_rate": 2.888936443398245e-07,
+      "loss": 0.2232,
+      "step": 4525
+    },
+    {
+      "epoch": 2.628538417099942,
+      "grad_norm": 28.255945205688477,
+      "learning_rate": 2.7819387973464583e-07,
+      "loss": 0.2466,
+      "step": 4550
+    },
+    {
+      "epoch": 2.6429809358752165,
+      "grad_norm": 72.9136734008789,
+      "learning_rate": 2.674941151294671e-07,
+      "loss": 0.2693,
+      "step": 4575
+    },
+    {
+      "epoch": 2.657423454650491,
+      "grad_norm": 44.20970153808594,
+      "learning_rate": 2.567943505242884e-07,
+      "loss": 0.2527,
+      "step": 4600
+    },
+    {
+      "epoch": 2.6718659734257653,
+      "grad_norm": 17.912519454956055,
+      "learning_rate": 2.460945859191098e-07,
+      "loss": 0.2706,
+      "step": 4625
+    },
+    {
+      "epoch": 2.68630849220104,
+      "grad_norm": 48.64137649536133,
+      "learning_rate": 2.3539482131393107e-07,
+      "loss": 0.2795,
+      "step": 4650
+    },
+    {
+      "epoch": 2.7007510109763144,
+      "grad_norm": 39.6313362121582,
+      "learning_rate": 2.246950567087524e-07,
+      "loss": 0.2669,
+      "step": 4675
+    },
+    {
+      "epoch": 2.7151935297515886,
+      "grad_norm": 12.336877822875977,
+      "learning_rate": 2.139952921035737e-07,
+      "loss": 0.2791,
+      "step": 4700
+    },
+    {
+      "epoch": 2.729636048526863,
+      "grad_norm": 11.376914024353027,
+      "learning_rate": 2.03295527498395e-07,
+      "loss": 0.2599,
+      "step": 4725
+    },
+    {
+      "epoch": 2.7440785673021377,
+      "grad_norm": 26.109207153320312,
+      "learning_rate": 1.9259576289321633e-07,
+      "loss": 0.3112,
+      "step": 4750
+    },
+    {
+      "epoch": 2.758521086077412,
+      "grad_norm": 13.475809097290039,
+      "learning_rate": 1.8189599828803765e-07,
+      "loss": 0.2797,
+      "step": 4775
+    },
+    {
+      "epoch": 2.7729636048526864,
+      "grad_norm": 23.833911895751953,
+      "learning_rate": 1.7119623368285895e-07,
+      "loss": 0.264,
+      "step": 4800
+    },
+    {
+      "epoch": 2.7874061236279606,
+      "grad_norm": 7.037588119506836,
+      "learning_rate": 1.6049646907768027e-07,
+      "loss": 0.3202,
+      "step": 4825
+    },
+    {
+      "epoch": 2.801848642403235,
+      "grad_norm": 36.29332733154297,
+      "learning_rate": 1.497967044725016e-07,
+      "loss": 0.286,
+      "step": 4850
+    },
+    {
+      "epoch": 2.8162911611785093,
+      "grad_norm": 10.196252822875977,
+      "learning_rate": 1.3909693986732292e-07,
+      "loss": 0.3008,
+      "step": 4875
+    },
+    {
+      "epoch": 2.830733679953784,
+      "grad_norm": 27.923114776611328,
+      "learning_rate": 1.283971752621442e-07,
+      "loss": 0.2778,
+      "step": 4900
+    },
+    {
+      "epoch": 2.8451761987290585,
+      "grad_norm": 5.924576282501221,
+      "learning_rate": 1.1769741065696553e-07,
+      "loss": 0.2237,
+      "step": 4925
+    },
+    {
+      "epoch": 2.8596187175043326,
+      "grad_norm": 16.432357788085938,
+      "learning_rate": 1.0699764605178686e-07,
+      "loss": 0.2532,
+      "step": 4950
+    },
+    {
+      "epoch": 2.874061236279607,
+      "grad_norm": 10.846713066101074,
+      "learning_rate": 9.629788144660816e-08,
+      "loss": 0.2277,
+      "step": 4975
+    },
+    {
+      "epoch": 2.8885037550548818,
+      "grad_norm": 21.979785919189453,
+      "learning_rate": 8.559811684142947e-08,
+      "loss": 0.3065,
+      "step": 5000
+    },
+    {
+      "epoch": 2.902946273830156,
+      "grad_norm": 17.25764274597168,
+      "learning_rate": 7.48983522362508e-08,
+      "loss": 0.2449,
+      "step": 5025
+    },
+    {
+      "epoch": 2.9173887926054305,
+      "grad_norm": 20.356718063354492,
+      "learning_rate": 6.41985876310721e-08,
+      "loss": 0.2357,
+      "step": 5050
+    },
+    {
+      "epoch": 2.9318313113807046,
+      "grad_norm": 18.828088760375977,
+      "learning_rate": 5.349882302589343e-08,
+      "loss": 0.2329,
+      "step": 5075
+    },
+    {
+      "epoch": 2.946273830155979,
+      "grad_norm": 10.398417472839355,
+      "learning_rate": 4.279905842071474e-08,
+      "loss": 0.2279,
+      "step": 5100
+    },
+    {
+      "epoch": 2.9607163489312533,
+      "grad_norm": 40.09988784790039,
+      "learning_rate": 3.209929381553605e-08,
+      "loss": 0.2337,
+      "step": 5125
+    },
+    {
+      "epoch": 2.975158867706528,
+      "grad_norm": 22.620283126831055,
+      "learning_rate": 2.139952921035737e-08,
+      "loss": 0.2783,
+      "step": 5150
+    },
+    {
+      "epoch": 2.9896013864818025,
+      "grad_norm": 33.585792541503906,
+      "learning_rate": 1.0699764605178684e-08,
+      "loss": 0.2726,
+      "step": 5175
+    },
+    {
+      "epoch": 3.0,
+      "eval_explained_variance": 0.7570163011550903,
+      "eval_loss": 0.282262921333313,
+      "eval_mae": 0.4189736545085907,
+      "eval_mse": 0.2820460796356201,
+      "eval_r2": 0.74436353679844,
+      "eval_rmse": 0.5310801267623901,
+      "eval_runtime": 80.5385,
+      "eval_samples_per_second": 42.986,
+      "eval_steps_per_second": 2.694,
+      "step": 5193
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 5193,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 7.743257397795226e+16,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-5193/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:270da9b099b2d02a736480bd012939e9b5f02ea692f21c4e08c4925dc1f458dc
+size 5048

config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "_name_or_path": "microsoft/deberta-v3-large",
+  "_num_labels": 1,
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "target"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "target": 0
+  },
+  "layer_norm_eps": 1e-07,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d68d3238d7e01448edfe7b1983dcf19cc58c6bfbb7628f33a97a5690b6e914d
+size 1740300340

runs/Apr29_14-53-13_r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p/events.out.tfevents.1714402394.r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p.464.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:129c6f6a91995200e4332cddbe1b7a5c6e4e8894bfdbe4f4a000e8a5d32483fd
-size 46485

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1d3054238590a97382b063b811087cd93da94519af24987471b0223a836fdde
+size 50525

runs/Apr29_14-53-13_r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p/events.out.tfevents.1714407850.r-abhishek-autotrain-1bfjaa9n-00d8c-hkw4p.464.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81b9b3dff0d2329581b485dd6e9867790b2a9b813bd88a0ec2b931a09c093664
+size 609

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:270da9b099b2d02a736480bd012939e9b5f02ea692f21c4e08c4925dc1f458dc
+size 5048

training_params.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+    "data_path": "autotrain-m96nh-snymb/autotrain-data",
+    "model": "microsoft/deberta-v3-large",
+    "lr": 2e-06,
+    "epochs": 3,
+    "max_seq_length": 1024,
+    "batch_size": 2,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "validation",
+    "text_column": "autotrain_text",
+    "target_column": "autotrain_label",
+    "logging_steps": -1,
+    "project_name": "autotrain-m96nh-snymb",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "evaluation_strategy": "epoch",
+    "username": "abhishek",
+    "log": "tensorboard"
+}