Upload folder using huggingface_hub
- README.md +35 -0
- checkpoint-4676/config.json +61 -0
- checkpoint-4676/generation_config.json +16 -0
- checkpoint-4676/model.safetensors +3 -0
- checkpoint-4676/optimizer.pt +3 -0
- checkpoint-4676/rng_state.pth +3 -0
- checkpoint-4676/scheduler.pt +3 -0
- checkpoint-4676/source.spm +0 -0
- checkpoint-4676/special_tokens_map.json +5 -0
- checkpoint-4676/target.spm +0 -0
- checkpoint-4676/tokenizer_config.json +38 -0
- checkpoint-4676/trainer_state.json +1421 -0
- checkpoint-4676/training_args.bin +3 -0
- checkpoint-4676/vocab.json +0 -0
- config.json +61 -0
- generation_config.json +16 -0
- model.safetensors +3 -0
- runs/May09_12-56-42_r-lrj1981-ml-test-2or0gyz7-dece2-sq4de/events.out.tfevents.1715259405.r-lrj1981-ml-test-2or0gyz7-dece2-sq4de.148.0 +2 -2
- runs/May09_12-56-42_r-lrj1981-ml-test-2or0gyz7-dece2-sq4de/events.out.tfevents.1715264442.r-lrj1981-ml-test-2or0gyz7-dece2-sq4de.148.1 +3 -0
- source.spm +0 -0
- special_tokens_map.json +5 -0
- target.spm +0 -0
- tokenizer_config.json +38 -0
- training_args.bin +3 -0
- training_params.json +35 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,35 @@
---
tags:
- autotrain
- text2text-generation
widget:
- text: "I love AutoTrain"
datasets:
- autotrain-ve993-lub6e/autotrain-data
---

# Model Trained Using AutoTrain

- Problem type: Seq2Seq

## Validation Metrics

loss: 1.5230909585952759

rouge1: 55.7716

rouge2: 33.0852

rougeL: 51.3404

rougeLsum: 51.4618

gen_len: 59.7293

runtime: 430.9744

samples_per_second: 3.875

steps_per_second: 0.195

: 9.0
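The card above only lists validation metrics, so here is a minimal inference sketch. It assumes the model is published under the username and project name that appear in training_params.json (LRJ1981/autotrain-ve993-lub6e); substitute a local checkpoint path if that repo id is not where the model actually lives.

```python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Assumed repo id (username + project_name from training_params.json); adjust as needed.
model_id = "LRJ1981/autotrain-ve993-lub6e"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Danish -> English, since the base model is Helsinki-NLP/opus-mt-da-en.
inputs = tokenizer("Jeg elsker AutoTrain", return_tensors="pt")
outputs = model.generate(**inputs, num_beams=4, max_length=512)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```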
checkpoint-4676/config.json
ADDED
@@ -0,0 +1,61 @@
{
  "_name_or_path": "Helsinki-NLP/opus-mt-da-en",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "swish",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "MarianMTModel"
  ],
  "attention_dropout": 0.0,
  "bad_words_ids": [
    [
      58929
    ]
  ],
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 512,
  "decoder_attention_heads": 8,
  "decoder_ffn_dim": 2048,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 58929,
  "decoder_vocab_size": 58930,
  "dropout": 0.1,
  "encoder_attention_heads": 8,
  "encoder_ffn_dim": 2048,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 0,
  "forced_eos_token_id": 0,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_length": 512,
  "max_position_embeddings": 512,
  "model_type": "marian",
  "normalize_before": false,
  "normalize_embedding": false,
  "num_beams": 4,
  "num_hidden_layers": 6,
  "pad_token_id": 58929,
  "scale_embedding": true,
  "share_encoder_decoder_embeddings": true,
  "static_position_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.40.1",
  "use_cache": false,
  "vocab_size": 58930
}
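This config fully describes the MarianMT architecture (6 encoder and 6 decoder layers, d_model 512, a shared 58,930-token vocabulary). As a sketch, a randomly initialised model of the same shape can be built straight from it; the local checkpoint path is an assumption.

```python
from transformers import MarianConfig, MarianMTModel

config = MarianConfig.from_pretrained("checkpoint-4676")  # assumed local checkpoint directory
model = MarianMTModel(config)  # architecture only, random weights

# Roughly 74M parameters, consistent with the ~297 MB float32 model.safetensors below.
print(sum(p.numel() for p in model.parameters()))
```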
checkpoint-4676/generation_config.json
ADDED
@@ -0,0 +1,16 @@
{
  "bad_words_ids": [
    [
      58929
    ]
  ],
  "bos_token_id": 0,
  "decoder_start_token_id": 58929,
  "eos_token_id": 0,
  "forced_eos_token_id": 0,
  "max_length": 512,
  "num_beams": 4,
  "pad_token_id": 58929,
  "renormalize_logits": true,
  "transformers_version": "4.40.1"
}
checkpoint-4676/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cd59557e95ee48f4799a183b95004f37eeddb04d1f4125cf6d859fc60cedd5cd
size 297507400
checkpoint-4676/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:69b583438c2a94795110470ffcdf0647bfc6d50f82c9d0d80f54ade77810a38c
size 594696826
checkpoint-4676/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9d145668ab859cd1763e2dc7454c2ea3a12250310343ad9a384f87cbdbc54f09
size 14244
checkpoint-4676/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:08dd588e72eb2629380818fe636ae342302694515239edc923e495c1e8ad34cd
size 1064
checkpoint-4676/source.spm
ADDED
Binary file (820 kB)
checkpoint-4676/special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
{
  "eos_token": "</s>",
  "pad_token": "<pad>",
  "unk_token": "<unk>"
}
checkpoint-4676/target.spm
ADDED
Binary file (788 kB)
checkpoint-4676/tokenizer_config.json
ADDED
@@ -0,0 +1,38 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "58929": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "eos_token": "</s>",
  "model_max_length": 512,
  "pad_token": "<pad>",
  "separate_vocabs": false,
  "source_lang": "da",
  "sp_model_kwargs": {},
  "target_lang": "en",
  "tokenizer_class": "MarianTokenizer",
  "unk_token": "<unk>"
}
checkpoint-4676/trainer_state.json
ADDED
@@ -0,0 +1,1421 @@
{
  "best_metric": 1.5230909585952759,
  "best_model_checkpoint": "autotrain-ve993-lub6e/checkpoint-4676",
  "epoch": 7.0,
  "eval_steps": 500,
  "global_step": 4676,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
11 |
+
{
|
12 |
+
"epoch": 0.0374251497005988,
|
13 |
+
"grad_norm": 9.912540435791016,
|
14 |
+
"learning_rate": 1.7964071856287426e-06,
|
15 |
+
"loss": 5.9541,
|
16 |
+
"step": 25
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.0748502994011976,
|
20 |
+
"grad_norm": 8.57716178894043,
|
21 |
+
"learning_rate": 3.6676646706586825e-06,
|
22 |
+
"loss": 5.6517,
|
23 |
+
"step": 50
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.1122754491017964,
|
27 |
+
"grad_norm": 6.1179351806640625,
|
28 |
+
"learning_rate": 5.538922155688623e-06,
|
29 |
+
"loss": 5.3189,
|
30 |
+
"step": 75
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.1497005988023952,
|
34 |
+
"grad_norm": 6.469350337982178,
|
35 |
+
"learning_rate": 7.410179640718563e-06,
|
36 |
+
"loss": 4.9823,
|
37 |
+
"step": 100
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.18712574850299402,
|
41 |
+
"grad_norm": 6.627992153167725,
|
42 |
+
"learning_rate": 9.281437125748502e-06,
|
43 |
+
"loss": 4.704,
|
44 |
+
"step": 125
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.2245508982035928,
|
48 |
+
"grad_norm": 5.041338920593262,
|
49 |
+
"learning_rate": 1.1152694610778444e-05,
|
50 |
+
"loss": 4.4379,
|
51 |
+
"step": 150
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.2619760479041916,
|
55 |
+
"grad_norm": 6.7107977867126465,
|
56 |
+
"learning_rate": 1.3023952095808384e-05,
|
57 |
+
"loss": 4.2558,
|
58 |
+
"step": 175
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.2994011976047904,
|
62 |
+
"grad_norm": 6.578221797943115,
|
63 |
+
"learning_rate": 1.4895209580838324e-05,
|
64 |
+
"loss": 4.068,
|
65 |
+
"step": 200
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.33682634730538924,
|
69 |
+
"grad_norm": 6.045902252197266,
|
70 |
+
"learning_rate": 1.6766467065868263e-05,
|
71 |
+
"loss": 3.9557,
|
72 |
+
"step": 225
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.37425149700598803,
|
76 |
+
"grad_norm": 5.811893463134766,
|
77 |
+
"learning_rate": 1.8637724550898206e-05,
|
78 |
+
"loss": 3.8036,
|
79 |
+
"step": 250
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.4116766467065868,
|
83 |
+
"grad_norm": 5.545330047607422,
|
84 |
+
"learning_rate": 2.0508982035928146e-05,
|
85 |
+
"loss": 3.7448,
|
86 |
+
"step": 275
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.4491017964071856,
|
90 |
+
"grad_norm": 5.683607578277588,
|
91 |
+
"learning_rate": 2.2380239520958086e-05,
|
92 |
+
"loss": 3.587,
|
93 |
+
"step": 300
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.4865269461077844,
|
97 |
+
"grad_norm": 6.531998157501221,
|
98 |
+
"learning_rate": 2.4251497005988023e-05,
|
99 |
+
"loss": 3.5058,
|
100 |
+
"step": 325
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.5239520958083832,
|
104 |
+
"grad_norm": 5.926412105560303,
|
105 |
+
"learning_rate": 2.6122754491017963e-05,
|
106 |
+
"loss": 3.3958,
|
107 |
+
"step": 350
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.561377245508982,
|
111 |
+
"grad_norm": 5.820963382720947,
|
112 |
+
"learning_rate": 2.7994011976047907e-05,
|
113 |
+
"loss": 3.3647,
|
114 |
+
"step": 375
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.5988023952095808,
|
118 |
+
"grad_norm": 6.341830253601074,
|
119 |
+
"learning_rate": 2.9865269461077843e-05,
|
120 |
+
"loss": 3.2207,
|
121 |
+
"step": 400
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.6362275449101796,
|
125 |
+
"grad_norm": 5.764517784118652,
|
126 |
+
"learning_rate": 3.1736526946107784e-05,
|
127 |
+
"loss": 3.2476,
|
128 |
+
"step": 425
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.6736526946107785,
|
132 |
+
"grad_norm": 5.8953962326049805,
|
133 |
+
"learning_rate": 3.360778443113773e-05,
|
134 |
+
"loss": 3.1193,
|
135 |
+
"step": 450
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.7110778443113772,
|
139 |
+
"grad_norm": 5.671535968780518,
|
140 |
+
"learning_rate": 3.5479041916167664e-05,
|
141 |
+
"loss": 3.0553,
|
142 |
+
"step": 475
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.7485029940119761,
|
146 |
+
"grad_norm": 5.246084690093994,
|
147 |
+
"learning_rate": 3.735029940119761e-05,
|
148 |
+
"loss": 2.9964,
|
149 |
+
"step": 500
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.7859281437125748,
|
153 |
+
"grad_norm": 6.070059299468994,
|
154 |
+
"learning_rate": 3.9221556886227544e-05,
|
155 |
+
"loss": 2.9678,
|
156 |
+
"step": 525
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"epoch": 0.8233532934131736,
|
160 |
+
"grad_norm": 5.288054943084717,
|
161 |
+
"learning_rate": 4.109281437125749e-05,
|
162 |
+
"loss": 2.9111,
|
163 |
+
"step": 550
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"epoch": 0.8607784431137725,
|
167 |
+
"grad_norm": 6.651124000549316,
|
168 |
+
"learning_rate": 4.2964071856287424e-05,
|
169 |
+
"loss": 2.878,
|
170 |
+
"step": 575
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 0.8982035928143712,
|
174 |
+
"grad_norm": 5.776132106781006,
|
175 |
+
"learning_rate": 4.483532934131737e-05,
|
176 |
+
"loss": 2.8239,
|
177 |
+
"step": 600
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 0.9356287425149701,
|
181 |
+
"grad_norm": 5.356322288513184,
|
182 |
+
"learning_rate": 4.670658682634731e-05,
|
183 |
+
"loss": 2.7097,
|
184 |
+
"step": 625
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"epoch": 0.9730538922155688,
|
188 |
+
"grad_norm": 5.361959457397461,
|
189 |
+
"learning_rate": 4.857784431137725e-05,
|
190 |
+
"loss": 2.7526,
|
191 |
+
"step": 650
|
192 |
+
},
|
193 |
+
{
|
194 |
+
"epoch": 1.0,
|
195 |
+
"eval_gen_len": 60.8838,
|
196 |
+
"eval_loss": 2.430297613143921,
|
197 |
+
"eval_rouge1": 43.4684,
|
198 |
+
"eval_rouge2": 19.1354,
|
199 |
+
"eval_rougeL": 38.9925,
|
200 |
+
"eval_rougeLsum": 39.1478,
|
201 |
+
"eval_runtime": 483.1059,
|
202 |
+
"eval_samples_per_second": 3.457,
|
203 |
+
"eval_steps_per_second": 0.174,
|
204 |
+
"step": 668
|
205 |
+
},
|
206 |
+
{
|
207 |
+
"epoch": 1.0104790419161676,
|
208 |
+
"grad_norm": 6.461863040924072,
|
209 |
+
"learning_rate": 4.99500998003992e-05,
|
210 |
+
"loss": 2.6027,
|
211 |
+
"step": 675
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"epoch": 1.0479041916167664,
|
215 |
+
"grad_norm": 4.890101432800293,
|
216 |
+
"learning_rate": 4.9742182302062544e-05,
|
217 |
+
"loss": 2.489,
|
218 |
+
"step": 700
|
219 |
+
},
|
220 |
+
{
|
221 |
+
"epoch": 1.0853293413173652,
|
222 |
+
"grad_norm": 5.590378761291504,
|
223 |
+
"learning_rate": 4.953426480372588e-05,
|
224 |
+
"loss": 2.4426,
|
225 |
+
"step": 725
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"epoch": 1.122754491017964,
|
229 |
+
"grad_norm": 5.469313144683838,
|
230 |
+
"learning_rate": 4.932634730538922e-05,
|
231 |
+
"loss": 2.393,
|
232 |
+
"step": 750
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"epoch": 1.160179640718563,
|
236 |
+
"grad_norm": 5.43513822555542,
|
237 |
+
"learning_rate": 4.9118429807052565e-05,
|
238 |
+
"loss": 2.4025,
|
239 |
+
"step": 775
|
240 |
+
},
|
241 |
+
{
|
242 |
+
"epoch": 1.1976047904191618,
|
243 |
+
"grad_norm": 5.54775333404541,
|
244 |
+
"learning_rate": 4.891051230871591e-05,
|
245 |
+
"loss": 2.3976,
|
246 |
+
"step": 800
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"epoch": 1.2350299401197604,
|
250 |
+
"grad_norm": 4.897847652435303,
|
251 |
+
"learning_rate": 4.8702594810379244e-05,
|
252 |
+
"loss": 2.3977,
|
253 |
+
"step": 825
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 1.2724550898203593,
|
257 |
+
"grad_norm": 5.192740440368652,
|
258 |
+
"learning_rate": 4.8494677312042586e-05,
|
259 |
+
"loss": 2.2916,
|
260 |
+
"step": 850
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 1.3098802395209581,
|
264 |
+
"grad_norm": 5.499965190887451,
|
265 |
+
"learning_rate": 4.828675981370592e-05,
|
266 |
+
"loss": 2.3031,
|
267 |
+
"step": 875
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"epoch": 1.347305389221557,
|
271 |
+
"grad_norm": 4.9002580642700195,
|
272 |
+
"learning_rate": 4.8078842315369265e-05,
|
273 |
+
"loss": 2.2754,
|
274 |
+
"step": 900
|
275 |
+
},
|
276 |
+
{
|
277 |
+
"epoch": 1.3847305389221556,
|
278 |
+
"grad_norm": 4.921010494232178,
|
279 |
+
"learning_rate": 4.78709248170326e-05,
|
280 |
+
"loss": 2.2357,
|
281 |
+
"step": 925
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"epoch": 1.4221556886227544,
|
285 |
+
"grad_norm": 5.515709400177002,
|
286 |
+
"learning_rate": 4.766300731869594e-05,
|
287 |
+
"loss": 2.3363,
|
288 |
+
"step": 950
|
289 |
+
},
|
290 |
+
{
|
291 |
+
"epoch": 1.4595808383233533,
|
292 |
+
"grad_norm": 5.200982093811035,
|
293 |
+
"learning_rate": 4.7455089820359286e-05,
|
294 |
+
"loss": 2.2036,
|
295 |
+
"step": 975
|
296 |
+
},
|
297 |
+
{
|
298 |
+
"epoch": 1.4970059880239521,
|
299 |
+
"grad_norm": 5.73722505569458,
|
300 |
+
"learning_rate": 4.724717232202263e-05,
|
301 |
+
"loss": 2.2285,
|
302 |
+
"step": 1000
|
303 |
+
},
|
304 |
+
{
|
305 |
+
"epoch": 1.534431137724551,
|
306 |
+
"grad_norm": 5.761080265045166,
|
307 |
+
"learning_rate": 4.7039254823685964e-05,
|
308 |
+
"loss": 2.1971,
|
309 |
+
"step": 1025
|
310 |
+
},
|
311 |
+
{
|
312 |
+
"epoch": 1.5718562874251498,
|
313 |
+
"grad_norm": 4.7960524559021,
|
314 |
+
"learning_rate": 4.683133732534931e-05,
|
315 |
+
"loss": 2.1514,
|
316 |
+
"step": 1050
|
317 |
+
},
|
318 |
+
{
|
319 |
+
"epoch": 1.6092814371257484,
|
320 |
+
"grad_norm": 5.1422810554504395,
|
321 |
+
"learning_rate": 4.662341982701264e-05,
|
322 |
+
"loss": 2.1301,
|
323 |
+
"step": 1075
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"epoch": 1.6467065868263473,
|
327 |
+
"grad_norm": 5.624262809753418,
|
328 |
+
"learning_rate": 4.6415502328675985e-05,
|
329 |
+
"loss": 2.0802,
|
330 |
+
"step": 1100
|
331 |
+
},
|
332 |
+
{
|
333 |
+
"epoch": 1.6841317365269461,
|
334 |
+
"grad_norm": 4.837282657623291,
|
335 |
+
"learning_rate": 4.620758483033932e-05,
|
336 |
+
"loss": 2.1048,
|
337 |
+
"step": 1125
|
338 |
+
},
|
339 |
+
{
|
340 |
+
"epoch": 1.7215568862275448,
|
341 |
+
"grad_norm": 5.103292465209961,
|
342 |
+
"learning_rate": 4.5999667332002664e-05,
|
343 |
+
"loss": 1.9987,
|
344 |
+
"step": 1150
|
345 |
+
},
|
346 |
+
{
|
347 |
+
"epoch": 1.7589820359281436,
|
348 |
+
"grad_norm": 5.245999336242676,
|
349 |
+
"learning_rate": 4.5791749833666006e-05,
|
350 |
+
"loss": 2.01,
|
351 |
+
"step": 1175
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"epoch": 1.7964071856287425,
|
355 |
+
"grad_norm": 4.83555269241333,
|
356 |
+
"learning_rate": 4.558383233532935e-05,
|
357 |
+
"loss": 2.0216,
|
358 |
+
"step": 1200
|
359 |
+
},
|
360 |
+
{
|
361 |
+
"epoch": 1.8338323353293413,
|
362 |
+
"grad_norm": 4.849733352661133,
|
363 |
+
"learning_rate": 4.5375914836992685e-05,
|
364 |
+
"loss": 2.0782,
|
365 |
+
"step": 1225
|
366 |
+
},
|
367 |
+
{
|
368 |
+
"epoch": 1.8712574850299402,
|
369 |
+
"grad_norm": 5.461581707000732,
|
370 |
+
"learning_rate": 4.516799733865603e-05,
|
371 |
+
"loss": 2.0555,
|
372 |
+
"step": 1250
|
373 |
+
},
|
374 |
+
{
|
375 |
+
"epoch": 1.908682634730539,
|
376 |
+
"grad_norm": 5.006551742553711,
|
377 |
+
"learning_rate": 4.496007984031936e-05,
|
378 |
+
"loss": 2.0485,
|
379 |
+
"step": 1275
|
380 |
+
},
|
381 |
+
{
|
382 |
+
"epoch": 1.9461077844311379,
|
383 |
+
"grad_norm": 4.840539455413818,
|
384 |
+
"learning_rate": 4.47521623419827e-05,
|
385 |
+
"loss": 1.9808,
|
386 |
+
"step": 1300
|
387 |
+
},
|
388 |
+
{
|
389 |
+
"epoch": 1.9835329341317365,
|
390 |
+
"grad_norm": 4.997801303863525,
|
391 |
+
"learning_rate": 4.454424484364604e-05,
|
392 |
+
"loss": 1.9118,
|
393 |
+
"step": 1325
|
394 |
+
},
|
395 |
+
{
|
396 |
+
"epoch": 2.0,
|
397 |
+
"eval_gen_len": 59.1407,
|
398 |
+
"eval_loss": 1.8773729801177979,
|
399 |
+
"eval_rouge1": 50.6374,
|
400 |
+
"eval_rouge2": 26.6607,
|
401 |
+
"eval_rougeL": 46.1237,
|
402 |
+
"eval_rougeLsum": 46.2222,
|
403 |
+
"eval_runtime": 439.7943,
|
404 |
+
"eval_samples_per_second": 3.797,
|
405 |
+
"eval_steps_per_second": 0.191,
|
406 |
+
"step": 1336
|
407 |
+
},
|
408 |
+
{
|
409 |
+
"epoch": 2.020958083832335,
|
410 |
+
"grad_norm": 4.751077651977539,
|
411 |
+
"learning_rate": 4.433632734530938e-05,
|
412 |
+
"loss": 1.8838,
|
413 |
+
"step": 1350
|
414 |
+
},
|
415 |
+
{
|
416 |
+
"epoch": 2.058383233532934,
|
417 |
+
"grad_norm": 5.055469036102295,
|
418 |
+
"learning_rate": 4.412840984697272e-05,
|
419 |
+
"loss": 1.7158,
|
420 |
+
"step": 1375
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"epoch": 2.095808383233533,
|
424 |
+
"grad_norm": 4.67478609085083,
|
425 |
+
"learning_rate": 4.392049234863606e-05,
|
426 |
+
"loss": 1.7627,
|
427 |
+
"step": 1400
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"epoch": 2.1332335329341316,
|
431 |
+
"grad_norm": 5.1834306716918945,
|
432 |
+
"learning_rate": 4.3712574850299406e-05,
|
433 |
+
"loss": 1.8174,
|
434 |
+
"step": 1425
|
435 |
+
},
|
436 |
+
{
|
437 |
+
"epoch": 2.1706586826347305,
|
438 |
+
"grad_norm": 4.199576377868652,
|
439 |
+
"learning_rate": 4.350465735196274e-05,
|
440 |
+
"loss": 1.7619,
|
441 |
+
"step": 1450
|
442 |
+
},
|
443 |
+
{
|
444 |
+
"epoch": 2.2080838323353293,
|
445 |
+
"grad_norm": 4.928585529327393,
|
446 |
+
"learning_rate": 4.3296739853626084e-05,
|
447 |
+
"loss": 1.7676,
|
448 |
+
"step": 1475
|
449 |
+
},
|
450 |
+
{
|
451 |
+
"epoch": 2.245508982035928,
|
452 |
+
"grad_norm": 4.089141368865967,
|
453 |
+
"learning_rate": 4.308882235528942e-05,
|
454 |
+
"loss": 1.7588,
|
455 |
+
"step": 1500
|
456 |
+
},
|
457 |
+
{
|
458 |
+
"epoch": 2.282934131736527,
|
459 |
+
"grad_norm": 5.2093706130981445,
|
460 |
+
"learning_rate": 4.288090485695276e-05,
|
461 |
+
"loss": 1.7744,
|
462 |
+
"step": 1525
|
463 |
+
},
|
464 |
+
{
|
465 |
+
"epoch": 2.320359281437126,
|
466 |
+
"grad_norm": 4.752145767211914,
|
467 |
+
"learning_rate": 4.26729873586161e-05,
|
468 |
+
"loss": 1.7208,
|
469 |
+
"step": 1550
|
470 |
+
},
|
471 |
+
{
|
472 |
+
"epoch": 2.3577844311377247,
|
473 |
+
"grad_norm": 4.885648727416992,
|
474 |
+
"learning_rate": 4.246506986027944e-05,
|
475 |
+
"loss": 1.7038,
|
476 |
+
"step": 1575
|
477 |
+
},
|
478 |
+
{
|
479 |
+
"epoch": 2.3952095808383236,
|
480 |
+
"grad_norm": 5.004818439483643,
|
481 |
+
"learning_rate": 4.2257152361942784e-05,
|
482 |
+
"loss": 1.7129,
|
483 |
+
"step": 1600
|
484 |
+
},
|
485 |
+
{
|
486 |
+
"epoch": 2.432634730538922,
|
487 |
+
"grad_norm": 4.234861373901367,
|
488 |
+
"learning_rate": 4.2049234863606126e-05,
|
489 |
+
"loss": 1.6684,
|
490 |
+
"step": 1625
|
491 |
+
},
|
492 |
+
{
|
493 |
+
"epoch": 2.470059880239521,
|
494 |
+
"grad_norm": 4.313364028930664,
|
495 |
+
"learning_rate": 4.184131736526946e-05,
|
496 |
+
"loss": 1.6709,
|
497 |
+
"step": 1650
|
498 |
+
},
|
499 |
+
{
|
500 |
+
"epoch": 2.5074850299401197,
|
501 |
+
"grad_norm": 5.006948471069336,
|
502 |
+
"learning_rate": 4.1633399866932805e-05,
|
503 |
+
"loss": 1.7118,
|
504 |
+
"step": 1675
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"epoch": 2.5449101796407185,
|
508 |
+
"grad_norm": 5.49020528793335,
|
509 |
+
"learning_rate": 4.142548236859614e-05,
|
510 |
+
"loss": 1.7034,
|
511 |
+
"step": 1700
|
512 |
+
},
|
513 |
+
{
|
514 |
+
"epoch": 2.5823353293413174,
|
515 |
+
"grad_norm": 4.611372470855713,
|
516 |
+
"learning_rate": 4.121756487025948e-05,
|
517 |
+
"loss": 1.6309,
|
518 |
+
"step": 1725
|
519 |
+
},
|
520 |
+
{
|
521 |
+
"epoch": 2.6197604790419162,
|
522 |
+
"grad_norm": 4.116524696350098,
|
523 |
+
"learning_rate": 4.100964737192282e-05,
|
524 |
+
"loss": 1.654,
|
525 |
+
"step": 1750
|
526 |
+
},
|
527 |
+
{
|
528 |
+
"epoch": 2.657185628742515,
|
529 |
+
"grad_norm": 4.763690948486328,
|
530 |
+
"learning_rate": 4.080172987358616e-05,
|
531 |
+
"loss": 1.6983,
|
532 |
+
"step": 1775
|
533 |
+
},
|
534 |
+
{
|
535 |
+
"epoch": 2.694610778443114,
|
536 |
+
"grad_norm": 3.969961404800415,
|
537 |
+
"learning_rate": 4.0593812375249504e-05,
|
538 |
+
"loss": 1.666,
|
539 |
+
"step": 1800
|
540 |
+
},
|
541 |
+
{
|
542 |
+
"epoch": 2.7320359281437128,
|
543 |
+
"grad_norm": 4.265604496002197,
|
544 |
+
"learning_rate": 4.038589487691285e-05,
|
545 |
+
"loss": 1.7378,
|
546 |
+
"step": 1825
|
547 |
+
},
|
548 |
+
{
|
549 |
+
"epoch": 2.769461077844311,
|
550 |
+
"grad_norm": 4.415422439575195,
|
551 |
+
"learning_rate": 4.017797737857618e-05,
|
552 |
+
"loss": 1.6482,
|
553 |
+
"step": 1850
|
554 |
+
},
|
555 |
+
{
|
556 |
+
"epoch": 2.80688622754491,
|
557 |
+
"grad_norm": 4.685695648193359,
|
558 |
+
"learning_rate": 3.9970059880239525e-05,
|
559 |
+
"loss": 1.6576,
|
560 |
+
"step": 1875
|
561 |
+
},
|
562 |
+
{
|
563 |
+
"epoch": 2.844311377245509,
|
564 |
+
"grad_norm": 4.4989399909973145,
|
565 |
+
"learning_rate": 3.976214238190286e-05,
|
566 |
+
"loss": 1.6885,
|
567 |
+
"step": 1900
|
568 |
+
},
|
569 |
+
{
|
570 |
+
"epoch": 2.8817365269461077,
|
571 |
+
"grad_norm": 4.608761310577393,
|
572 |
+
"learning_rate": 3.9554224883566204e-05,
|
573 |
+
"loss": 1.6823,
|
574 |
+
"step": 1925
|
575 |
+
},
|
576 |
+
{
|
577 |
+
"epoch": 2.9191616766467066,
|
578 |
+
"grad_norm": 4.4332475662231445,
|
579 |
+
"learning_rate": 3.934630738522954e-05,
|
580 |
+
"loss": 1.6424,
|
581 |
+
"step": 1950
|
582 |
+
},
|
583 |
+
{
|
584 |
+
"epoch": 2.9565868263473054,
|
585 |
+
"grad_norm": 4.602639198303223,
|
586 |
+
"learning_rate": 3.913838988689288e-05,
|
587 |
+
"loss": 1.6947,
|
588 |
+
"step": 1975
|
589 |
+
},
|
590 |
+
{
|
591 |
+
"epoch": 2.9940119760479043,
|
592 |
+
"grad_norm": 4.250889301300049,
|
593 |
+
"learning_rate": 3.8930472388556225e-05,
|
594 |
+
"loss": 1.6076,
|
595 |
+
"step": 2000
|
596 |
+
},
|
597 |
+
{
|
598 |
+
"epoch": 3.0,
|
599 |
+
"eval_gen_len": 59.3683,
|
600 |
+
"eval_loss": 1.6866850852966309,
|
601 |
+
"eval_rouge1": 53.1998,
|
602 |
+
"eval_rouge2": 29.7919,
|
603 |
+
"eval_rougeL": 48.8395,
|
604 |
+
"eval_rougeLsum": 48.9549,
|
605 |
+
"eval_runtime": 382.1142,
|
606 |
+
"eval_samples_per_second": 4.37,
|
607 |
+
"eval_steps_per_second": 0.22,
|
608 |
+
"step": 2004
|
609 |
+
},
|
610 |
+
{
|
611 |
+
"epoch": 3.031437125748503,
|
612 |
+
"grad_norm": 5.463351249694824,
|
613 |
+
"learning_rate": 3.872255489021957e-05,
|
614 |
+
"loss": 1.4482,
|
615 |
+
"step": 2025
|
616 |
+
},
|
617 |
+
{
|
618 |
+
"epoch": 3.068862275449102,
|
619 |
+
"grad_norm": 4.865541458129883,
|
620 |
+
"learning_rate": 3.8514637391882903e-05,
|
621 |
+
"loss": 1.4829,
|
622 |
+
"step": 2050
|
623 |
+
},
|
624 |
+
{
|
625 |
+
"epoch": 3.106287425149701,
|
626 |
+
"grad_norm": 4.7900190353393555,
|
627 |
+
"learning_rate": 3.8306719893546246e-05,
|
628 |
+
"loss": 1.3993,
|
629 |
+
"step": 2075
|
630 |
+
},
|
631 |
+
{
|
632 |
+
"epoch": 3.143712574850299,
|
633 |
+
"grad_norm": 4.584388732910156,
|
634 |
+
"learning_rate": 3.809880239520958e-05,
|
635 |
+
"loss": 1.4461,
|
636 |
+
"step": 2100
|
637 |
+
},
|
638 |
+
{
|
639 |
+
"epoch": 3.181137724550898,
|
640 |
+
"grad_norm": 4.259522438049316,
|
641 |
+
"learning_rate": 3.7890884896872925e-05,
|
642 |
+
"loss": 1.4739,
|
643 |
+
"step": 2125
|
644 |
+
},
|
645 |
+
{
|
646 |
+
"epoch": 3.218562874251497,
|
647 |
+
"grad_norm": 4.623165130615234,
|
648 |
+
"learning_rate": 3.768296739853626e-05,
|
649 |
+
"loss": 1.4349,
|
650 |
+
"step": 2150
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"epoch": 3.2559880239520957,
|
654 |
+
"grad_norm": 4.376612186431885,
|
655 |
+
"learning_rate": 3.74750499001996e-05,
|
656 |
+
"loss": 1.3954,
|
657 |
+
"step": 2175
|
658 |
+
},
|
659 |
+
{
|
660 |
+
"epoch": 3.2934131736526946,
|
661 |
+
"grad_norm": 5.000776290893555,
|
662 |
+
"learning_rate": 3.726713240186294e-05,
|
663 |
+
"loss": 1.4457,
|
664 |
+
"step": 2200
|
665 |
+
},
|
666 |
+
{
|
667 |
+
"epoch": 3.3308383233532934,
|
668 |
+
"grad_norm": 4.057362079620361,
|
669 |
+
"learning_rate": 3.705921490352628e-05,
|
670 |
+
"loss": 1.4247,
|
671 |
+
"step": 2225
|
672 |
+
},
|
673 |
+
{
|
674 |
+
"epoch": 3.3682634730538923,
|
675 |
+
"grad_norm": 5.192569732666016,
|
676 |
+
"learning_rate": 3.6851297405189624e-05,
|
677 |
+
"loss": 1.4742,
|
678 |
+
"step": 2250
|
679 |
+
},
|
680 |
+
{
|
681 |
+
"epoch": 3.405688622754491,
|
682 |
+
"grad_norm": 4.622374534606934,
|
683 |
+
"learning_rate": 3.664337990685297e-05,
|
684 |
+
"loss": 1.4153,
|
685 |
+
"step": 2275
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"epoch": 3.44311377245509,
|
689 |
+
"grad_norm": 4.387070655822754,
|
690 |
+
"learning_rate": 3.64354624085163e-05,
|
691 |
+
"loss": 1.3851,
|
692 |
+
"step": 2300
|
693 |
+
},
|
694 |
+
{
|
695 |
+
"epoch": 3.480538922155689,
|
696 |
+
"grad_norm": 4.561577796936035,
|
697 |
+
"learning_rate": 3.6227544910179645e-05,
|
698 |
+
"loss": 1.4805,
|
699 |
+
"step": 2325
|
700 |
+
},
|
701 |
+
{
|
702 |
+
"epoch": 3.5179640718562872,
|
703 |
+
"grad_norm": 4.737853527069092,
|
704 |
+
"learning_rate": 3.601962741184298e-05,
|
705 |
+
"loss": 1.4177,
|
706 |
+
"step": 2350
|
707 |
+
},
|
708 |
+
{
|
709 |
+
"epoch": 3.555389221556886,
|
710 |
+
"grad_norm": 3.9790077209472656,
|
711 |
+
"learning_rate": 3.5811709913506324e-05,
|
712 |
+
"loss": 1.436,
|
713 |
+
"step": 2375
|
714 |
+
},
|
715 |
+
{
|
716 |
+
"epoch": 3.592814371257485,
|
717 |
+
"grad_norm": 4.334903717041016,
|
718 |
+
"learning_rate": 3.560379241516966e-05,
|
719 |
+
"loss": 1.4621,
|
720 |
+
"step": 2400
|
721 |
+
},
|
722 |
+
{
|
723 |
+
"epoch": 3.6302395209580838,
|
724 |
+
"grad_norm": 5.124073505401611,
|
725 |
+
"learning_rate": 3.5395874916833e-05,
|
726 |
+
"loss": 1.446,
|
727 |
+
"step": 2425
|
728 |
+
},
|
729 |
+
{
|
730 |
+
"epoch": 3.6676646706586826,
|
731 |
+
"grad_norm": 4.298630237579346,
|
732 |
+
"learning_rate": 3.5187957418496345e-05,
|
733 |
+
"loss": 1.4052,
|
734 |
+
"step": 2450
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 3.7050898203592815,
|
738 |
+
"grad_norm": 4.598848819732666,
|
739 |
+
"learning_rate": 3.498003992015968e-05,
|
740 |
+
"loss": 1.4273,
|
741 |
+
"step": 2475
|
742 |
+
},
|
743 |
+
{
|
744 |
+
"epoch": 3.7425149700598803,
|
745 |
+
"grad_norm": 4.356764793395996,
|
746 |
+
"learning_rate": 3.477212242182302e-05,
|
747 |
+
"loss": 1.4811,
|
748 |
+
"step": 2500
|
749 |
+
},
|
750 |
+
{
|
751 |
+
"epoch": 3.779940119760479,
|
752 |
+
"grad_norm": 5.15587854385376,
|
753 |
+
"learning_rate": 3.456420492348636e-05,
|
754 |
+
"loss": 1.4153,
|
755 |
+
"step": 2525
|
756 |
+
},
|
757 |
+
{
|
758 |
+
"epoch": 3.817365269461078,
|
759 |
+
"grad_norm": 5.045792579650879,
|
760 |
+
"learning_rate": 3.43562874251497e-05,
|
761 |
+
"loss": 1.4457,
|
762 |
+
"step": 2550
|
763 |
+
},
|
764 |
+
{
|
765 |
+
"epoch": 3.8547904191616764,
|
766 |
+
"grad_norm": 4.461826801300049,
|
767 |
+
"learning_rate": 3.414836992681304e-05,
|
768 |
+
"loss": 1.4368,
|
769 |
+
"step": 2575
|
770 |
+
},
|
771 |
+
{
|
772 |
+
"epoch": 3.8922155688622757,
|
773 |
+
"grad_norm": 5.3425493240356445,
|
774 |
+
"learning_rate": 3.394045242847638e-05,
|
775 |
+
"loss": 1.4049,
|
776 |
+
"step": 2600
|
777 |
+
},
|
778 |
+
{
|
779 |
+
"epoch": 3.929640718562874,
|
780 |
+
"grad_norm": 3.766526699066162,
|
781 |
+
"learning_rate": 3.373253493013972e-05,
|
782 |
+
"loss": 1.4038,
|
783 |
+
"step": 2625
|
784 |
+
},
|
785 |
+
{
|
786 |
+
"epoch": 3.967065868263473,
|
787 |
+
"grad_norm": 4.911787033081055,
|
788 |
+
"learning_rate": 3.3524617431803065e-05,
|
789 |
+
"loss": 1.4198,
|
790 |
+
"step": 2650
|
791 |
+
},
|
792 |
+
{
|
793 |
+
"epoch": 4.0,
|
794 |
+
"eval_gen_len": 59.4353,
|
795 |
+
"eval_loss": 1.6032490730285645,
|
796 |
+
"eval_rouge1": 54.5779,
|
797 |
+
"eval_rouge2": 31.4538,
|
798 |
+
"eval_rougeL": 50.0841,
|
799 |
+
"eval_rougeLsum": 50.2011,
|
800 |
+
"eval_runtime": 399.7762,
|
801 |
+
"eval_samples_per_second": 4.177,
|
802 |
+
"eval_steps_per_second": 0.21,
|
803 |
+
"step": 2672
|
804 |
+
},
|
805 |
+
{
|
806 |
+
"epoch": 4.004491017964072,
|
807 |
+
"grad_norm": 3.8055388927459717,
|
808 |
+
"learning_rate": 3.33166999334664e-05,
|
809 |
+
"loss": 1.4083,
|
810 |
+
"step": 2675
|
811 |
+
},
|
812 |
+
{
|
813 |
+
"epoch": 4.04191616766467,
|
814 |
+
"grad_norm": 4.550738334655762,
|
815 |
+
"learning_rate": 3.3108782435129744e-05,
|
816 |
+
"loss": 1.2661,
|
817 |
+
"step": 2700
|
818 |
+
},
|
819 |
+
{
|
820 |
+
"epoch": 4.0793413173652695,
|
821 |
+
"grad_norm": 4.272867202758789,
|
822 |
+
"learning_rate": 3.290086493679308e-05,
|
823 |
+
"loss": 1.2666,
|
824 |
+
"step": 2725
|
825 |
+
},
|
826 |
+
{
|
827 |
+
"epoch": 4.116766467065868,
|
828 |
+
"grad_norm": 4.6178975105285645,
|
829 |
+
"learning_rate": 3.269294743845642e-05,
|
830 |
+
"loss": 1.2705,
|
831 |
+
"step": 2750
|
832 |
+
},
|
833 |
+
{
|
834 |
+
"epoch": 4.154191616766467,
|
835 |
+
"grad_norm": 4.523929595947266,
|
836 |
+
"learning_rate": 3.248502994011976e-05,
|
837 |
+
"loss": 1.2674,
|
838 |
+
"step": 2775
|
839 |
+
},
|
840 |
+
{
|
841 |
+
"epoch": 4.191616766467066,
|
842 |
+
"grad_norm": 4.166606426239014,
|
843 |
+
"learning_rate": 3.22771124417831e-05,
|
844 |
+
"loss": 1.2285,
|
845 |
+
"step": 2800
|
846 |
+
},
|
847 |
+
{
|
848 |
+
"epoch": 4.229041916167665,
|
849 |
+
"grad_norm": 4.123347759246826,
|
850 |
+
"learning_rate": 3.206919494344644e-05,
|
851 |
+
"loss": 1.2656,
|
852 |
+
"step": 2825
|
853 |
+
},
|
854 |
+
{
|
855 |
+
"epoch": 4.266467065868263,
|
856 |
+
"grad_norm": 4.342975616455078,
|
857 |
+
"learning_rate": 3.1861277445109786e-05,
|
858 |
+
"loss": 1.2482,
|
859 |
+
"step": 2850
|
860 |
+
},
|
861 |
+
{
|
862 |
+
"epoch": 4.303892215568863,
|
863 |
+
"grad_norm": 5.345706462860107,
|
864 |
+
"learning_rate": 3.165335994677312e-05,
|
865 |
+
"loss": 1.253,
|
866 |
+
"step": 2875
|
867 |
+
},
|
868 |
+
{
|
869 |
+
"epoch": 4.341317365269461,
|
870 |
+
"grad_norm": 4.096311092376709,
|
871 |
+
"learning_rate": 3.1445442448436465e-05,
|
872 |
+
"loss": 1.2157,
|
873 |
+
"step": 2900
|
874 |
+
},
|
875 |
+
{
|
876 |
+
"epoch": 4.37874251497006,
|
877 |
+
"grad_norm": 4.385800361633301,
|
878 |
+
"learning_rate": 3.12375249500998e-05,
|
879 |
+
"loss": 1.2742,
|
880 |
+
"step": 2925
|
881 |
+
},
|
882 |
+
{
|
883 |
+
"epoch": 4.416167664670659,
|
884 |
+
"grad_norm": 4.278284549713135,
|
885 |
+
"learning_rate": 3.102960745176314e-05,
|
886 |
+
"loss": 1.2775,
|
887 |
+
"step": 2950
|
888 |
+
},
|
889 |
+
{
|
890 |
+
"epoch": 4.453592814371257,
|
891 |
+
"grad_norm": 4.5966057777404785,
|
892 |
+
"learning_rate": 3.082168995342648e-05,
|
893 |
+
"loss": 1.2118,
|
894 |
+
"step": 2975
|
895 |
+
},
|
896 |
+
{
|
897 |
+
"epoch": 4.491017964071856,
|
898 |
+
"grad_norm": 5.148831367492676,
|
899 |
+
"learning_rate": 3.061377245508982e-05,
|
900 |
+
"loss": 1.2548,
|
901 |
+
"step": 3000
|
902 |
+
},
|
903 |
+
{
|
904 |
+
"epoch": 4.528443113772455,
|
905 |
+
"grad_norm": 4.798081398010254,
|
906 |
+
"learning_rate": 3.040585495675316e-05,
|
907 |
+
"loss": 1.241,
|
908 |
+
"step": 3025
|
909 |
+
},
|
910 |
+
{
|
911 |
+
"epoch": 4.565868263473054,
|
912 |
+
"grad_norm": 4.575997829437256,
|
913 |
+
"learning_rate": 3.0197937458416503e-05,
|
914 |
+
"loss": 1.2761,
|
915 |
+
"step": 3050
|
916 |
+
},
|
917 |
+
{
|
918 |
+
"epoch": 4.6032934131736525,
|
919 |
+
"grad_norm": 4.259399890899658,
|
920 |
+
"learning_rate": 2.999001996007984e-05,
|
921 |
+
"loss": 1.2687,
|
922 |
+
"step": 3075
|
923 |
+
},
|
924 |
+
{
|
925 |
+
"epoch": 4.640718562874252,
|
926 |
+
"grad_norm": 4.092325210571289,
|
927 |
+
"learning_rate": 2.9782102461743182e-05,
|
928 |
+
"loss": 1.2834,
|
929 |
+
"step": 3100
|
930 |
+
},
|
931 |
+
{
|
932 |
+
"epoch": 4.67814371257485,
|
933 |
+
"grad_norm": 4.5153021812438965,
|
934 |
+
"learning_rate": 2.957418496340652e-05,
|
935 |
+
"loss": 1.2335,
|
936 |
+
"step": 3125
|
937 |
+
},
|
938 |
+
{
|
939 |
+
"epoch": 4.7155688622754495,
|
940 |
+
"grad_norm": 4.831052780151367,
|
941 |
+
"learning_rate": 2.9366267465069864e-05,
|
942 |
+
"loss": 1.2693,
|
943 |
+
"step": 3150
|
944 |
+
},
|
945 |
+
{
|
946 |
+
"epoch": 4.752994011976048,
|
947 |
+
"grad_norm": 4.156602382659912,
|
948 |
+
"learning_rate": 2.91583499667332e-05,
|
949 |
+
"loss": 1.2492,
|
950 |
+
"step": 3175
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"epoch": 4.790419161676647,
|
954 |
+
"grad_norm": 4.299253940582275,
|
955 |
+
"learning_rate": 2.8950432468396542e-05,
|
956 |
+
"loss": 1.2583,
|
957 |
+
"step": 3200
|
958 |
+
},
|
959 |
+
{
|
960 |
+
"epoch": 4.827844311377246,
|
961 |
+
"grad_norm": 4.564883232116699,
|
962 |
+
"learning_rate": 2.874251497005988e-05,
|
963 |
+
"loss": 1.2478,
|
964 |
+
"step": 3225
|
965 |
+
},
|
966 |
+
{
|
967 |
+
"epoch": 4.865269461077844,
|
968 |
+
"grad_norm": 4.6624250411987305,
|
969 |
+
"learning_rate": 2.8534597471723224e-05,
|
970 |
+
"loss": 1.2777,
|
971 |
+
"step": 3250
|
972 |
+
},
|
973 |
+
{
|
974 |
+
"epoch": 4.902694610778443,
|
975 |
+
"grad_norm": 4.570215702056885,
|
976 |
+
"learning_rate": 2.832667997338656e-05,
|
977 |
+
"loss": 1.2541,
|
978 |
+
"step": 3275
|
979 |
+
},
|
980 |
+
{
|
981 |
+
"epoch": 4.940119760479042,
|
982 |
+
"grad_norm": 4.049204349517822,
|
983 |
+
"learning_rate": 2.8118762475049902e-05,
|
984 |
+
"loss": 1.2474,
|
985 |
+
"step": 3300
|
986 |
+
},
|
987 |
+
{
|
988 |
+
"epoch": 4.977544910179641,
|
989 |
+
"grad_norm": 3.723095178604126,
|
990 |
+
"learning_rate": 2.7910844976713242e-05,
|
991 |
+
"loss": 1.1911,
|
992 |
+
"step": 3325
|
993 |
+
},
|
994 |
+
{
|
995 |
+
"epoch": 5.0,
|
996 |
+
"eval_gen_len": 58.9928,
|
997 |
+
"eval_loss": 1.555981993675232,
|
998 |
+
"eval_rouge1": 55.2733,
|
999 |
+
"eval_rouge2": 32.3347,
|
1000 |
+
"eval_rougeL": 51.0279,
|
1001 |
+
"eval_rougeLsum": 51.1368,
|
1002 |
+
"eval_runtime": 420.4345,
|
1003 |
+
"eval_samples_per_second": 3.972,
|
1004 |
+
"eval_steps_per_second": 0.2,
|
1005 |
+
"step": 3340
|
1006 |
+
},
|
1007 |
+
{
|
1008 |
+
"epoch": 5.014970059880239,
|
1009 |
+
"grad_norm": 4.139627933502197,
|
1010 |
+
"learning_rate": 2.7702927478376584e-05,
|
1011 |
+
"loss": 1.1937,
|
1012 |
+
"step": 3350
|
1013 |
+
},
|
1014 |
+
{
|
1015 |
+
"epoch": 5.052395209580839,
|
1016 |
+
"grad_norm": 4.160578727722168,
|
1017 |
+
"learning_rate": 2.749500998003992e-05,
|
1018 |
+
"loss": 1.0797,
|
1019 |
+
"step": 3375
|
1020 |
+
},
|
1021 |
+
{
|
1022 |
+
"epoch": 5.089820359281437,
|
1023 |
+
"grad_norm": 4.00601863861084,
|
1024 |
+
"learning_rate": 2.7287092481703263e-05,
|
1025 |
+
"loss": 1.1202,
|
1026 |
+
"step": 3400
|
1027 |
+
},
|
1028 |
+
{
|
1029 |
+
"epoch": 5.127245508982036,
|
1030 |
+
"grad_norm": 4.496889114379883,
|
1031 |
+
"learning_rate": 2.7079174983366602e-05,
|
1032 |
+
"loss": 1.1162,
|
1033 |
+
"step": 3425
|
1034 |
+
},
|
1035 |
+
{
|
1036 |
+
"epoch": 5.164670658682635,
|
1037 |
+
"grad_norm": 5.572657585144043,
|
1038 |
+
"learning_rate": 2.6871257485029945e-05,
|
1039 |
+
"loss": 1.0752,
|
1040 |
+
"step": 3450
|
1041 |
+
},
|
1042 |
+
{
|
1043 |
+
"epoch": 5.202095808383233,
|
1044 |
+
"grad_norm": 4.205285549163818,
|
1045 |
+
"learning_rate": 2.666333998669328e-05,
|
1046 |
+
"loss": 1.1619,
|
1047 |
+
"step": 3475
|
1048 |
+
},
|
1049 |
+
{
|
1050 |
+
"epoch": 5.2395209580838324,
|
1051 |
+
"grad_norm": 4.050065517425537,
|
1052 |
+
"learning_rate": 2.6455422488356623e-05,
|
1053 |
+
"loss": 1.1142,
|
1054 |
+
"step": 3500
|
1055 |
+
},
|
1056 |
+
{
|
1057 |
+
"epoch": 5.276946107784431,
|
1058 |
+
"grad_norm": 4.341710567474365,
|
1059 |
+
"learning_rate": 2.6247504990019962e-05,
|
1060 |
+
"loss": 1.1131,
|
1061 |
+
"step": 3525
|
1062 |
+
},
|
1063 |
+
{
|
1064 |
+
"epoch": 5.31437125748503,
|
1065 |
+
"grad_norm": 4.592411994934082,
|
1066 |
+
"learning_rate": 2.6039587491683305e-05,
|
1067 |
+
"loss": 1.1312,
|
1068 |
+
"step": 3550
|
1069 |
+
},
|
1070 |
+
{
|
1071 |
+
"epoch": 5.3517964071856285,
|
1072 |
+
"grad_norm": 4.056641101837158,
|
1073 |
+
"learning_rate": 2.583166999334664e-05,
|
1074 |
+
"loss": 1.1562,
|
1075 |
+
"step": 3575
|
1076 |
+
},
|
1077 |
+
{
|
1078 |
+
"epoch": 5.389221556886228,
|
1079 |
+
"grad_norm": 4.492640018463135,
|
1080 |
+
"learning_rate": 2.5623752495009983e-05,
|
1081 |
+
"loss": 1.1004,
|
1082 |
+
"step": 3600
|
1083 |
+
},
|
1084 |
+
{
|
1085 |
+
"epoch": 5.426646706586826,
|
1086 |
+
"grad_norm": 4.112987995147705,
|
1087 |
+
"learning_rate": 2.5415834996673323e-05,
|
1088 |
+
"loss": 1.1045,
|
1089 |
+
"step": 3625
|
1090 |
+
},
|
1091 |
+
{
|
1092 |
+
"epoch": 5.4640718562874255,
|
1093 |
+
"grad_norm": 4.25972843170166,
|
1094 |
+
"learning_rate": 2.5207917498336665e-05,
|
1095 |
+
"loss": 1.1705,
|
1096 |
+
"step": 3650
|
1097 |
+
},
|
1098 |
+
{
|
1099 |
+
"epoch": 5.501497005988024,
|
1100 |
+
"grad_norm": 4.702874660491943,
|
1101 |
+
"learning_rate": 2.5e-05,
|
1102 |
+
"loss": 1.1236,
|
1103 |
+
"step": 3675
|
1104 |
+
},
|
1105 |
+
{
|
1106 |
+
"epoch": 5.538922155688622,
|
1107 |
+
"grad_norm": 4.413760662078857,
|
1108 |
+
"learning_rate": 2.479208250166334e-05,
|
1109 |
+
"loss": 1.0773,
|
1110 |
+
"step": 3700
|
1111 |
+
},
|
1112 |
+
{
|
1113 |
+
"epoch": 5.576347305389222,
|
1114 |
+
"grad_norm": 4.19527530670166,
|
1115 |
+
"learning_rate": 2.458416500332668e-05,
|
1116 |
+
"loss": 1.1463,
|
1117 |
+
"step": 3725
|
1118 |
+
},
|
1119 |
+
{
|
1120 |
+
"epoch": 5.61377245508982,
|
1121 |
+
"grad_norm": 4.174712657928467,
|
1122 |
+
"learning_rate": 2.4376247504990022e-05,
|
1123 |
+
"loss": 1.1354,
|
1124 |
+
"step": 3750
|
1125 |
+
},
|
1126 |
+
{
|
1127 |
+
"epoch": 5.651197604790419,
|
1128 |
+
"grad_norm": 4.140392780303955,
|
1129 |
+
"learning_rate": 2.416833000665336e-05,
|
1130 |
+
"loss": 1.1154,
|
1131 |
+
"step": 3775
|
1132 |
+
},
|
1133 |
+
{
|
1134 |
+
"epoch": 5.688622754491018,
|
1135 |
+
"grad_norm": 6.118780612945557,
|
1136 |
+
"learning_rate": 2.39604125083167e-05,
|
1137 |
+
"loss": 1.1698,
|
1138 |
+
"step": 3800
|
1139 |
+
},
|
1140 |
+
{
|
1141 |
+
"epoch": 5.726047904191617,
|
1142 |
+
"grad_norm": 4.042623043060303,
|
1143 |
+
"learning_rate": 2.375249500998004e-05,
|
1144 |
+
"loss": 1.0963,
|
1145 |
+
"step": 3825
|
1146 |
+
},
|
1147 |
+
{
|
1148 |
+
"epoch": 5.763473053892215,
|
1149 |
+
"grad_norm": 4.63490629196167,
|
1150 |
+
"learning_rate": 2.3544577511643383e-05,
|
1151 |
+
"loss": 1.0947,
|
1152 |
+
"step": 3850
|
1153 |
+
},
|
1154 |
+
{
|
1155 |
+
"epoch": 5.800898203592815,
|
1156 |
+
"grad_norm": 4.218607425689697,
|
1157 |
+
"learning_rate": 2.3336660013306722e-05,
|
1158 |
+
"loss": 1.0745,
|
1159 |
+
"step": 3875
|
1160 |
+
},
|
1161 |
+
{
|
1162 |
+
"epoch": 5.838323353293413,
|
1163 |
+
"grad_norm": 4.426632404327393,
|
1164 |
+
"learning_rate": 2.312874251497006e-05,
|
1165 |
+
"loss": 1.1408,
|
1166 |
+
"step": 3900
|
1167 |
+
},
|
1168 |
+
{
|
1169 |
+
"epoch": 5.875748502994012,
|
1170 |
+
"grad_norm": 4.3070478439331055,
|
1171 |
+
"learning_rate": 2.29208250166334e-05,
|
1172 |
+
"loss": 1.0654,
|
1173 |
+
"step": 3925
|
1174 |
+
},
|
1175 |
+
{
|
1176 |
+
"epoch": 5.913173652694611,
|
1177 |
+
"grad_norm": 4.342469215393066,
|
1178 |
+
"learning_rate": 2.2712907518296743e-05,
|
1179 |
+
"loss": 1.148,
|
1180 |
+
"step": 3950
|
1181 |
+
},
|
1182 |
+
{
|
1183 |
+
"epoch": 5.950598802395209,
|
1184 |
+
"grad_norm": 4.6176862716674805,
|
1185 |
+
"learning_rate": 2.2504990019960082e-05,
|
1186 |
+
"loss": 1.1233,
|
1187 |
+
"step": 3975
|
1188 |
+
},
|
1189 |
+
{
|
1190 |
+
"epoch": 5.9880239520958085,
|
1191 |
+
"grad_norm": 4.570769309997559,
|
1192 |
+
"learning_rate": 2.229707252162342e-05,
|
1193 |
+
"loss": 1.1279,
|
1194 |
+
"step": 4000
|
1195 |
+
},
|
1196 |
+
{
|
1197 |
+
"epoch": 6.0,
|
1198 |
+
"eval_gen_len": 59.3168,
|
1199 |
+
"eval_loss": 1.5299330949783325,
|
1200 |
+
"eval_rouge1": 55.6399,
|
1201 |
+
"eval_rouge2": 32.8373,
|
1202 |
+
"eval_rougeL": 51.2672,
|
1203 |
+
"eval_rougeLsum": 51.362,
|
1204 |
+
"eval_runtime": 429.7929,
|
1205 |
+
"eval_samples_per_second": 3.886,
|
1206 |
+
"eval_steps_per_second": 0.195,
|
1207 |
+
"step": 4008
|
1208 |
+
},
|
1209 |
+
{
|
1210 |
+
"epoch": 6.025449101796407,
|
1211 |
+
"grad_norm": 3.6286568641662598,
|
1212 |
+
"learning_rate": 2.208915502328676e-05,
|
1213 |
+
"loss": 1.0893,
|
1214 |
+
"step": 4025
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"epoch": 6.062874251497006,
|
1218 |
+
"grad_norm": 3.461707830429077,
|
1219 |
+
"learning_rate": 2.1881237524950103e-05,
|
1220 |
+
"loss": 0.9844,
|
1221 |
+
"step": 4050
|
1222 |
+
},
|
1223 |
+
{
|
1224 |
+
"epoch": 6.100299401197605,
|
1225 |
+
"grad_norm": 4.06862735748291,
|
1226 |
+
"learning_rate": 2.1673320026613443e-05,
|
1227 |
+
"loss": 1.0244,
|
1228 |
+
"step": 4075
|
1229 |
+
},
|
1230 |
+
{
|
1231 |
+
"epoch": 6.137724550898204,
|
1232 |
+
"grad_norm": 4.019289016723633,
|
1233 |
+
"learning_rate": 2.1465402528276782e-05,
|
1234 |
+
"loss": 0.9887,
|
1235 |
+
"step": 4100
|
1236 |
+
},
|
1237 |
+
{
|
1238 |
+
"epoch": 6.175149700598802,
|
1239 |
+
"grad_norm": 3.8128530979156494,
|
1240 |
+
"learning_rate": 2.125748502994012e-05,
|
1241 |
+
"loss": 1.0166,
|
1242 |
+
"step": 4125
|
1243 |
+
},
|
1244 |
+
{
|
1245 |
+
"epoch": 6.212574850299402,
|
1246 |
+
"grad_norm": 4.389101982116699,
|
1247 |
+
"learning_rate": 2.104956753160346e-05,
|
1248 |
+
"loss": 1.0273,
|
1249 |
+
"step": 4150
|
1250 |
+
},
|
1251 |
+
{
|
1252 |
+
"epoch": 6.25,
|
1253 |
+
"grad_norm": 4.000133037567139,
|
1254 |
+
"learning_rate": 2.0841650033266803e-05,
|
1255 |
+
"loss": 0.9862,
|
1256 |
+
"step": 4175
|
1257 |
+
},
|
1258 |
+
{
|
1259 |
+
"epoch": 6.287425149700598,
|
1260 |
+
"grad_norm": 4.630964756011963,
|
1261 |
+
"learning_rate": 2.0633732534930142e-05,
|
1262 |
+
"loss": 1.0145,
|
1263 |
+
"step": 4200
|
1264 |
+
},
|
1265 |
+
{
|
1266 |
+
"epoch": 6.324850299401198,
|
1267 |
+
"grad_norm": 5.184226036071777,
|
1268 |
+
"learning_rate": 2.042581503659348e-05,
|
1269 |
+
"loss": 1.0428,
|
1270 |
+
"step": 4225
|
1271 |
+
},
|
1272 |
+
{
|
1273 |
+
"epoch": 6.362275449101796,
|
1274 |
+
"grad_norm": 3.8193106651306152,
|
1275 |
+
"learning_rate": 2.021789753825682e-05,
|
1276 |
+
"loss": 1.0174,
|
1277 |
+
"step": 4250
|
1278 |
+
},
|
1279 |
+
{
|
1280 |
+
"epoch": 6.399700598802395,
|
1281 |
+
"grad_norm": 3.856017827987671,
|
1282 |
+
"learning_rate": 2.0009980039920163e-05,
|
1283 |
+
"loss": 0.9793,
|
1284 |
+
"step": 4275
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"epoch": 6.437125748502994,
|
1288 |
+
"grad_norm": 4.4376220703125,
|
1289 |
+
"learning_rate": 1.98020625415835e-05,
|
1290 |
+
"loss": 1.0351,
|
1291 |
+
"step": 4300
|
1292 |
+
},
|
1293 |
+
{
|
1294 |
+
"epoch": 6.474550898203593,
|
1295 |
+
"grad_norm": 4.181103229522705,
|
1296 |
+
"learning_rate": 1.9594145043246838e-05,
|
1297 |
+
"loss": 0.9967,
|
1298 |
+
"step": 4325
|
1299 |
+
},
|
1300 |
+
{
|
1301 |
+
"epoch": 6.5119760479041915,
|
1302 |
+
"grad_norm": 4.993010520935059,
|
1303 |
+
"learning_rate": 1.938622754491018e-05,
|
1304 |
+
"loss": 0.9976,
|
1305 |
+
"step": 4350
|
1306 |
+
},
|
1307 |
+
{
|
1308 |
+
"epoch": 6.549401197604791,
|
1309 |
+
"grad_norm": 4.017760753631592,
|
1310 |
+
"learning_rate": 1.917831004657352e-05,
|
1311 |
+
"loss": 0.9715,
|
1312 |
+
"step": 4375
|
1313 |
+
},
|
1314 |
+
{
|
1315 |
+
"epoch": 6.586826347305389,
|
1316 |
+
"grad_norm": 4.2860002517700195,
|
1317 |
+
"learning_rate": 1.897039254823686e-05,
|
1318 |
+
"loss": 1.0256,
|
1319 |
+
"step": 4400
|
1320 |
+
},
|
1321 |
+
{
|
1322 |
+
"epoch": 6.624251497005988,
|
1323 |
+
"grad_norm": 4.793003082275391,
|
1324 |
+
"learning_rate": 1.87624750499002e-05,
|
1325 |
+
"loss": 1.025,
|
1326 |
+
"step": 4425
|
1327 |
+
},
|
1328 |
+
{
|
1329 |
+
"epoch": 6.661676646706587,
|
1330 |
+
"grad_norm": NaN,
|
1331 |
+
"learning_rate": 1.8562874251497005e-05,
|
1332 |
+
"loss": 1.0235,
|
1333 |
+
"step": 4450
|
1334 |
+
},
|
1335 |
+
{
|
1336 |
+
"epoch": 6.699101796407185,
|
1337 |
+
"grad_norm": 3.581146717071533,
|
1338 |
+
"learning_rate": 1.8354956753160347e-05,
|
1339 |
+
"loss": 0.9873,
|
1340 |
+
"step": 4475
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 6.736526946107785,
|
1344 |
+
"grad_norm": 4.64194393157959,
|
1345 |
+
"learning_rate": 1.8147039254823687e-05,
|
1346 |
+
"loss": 1.023,
|
1347 |
+
"step": 4500
|
1348 |
+
},
|
1349 |
+
{
|
1350 |
+
"epoch": 6.773952095808383,
|
1351 |
+
"grad_norm": 4.145544052124023,
|
1352 |
+
"learning_rate": 1.7939121756487026e-05,
|
1353 |
+
"loss": 1.041,
|
1354 |
+
"step": 4525
|
1355 |
+
},
|
1356 |
+
{
|
1357 |
+
"epoch": 6.811377245508982,
|
1358 |
+
"grad_norm": 3.821073055267334,
|
1359 |
+
"learning_rate": 1.7731204258150365e-05,
|
1360 |
+
"loss": 1.0401,
|
1361 |
+
"step": 4550
|
1362 |
+
},
|
1363 |
+
{
|
1364 |
+
"epoch": 6.848802395209581,
|
1365 |
+
"grad_norm": 5.059972286224365,
|
1366 |
+
"learning_rate": 1.7523286759813708e-05,
|
1367 |
+
"loss": 1.0292,
|
1368 |
+
"step": 4575
|
1369 |
+
},
|
1370 |
+
{
|
1371 |
+
"epoch": 6.88622754491018,
|
1372 |
+
"grad_norm": 4.337078094482422,
|
1373 |
+
"learning_rate": 1.7315369261477047e-05,
|
1374 |
+
"loss": 1.0954,
|
1375 |
+
"step": 4600
|
1376 |
+
},
|
1377 |
+
{
|
1378 |
+
"epoch": 6.923652694610778,
|
1379 |
+
"grad_norm": 4.142930507659912,
|
1380 |
+
"learning_rate": 1.7107451763140386e-05,
|
1381 |
+
"loss": 1.0082,
|
1382 |
+
"step": 4625
|
1383 |
+
},
|
1384 |
+
{
|
1385 |
+
"epoch": 6.961077844311378,
|
1386 |
+
"grad_norm": 4.153197765350342,
|
1387 |
+
"learning_rate": 1.6899534264803725e-05,
|
1388 |
+
"loss": 1.0026,
|
1389 |
+
"step": 4650
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 6.998502994011976,
|
1393 |
+
"grad_norm": 4.053616523742676,
|
1394 |
+
"learning_rate": 1.6691616766467068e-05,
|
1395 |
+
"loss": 1.0112,
|
1396 |
+
"step": 4675
|
1397 |
+
},
|
1398 |
+
{
|
1399 |
+
"epoch": 7.0,
|
1400 |
+
"eval_gen_len": 59.7293,
|
1401 |
+
"eval_loss": 1.5230909585952759,
|
1402 |
+
"eval_rouge1": 55.7716,
|
1403 |
+
"eval_rouge2": 33.0852,
|
1404 |
+
"eval_rougeL": 51.3404,
|
1405 |
+
"eval_rougeLsum": 51.4618,
|
1406 |
+
"eval_runtime": 376.4887,
|
1407 |
+
"eval_samples_per_second": 4.436,
|
1408 |
+
"eval_steps_per_second": 0.223,
|
1409 |
+
"step": 4676
|
1410 |
+
}
  ],
  "logging_steps": 25,
  "max_steps": 6680,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 749389564477440.0,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}
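A small sketch for pulling the headline numbers back out of this file (local path assumed): the best checkpoint and the per-epoch evaluation metrics recorded in log_history.

```python
import json

with open("checkpoint-4676/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"], state["best_model_checkpoint"])

# Evaluation entries (one per epoch) carry eval_loss and the ROUGE scores.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(entry["epoch"], entry["eval_loss"], entry["eval_rouge1"], entry["eval_rougeL"])
```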
checkpoint-4676/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2938b4ba280bc0635867dead56a341625ab639267b9813724099ada202129ed4
size 5176
checkpoint-4676/vocab.json
ADDED
The diff for this file is too large to render.
config.json
ADDED
@@ -0,0 +1,61 @@
{
  "_name_or_path": "Helsinki-NLP/opus-mt-da-en",
  "_num_labels": 3,
  "activation_dropout": 0.0,
  "activation_function": "swish",
  "add_bias_logits": false,
  "add_final_layer_norm": false,
  "architectures": [
    "MarianMTModel"
  ],
  "attention_dropout": 0.0,
  "bad_words_ids": [
    [
      58929
    ]
  ],
  "bos_token_id": 0,
  "classif_dropout": 0.0,
  "classifier_dropout": 0.0,
  "d_model": 512,
  "decoder_attention_heads": 8,
  "decoder_ffn_dim": 2048,
  "decoder_layerdrop": 0.0,
  "decoder_layers": 6,
  "decoder_start_token_id": 58929,
  "decoder_vocab_size": 58930,
  "dropout": 0.1,
  "encoder_attention_heads": 8,
  "encoder_ffn_dim": 2048,
  "encoder_layerdrop": 0.0,
  "encoder_layers": 6,
  "eos_token_id": 0,
  "forced_eos_token_id": 0,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2"
  },
  "init_std": 0.02,
  "is_encoder_decoder": true,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "max_length": 512,
  "max_position_embeddings": 512,
  "model_type": "marian",
  "normalize_before": false,
  "normalize_embedding": false,
  "num_beams": 4,
  "num_hidden_layers": 6,
  "pad_token_id": 58929,
  "scale_embedding": true,
  "share_encoder_decoder_embeddings": true,
  "static_position_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.40.1",
  "use_cache": true,
  "vocab_size": 58930
}
generation_config.json
ADDED
@@ -0,0 +1,16 @@
{
  "bad_words_ids": [
    [
      58929
    ]
  ],
  "bos_token_id": 0,
  "decoder_start_token_id": 58929,
  "eos_token_id": 0,
  "forced_eos_token_id": 0,
  "max_length": 512,
  "num_beams": 4,
  "pad_token_id": 58929,
  "renormalize_logits": true,
  "transformers_version": "4.40.1"
}
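These defaults (beam search with 4 beams, max_length 512, the pad token banned via bad_words_ids) are what generate() picks up automatically. A sketch of loading and overriding them, with the repo id assumed as before:

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("LRJ1981/autotrain-ve993-lub6e")  # assumed repo id
print(gen_config.num_beams, gen_config.max_length)  # 4 512

# Per-call overrides still win, e.g. model.generate(**inputs, generation_config=gen_config, num_beams=8)
```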
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cd59557e95ee48f4799a183b95004f37eeddb04d1f4125cf6d859fc60cedd5cd
size 297507400
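The three lines above are a Git LFS pointer, not the weights themselves; as a sketch, the ~297 MB safetensors file can be fetched and opened like this (repo id assumed, as before).

```python
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

path = hf_hub_download("LRJ1981/autotrain-ve993-lub6e", "model.safetensors")  # assumed repo id
state_dict = load_file(path)
print(len(state_dict), "tensors")
```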
runs/May09_12-56-42_r-lrj1981-ml-test-2or0gyz7-dece2-sq4de/events.out.tfevents.1715259405.r-lrj1981-ml-test-2or0gyz7-dece2-sq4de.148.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:06ce56e0e751fb923dd23e976becdfb405f22e8292d1a926a36db9241ca64cfa
+size 61293
runs/May09_12-56-42_r-lrj1981-ml-test-2or0gyz7-dece2-sq4de/events.out.tfevents.1715264442.r-lrj1981-ml-test-2or0gyz7-dece2-sq4de.148.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:47135a7d9a0bdb5fd5f3c4cd92208de0a05b5921b4411294d93c5062f7aa80a9
size 613
source.spm
ADDED
Binary file (820 kB)
special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
{
  "eos_token": "</s>",
  "pad_token": "<pad>",
  "unk_token": "<unk>"
}
target.spm
ADDED
Binary file (788 kB)
tokenizer_config.json
ADDED
@@ -0,0 +1,38 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "58929": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "eos_token": "</s>",
  "model_max_length": 512,
  "pad_token": "<pad>",
  "separate_vocabs": false,
  "source_lang": "da",
  "sp_model_kwargs": {},
  "target_lang": "en",
  "tokenizer_class": "MarianTokenizer",
  "unk_token": "<unk>"
}
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2938b4ba280bc0635867dead56a341625ab639267b9813724099ada202129ed4
size 5176
training_params.json
ADDED
@@ -0,0 +1,35 @@
{
  "data_path": "autotrain-ve993-lub6e/autotrain-data",
  "model": "Helsinki-NLP/opus-mt-da-en",
  "username": "LRJ1981",
  "seed": 42,
  "train_split": "train",
  "valid_split": "validation",
  "project_name": "autotrain-ve993-lub6e",
  "push_to_hub": true,
  "text_column": "autotrain_text",
  "target_column": "autotrain_label",
  "lr": 5e-05,
  "epochs": 10,
  "max_seq_length": 128,
  "max_target_length": 128,
  "batch_size": 10,
  "warmup_ratio": 0.1,
  "gradient_accumulation": 1,
  "optimizer": "adamw_torch",
  "scheduler": "linear",
  "weight_decay": 0.0,
  "max_grad_norm": 1.0,
  "logging_steps": -1,
  "evaluation_strategy": "epoch",
  "auto_find_batch_size": false,
  "mixed_precision": "fp16",
  "save_total_limit": 1,
  "peft": false,
  "quantization": "int4",
  "lora_r": 16,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "target_modules": "all-linear",
  "log": "tensorboard"
}
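For reference, a rough sketch of how these AutoTrain parameters map onto Seq2SeqTrainingArguments; this approximates what the AutoTrain seq2seq trainer sets up rather than reproducing its exact code (the quantization and lora_* keys are ignored here because peft is false).

```python
from transformers import Seq2SeqTrainingArguments

args = Seq2SeqTrainingArguments(
    output_dir="autotrain-ve993-lub6e",
    learning_rate=5e-05,
    num_train_epochs=10,
    per_device_train_batch_size=10,
    per_device_eval_batch_size=10,   # assumption: eval batch size mirrors batch_size
    warmup_ratio=0.1,
    gradient_accumulation_steps=1,
    optim="adamw_torch",
    lr_scheduler_type="linear",
    weight_decay=0.0,
    max_grad_norm=1.0,
    evaluation_strategy="epoch",
    save_total_limit=1,
    fp16=True,                       # mixed_precision: "fp16"
    seed=42,
    predict_with_generate=True,      # assumption: needed for ROUGE evaluation
    report_to="tensorboard",
)
```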
vocab.json
ADDED
The diff for this file is too large to render.