Upload 11 files

Browse files

Files changed (11) hide show

config.json +26 -0
generation_config.json +7 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +1 -0
spiece.model +3 -0
tokenizer_config.json +1 -0
trainer_state.json +280 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "_name_or_path": "/home/patrick/hugging_face/t5/mt5-large",
+  "architectures": [
+    "MT5ForConditionalGeneration"
+  ],
+  "d_ff": 2816,
+  "d_kv": 64,
+  "d_model": 1024,
+  "decoder_start_token_id": 0,
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "mt5",
+  "num_decoder_layers": 24,
+  "num_heads": 16,
+  "num_layers": 24,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
+  "vocab_size": 250112
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.40.2"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76ff7cfb57efc875e27624000392814a16dab31dbb0429a193ece8167e41af4d
+size 4918393736

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:208109da017f8c1520ebf394ddc447b6f51b5305d691fd6f44a61d944cb896a1
+size 9837122384

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c521e9deb3664d0cd507f97c9cf749f79420f711bf092c1d58e3c467d3b6fb7b
+size 13990

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b7f151c12d99847478a0194bda24e6f18590939dbf74d0bbc868de7ff0c88f5
+size 1064

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+size 4309802

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 0, "additional_special_tokens": null, "special_tokens_map_file": "/home/patrick/.cache/torch/transformers/685ac0ca8568ec593a48b61b0a3c272beee9bc194a3c7241d15dcadb5f875e53.f76030f3ec1b96a8199b2593390c610e76ca8028ef3d24680000619ffb646276", "tokenizer_file": null, "name_or_path": "google/mt5-small"}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,280 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.893048128342246,
+  "eval_steps": 500,
+  "global_step": 18500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.26737967914438504,
+      "grad_norm": 2.1504368782043457,
+      "learning_rate": 4.8663101604278076e-05,
+      "loss": 7.6189,
+      "step": 500
+    },
+    {
+      "epoch": 0.5347593582887701,
+      "grad_norm": 0.910918653011322,
+      "learning_rate": 4.732620320855615e-05,
+      "loss": 0.7574,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8021390374331551,
+      "grad_norm": 0.7778192162513733,
+      "learning_rate": 4.598930481283423e-05,
+      "loss": 0.681,
+      "step": 1500
+    },
+    {
+      "epoch": 1.0695187165775402,
+      "grad_norm": 0.9133714437484741,
+      "learning_rate": 4.4652406417112304e-05,
+      "loss": 0.6446,
+      "step": 2000
+    },
+    {
+      "epoch": 1.3368983957219251,
+      "grad_norm": 1.6034926176071167,
+      "learning_rate": 4.331550802139038e-05,
+      "loss": 0.5912,
+      "step": 2500
+    },
+    {
+      "epoch": 1.6042780748663101,
+      "grad_norm": 0.7972973585128784,
+      "learning_rate": 4.197860962566845e-05,
+      "loss": 0.5784,
+      "step": 3000
+    },
+    {
+      "epoch": 1.8716577540106951,
+      "grad_norm": 1.0540673732757568,
+      "learning_rate": 4.0641711229946525e-05,
+      "loss": 0.5728,
+      "step": 3500
+    },
+    {
+      "epoch": 2.1390374331550803,
+      "grad_norm": 1.4703896045684814,
+      "learning_rate": 3.93048128342246e-05,
+      "loss": 0.5353,
+      "step": 4000
+    },
+    {
+      "epoch": 2.406417112299465,
+      "grad_norm": 1.052030086517334,
+      "learning_rate": 3.796791443850268e-05,
+      "loss": 0.5068,
+      "step": 4500
+    },
+    {
+      "epoch": 2.6737967914438503,
+      "grad_norm": 1.119246006011963,
+      "learning_rate": 3.6631016042780753e-05,
+      "loss": 0.5069,
+      "step": 5000
+    },
+    {
+      "epoch": 2.9411764705882355,
+      "grad_norm": 0.73586505651474,
+      "learning_rate": 3.529411764705883e-05,
+      "loss": 0.5034,
+      "step": 5500
+    },
+    {
+      "epoch": 3.2085561497326203,
+      "grad_norm": 0.8226745128631592,
+      "learning_rate": 3.39572192513369e-05,
+      "loss": 0.4687,
+      "step": 6000
+    },
+    {
+      "epoch": 3.4759358288770055,
+      "grad_norm": 0.8904627561569214,
+      "learning_rate": 3.2620320855614975e-05,
+      "loss": 0.4591,
+      "step": 6500
+    },
+    {
+      "epoch": 3.7433155080213902,
+      "grad_norm": 0.7578887939453125,
+      "learning_rate": 3.128342245989305e-05,
+      "loss": 0.4589,
+      "step": 7000
+    },
+    {
+      "epoch": 4.010695187165775,
+      "grad_norm": 0.8590931296348572,
+      "learning_rate": 2.9946524064171122e-05,
+      "loss": 0.4498,
+      "step": 7500
+    },
+    {
+      "epoch": 4.278074866310161,
+      "grad_norm": 0.8416089415550232,
+      "learning_rate": 2.8609625668449196e-05,
+      "loss": 0.4155,
+      "step": 8000
+    },
+    {
+      "epoch": 4.545454545454545,
+      "grad_norm": 0.7122125029563904,
+      "learning_rate": 2.7272727272727273e-05,
+      "loss": 0.4173,
+      "step": 8500
+    },
+    {
+      "epoch": 4.81283422459893,
+      "grad_norm": 0.7922715544700623,
+      "learning_rate": 2.5935828877005347e-05,
+      "loss": 0.4144,
+      "step": 9000
+    },
+    {
+      "epoch": 5.080213903743315,
+      "grad_norm": 1.4970874786376953,
+      "learning_rate": 2.4598930481283424e-05,
+      "loss": 0.4128,
+      "step": 9500
+    },
+    {
+      "epoch": 5.347593582887701,
+      "grad_norm": 0.9522221088409424,
+      "learning_rate": 2.32620320855615e-05,
+      "loss": 0.3862,
+      "step": 10000
+    },
+    {
+      "epoch": 5.614973262032086,
+      "grad_norm": 0.758745551109314,
+      "learning_rate": 2.192513368983957e-05,
+      "loss": 0.3889,
+      "step": 10500
+    },
+    {
+      "epoch": 5.882352941176471,
+      "grad_norm": 0.761408805847168,
+      "learning_rate": 2.058823529411765e-05,
+      "loss": 0.3787,
+      "step": 11000
+    },
+    {
+      "epoch": 6.149732620320855,
+      "grad_norm": 0.9881584644317627,
+      "learning_rate": 1.9251336898395722e-05,
+      "loss": 0.3762,
+      "step": 11500
+    },
+    {
+      "epoch": 6.4171122994652405,
+      "grad_norm": 0.6850549578666687,
+      "learning_rate": 1.7914438502673796e-05,
+      "loss": 0.3551,
+      "step": 12000
+    },
+    {
+      "epoch": 6.684491978609626,
+      "grad_norm": 1.1036587953567505,
+      "learning_rate": 1.6577540106951873e-05,
+      "loss": 0.3639,
+      "step": 12500
+    },
+    {
+      "epoch": 6.951871657754011,
+      "grad_norm": 1.33849036693573,
+      "learning_rate": 1.5240641711229947e-05,
+      "loss": 0.3615,
+      "step": 13000
+    },
+    {
+      "epoch": 7.219251336898395,
+      "grad_norm": 1.690130352973938,
+      "learning_rate": 1.3903743315508022e-05,
+      "loss": 0.3397,
+      "step": 13500
+    },
+    {
+      "epoch": 7.4866310160427805,
+      "grad_norm": 1.0579711198806763,
+      "learning_rate": 1.2566844919786098e-05,
+      "loss": 0.3454,
+      "step": 14000
+    },
+    {
+      "epoch": 7.754010695187166,
+      "grad_norm": 1.0972784757614136,
+      "learning_rate": 1.1229946524064172e-05,
+      "loss": 0.341,
+      "step": 14500
+    },
+    {
+      "epoch": 8.02139037433155,
+      "grad_norm": 0.9292057752609253,
+      "learning_rate": 9.893048128342247e-06,
+      "loss": 0.3458,
+      "step": 15000
+    },
+    {
+      "epoch": 8.288770053475936,
+      "grad_norm": 0.9827554821968079,
+      "learning_rate": 8.556149732620321e-06,
+      "loss": 0.3244,
+      "step": 15500
+    },
+    {
+      "epoch": 8.556149732620321,
+      "grad_norm": 1.4099150896072388,
+      "learning_rate": 7.2192513368983955e-06,
+      "loss": 0.3274,
+      "step": 16000
+    },
+    {
+      "epoch": 8.823529411764707,
+      "grad_norm": 1.0601311922073364,
+      "learning_rate": 5.882352941176471e-06,
+      "loss": 0.3267,
+      "step": 16500
+    },
+    {
+      "epoch": 9.090909090909092,
+      "grad_norm": 0.8129891157150269,
+      "learning_rate": 4.5454545454545455e-06,
+      "loss": 0.325,
+      "step": 17000
+    },
+    {
+      "epoch": 9.358288770053475,
+      "grad_norm": 0.862271249294281,
+      "learning_rate": 3.208556149732621e-06,
+      "loss": 0.3234,
+      "step": 17500
+    },
+    {
+      "epoch": 9.62566844919786,
+      "grad_norm": 0.8069100379943848,
+      "learning_rate": 1.8716577540106951e-06,
+      "loss": 0.3126,
+      "step": 18000
+    },
+    {
+      "epoch": 9.893048128342246,
+      "grad_norm": 0.9965262413024902,
+      "learning_rate": 5.347593582887701e-07,
+      "loss": 0.318,
+      "step": 18500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 18700,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.7282440998912e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9e2436ae0d1940c88324306ad3eccf2d5afa5a3371ceae038915339b8d626fc
+size 4920