zavora committed on Dec 27, 2024

Commit

97c1b45

verified ·

1 Parent(s): 0a3741a

Upload folder using huggingface_hub

Browse files

Files changed (29) hide show

checkpoint-1000/config.json +73 -0
checkpoint-1000/model.safetensors +3 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/trainer_state.json +63 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-1455/config.json +73 -0
checkpoint-1455/model.safetensors +3 -0
checkpoint-1455/optimizer.pt +3 -0
checkpoint-1455/rng_state.pth +3 -0
checkpoint-1455/scheduler.pt +3 -0
checkpoint-1455/trainer_state.json +63 -0
checkpoint-1455/training_args.bin +3 -0
checkpoint-500/config.json +73 -0
checkpoint-500/model.safetensors +3 -0
checkpoint-500/optimizer.pt +3 -0
checkpoint-500/rng_state.pth +3 -0
checkpoint-500/scheduler.pt +3 -0
checkpoint-500/trainer_state.json +48 -0
checkpoint-500/training_args.bin +3 -0
config.json +73 -0
model.safetensors +3 -0
runs/Dec27_05-33-36_debian/events.out.tfevents.1735277618.debian.3575638.0 +3 -0
runs/Dec27_05-35-26_debian/events.out.tfevents.1735277727.debian.3577983.0 +3 -0
runs/Dec27_05-36-37_debian/events.out.tfevents.1735277798.debian.3579469.0 +3 -0
runs/Dec27_05-52-31_debian/events.out.tfevents.1735278751.debian.3602307.0 +3 -0
runs/Dec27_06-07-26_debian/events.out.tfevents.1735279646.debian.3622194.0 +3 -0
training_args.bin +3 -0

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "T5ForSequenceClassification"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.47.1",
+  "use_cache": true,
+  "vocab_size": 32128
+}

checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d4481ed556f06a1e4ac80f48ae5be58bd971566e0d3ce9a885043429f597224
+size 894023124

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c6c18e4f98c85f02ab9678bc8d0a4bb4a22516882ada79ba6e7e440e726efa2b
+size 1788211642

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b756a7436bd25e612eba97582f5cc5fbb7dba1c491729db3223cb2d1af1df64
+size 14244

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff97c58a527005d014241becb33315d6e6813095a4a6af2a05d1b7b43964faa4
+size 1064

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0618556701030926,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.42261144518852234,
+      "eval_runtime": 6.9522,
+      "eval_samples_per_second": 139.523,
+      "eval_steps_per_second": 17.548,
+      "step": 485
+    },
+    {
+      "epoch": 1.0309278350515463,
+      "grad_norm": 2.8641510009765625,
+      "learning_rate": 1.3127147766323025e-05,
+      "loss": 0.6796,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.38285189867019653,
+      "eval_runtime": 6.8857,
+      "eval_samples_per_second": 140.871,
+      "eval_steps_per_second": 17.718,
+      "step": 970
+    },
+    {
+      "epoch": 2.0618556701030926,
+      "grad_norm": 1.466252326965332,
+      "learning_rate": 6.254295532646049e-06,
+      "loss": 0.3931,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1455,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 886651097220144.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0a4f56674f8158c153559cb00991d39e9e1da7258c61422695c423d072e36a2
+size 5368

checkpoint-1455/config.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "T5ForSequenceClassification"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.47.1",
+  "use_cache": true,
+  "vocab_size": 32128
+}

checkpoint-1455/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2234af38ef801c0a87312e17ffcad5d716c14d6180b1e340ba9702198a0fd485
+size 894023124

checkpoint-1455/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e4ea6a84c370363dc06c35ae7525f4931f238b8c1b20461c08795f33d5b7efb
+size 1788211642

checkpoint-1455/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ad93379655ac469f427b91f7b54483b0039f995d02e80ba16e9c130c496a86e
+size 14244

checkpoint-1455/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:275af64cb0546358bc3d8f4d54e6474cd386fd63d6765ec65e18390a5f3dca00
+size 1064

checkpoint-1455/trainer_state.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 1455,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.42261144518852234,
+      "eval_runtime": 6.9522,
+      "eval_samples_per_second": 139.523,
+      "eval_steps_per_second": 17.548,
+      "step": 485
+    },
+    {
+      "epoch": 1.0309278350515463,
+      "grad_norm": 2.8641510009765625,
+      "learning_rate": 1.3127147766323025e-05,
+      "loss": 0.6796,
+      "step": 500
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.38285189867019653,
+      "eval_runtime": 6.8857,
+      "eval_samples_per_second": 140.871,
+      "eval_steps_per_second": 17.718,
+      "step": 970
+    },
+    {
+      "epoch": 2.0618556701030926,
+      "grad_norm": 1.466252326965332,
+      "learning_rate": 6.254295532646049e-06,
+      "loss": 0.3931,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1455,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1290037407216696.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1455/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0a4f56674f8158c153559cb00991d39e9e1da7258c61422695c423d072e36a2
+size 5368

checkpoint-500/config.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "T5ForSequenceClassification"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.47.1",
+  "use_cache": true,
+  "vocab_size": 32128
+}

checkpoint-500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f789fb3a64bb3d3da5c05b694f6b85b477d7df94dfc430daccf0e346639a798e
+size 894023124

checkpoint-500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73b468103bbfc2060fa972425da1dc4e3abfc364989bbee8eb792eb29aeca506
+size 1788211642

checkpoint-500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:63015f9c9da9c1eb68161ed45f5059e4d5e15a0f0c6e8e6c4440dbd2a86011ae
+size 14244

checkpoint-500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:467520a4a26a0774e35aff43925f1cec3b52f665aeb514f73a5e99d4ce275a2e
+size 1064

checkpoint-500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0309278350515463,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.42261144518852234,
+      "eval_runtime": 6.9522,
+      "eval_samples_per_second": 139.523,
+      "eval_steps_per_second": 17.548,
+      "step": 485
+    },
+    {
+      "epoch": 1.0309278350515463,
+      "grad_norm": 2.8641510009765625,
+      "learning_rate": 1.3127147766323025e-05,
+      "loss": 0.6796,
+      "step": 500
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1455,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 443325548610072.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0a4f56674f8158c153559cb00991d39e9e1da7258c61422695c423d072e36a2
+size 5368

config.json ADDED Viewed

	@@ -0,0 +1,73 @@

+{
+  "_name_or_path": "google/flan-t5-base",
+  "architectures": [
+    "T5ForSequenceClassification"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.47.1",
+  "use_cache": true,
+  "vocab_size": 32128
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2234af38ef801c0a87312e17ffcad5d716c14d6180b1e340ba9702198a0fd485
+size 894023124

runs/Dec27_05-33-36_debian/events.out.tfevents.1735277618.debian.3575638.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be9f902917d6def40e1e02f68f536273ad9e85e278da529d81001a3cb68e8d5e
+size 4184

runs/Dec27_05-35-26_debian/events.out.tfevents.1735277727.debian.3577983.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b301a69e50f918cc291259cfcf83c050fc314b8ffaf55675c0cf6e8c5aa9547
+size 4184

runs/Dec27_05-36-37_debian/events.out.tfevents.1735277798.debian.3579469.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea9fea4c34d9516d78b5ac169e3c3048cd3f2588ca5525d3fc7c1ec6594aef68
+size 7707

runs/Dec27_05-52-31_debian/events.out.tfevents.1735278751.debian.3602307.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b3d64438bec602bcb5363f5aadbebfa8c2f8c4903dc40227e9ad541013126541
+size 7707

runs/Dec27_06-07-26_debian/events.out.tfevents.1735279646.debian.3622194.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98b802a43ec9b5db250ac00c330b28aae66fa23a3517d647ca534dd75dbbbcbb
+size 7707

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0a4f56674f8158c153559cb00991d39e9e1da7258c61422695c423d072e36a2
+size 5368