nbtpj committed
Commit ed6e800
1 Parent(s): 105ca5e

Training in progress, step 2500
.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "EncoderDecoderForConditionalGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.1",
+   "use_cache": true,
+   "vocab_size": 50265
+ }
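
config.json describes a stock facebook/bart-base seq2seq architecture (6 encoder and 6 decoder layers, d_model 768, 12 attention heads, 50,265-token vocabulary) plus beam-search presets for the usual summarization tasks. The "architectures" field names an EncoderDecoderForConditionalGeneration class that is not part of stock transformers, but since "model_type" is "bart" the standard BART classes can consume the same file. A minimal loading sketch, assuming a local checkout of this repo in the current directory (the path is illustrative):

from transformers import AutoConfig, BartForConditionalGeneration

config = AutoConfig.from_pretrained(".")  # parses this config.json via model_type "bart"
assert (config.d_model, config.encoder_layers, config.decoder_layers) == (768, 6, 6)

# Building from the config alone yields randomly initialized weights of the
# right shapes; from_pretrained(".") would additionally load pytorch_model.bin.
model = BartForConditionalGeneration(config)
print(sum(p.numel() for p in model.parameters()))  # ~139M params, matching the ~558 MB fp32 checkpoint below
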
last-checkpoint/config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "_name_or_path": "facebook/bart-base",
+   "activation_dropout": 0.1,
+   "activation_function": "gelu",
+   "add_bias_logits": false,
+   "add_final_layer_norm": false,
+   "architectures": [
+     "EncoderDecoderForConditionalGeneration"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 0,
+   "classif_dropout": 0.1,
+   "classifier_dropout": 0.0,
+   "d_model": 768,
+   "decoder_attention_heads": 12,
+   "decoder_ffn_dim": 3072,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 6,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 12,
+   "encoder_ffn_dim": 3072,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 6,
+   "eos_token_id": 2,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "max_position_embeddings": 1024,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "normalize_embedding": true,
+   "num_beams": 4,
+   "num_hidden_layers": 6,
+   "pad_token_id": 1,
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "length_penalty": 1.0,
+       "max_length": 128,
+       "min_length": 12,
+       "num_beams": 4
+     },
+     "summarization_cnn": {
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "num_beams": 4
+     },
+     "summarization_xsum": {
+       "length_penalty": 1.0,
+       "max_length": 62,
+       "min_length": 11,
+       "num_beams": 6
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.1",
+   "use_cache": true,
+   "vocab_size": 50265
+ }
last-checkpoint/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e70a5b8d2b6d8bb2b71cbf79cabc3993446edecaa7b23b0dea42478b41078d9
+ size 1115513717
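
This is not the optimizer state itself: large binaries in this commit are stored as Git LFS pointer files, three "key value" lines giving the spec version, the SHA-256 of the real blob, and its size in bytes. The 1,115,513,717-byte optimizer.pt is roughly twice the 557,969,145-byte model weights below, consistent with Adam keeping two extra fp32 buffers per parameter. A small self-contained sketch for parsing such a pointer (the helper name is illustrative):

def parse_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields."""
    fields = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

# On a checkout without `git lfs pull`, this prints:
# {'version': 'https://git-lfs.github.com/spec/v1',
#  'oid': 'sha256:6e70a5b8...', 'size': '1115513717'}
print(parse_lfs_pointer("last-checkpoint/optimizer.pt"))
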
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fde73781876577ba51e23daaaa70298a6d5d24f8539c4cbbc62cc8be970fbbf
+ size 557969145
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d97bff542b3a181a6f90fac3fb93bd368232f8c16c24b45de7dd660874b059f
+ size 15459
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17bf1eb09751eea54d404e38767abbd71f94a7738ff7edea23400551f49f406c
+ size 623
last-checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
last-checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "name_or_path": "facebook/bart-base",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
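
Together, tokenizer_config.json, special_tokens_map.json, vocab.json, merges.txt, and tokenizer.json define an unmodified facebook/bart-base byte-level BPE tokenizer. A minimal sketch of loading it from this checkpoint directory, assuming a local checkout:

from transformers import AutoTokenizer

# AutoTokenizer resolves BartTokenizer (or its fast variant) from
# tokenizer_config.json and reads vocab.json / merges.txt / tokenizer.json.
tok = AutoTokenizer.from_pretrained("last-checkpoint")
enc = tok("an example document to summarize", truncation=True, max_length=1024)
print(enc["input_ids"], tok.decode(enc["input_ids"]))
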
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,166 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.14746652509880256,
+   "global_step": 2500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.01,
+       "learning_rate": 4.96526184702104e-05,
+       "loss": 1.1303,
+       "step": 100
+     },
+     {
+       "epoch": 0.01,
+       "learning_rate": 4.932490004588058e-05,
+       "loss": 0.8589,
+       "step": 200
+     },
+     {
+       "epoch": 0.02,
+       "learning_rate": 4.8997181621550765e-05,
+       "loss": 0.9136,
+       "step": 300
+     },
+     {
+       "epoch": 0.02,
+       "learning_rate": 4.866946319722095e-05,
+       "loss": 0.8209,
+       "step": 400
+     },
+     {
+       "epoch": 0.03,
+       "learning_rate": 4.8341744772891136e-05,
+       "loss": 0.8212,
+       "step": 500
+     },
+     {
+       "epoch": 0.04,
+       "learning_rate": 4.801402634856132e-05,
+       "loss": 0.7584,
+       "step": 600
+     },
+     {
+       "epoch": 0.04,
+       "learning_rate": 4.768630792423151e-05,
+       "loss": 0.8386,
+       "step": 700
+     },
+     {
+       "epoch": 0.05,
+       "learning_rate": 4.735858949990168e-05,
+       "loss": 0.7838,
+       "step": 800
+     },
+     {
+       "epoch": 0.05,
+       "learning_rate": 4.703087107557187e-05,
+       "loss": 0.7109,
+       "step": 900
+     },
+     {
+       "epoch": 0.06,
+       "learning_rate": 4.670315265124205e-05,
+       "loss": 0.8817,
+       "step": 1000
+     },
+     {
+       "epoch": 0.06,
+       "learning_rate": 4.637543422691224e-05,
+       "loss": 0.7,
+       "step": 1100
+     },
+     {
+       "epoch": 0.07,
+       "learning_rate": 4.6047715802582424e-05,
+       "loss": 0.7984,
+       "step": 1200
+     },
+     {
+       "epoch": 0.08,
+       "learning_rate": 4.571999737825261e-05,
+       "loss": 0.7983,
+       "step": 1300
+     },
+     {
+       "epoch": 0.08,
+       "learning_rate": 4.539227895392279e-05,
+       "loss": 0.8209,
+       "step": 1400
+     },
+     {
+       "epoch": 0.09,
+       "learning_rate": 4.506456052959298e-05,
+       "loss": 0.7698,
+       "step": 1500
+     },
+     {
+       "epoch": 0.09,
+       "learning_rate": 4.473684210526316e-05,
+       "loss": 0.6812,
+       "step": 1600
+     },
+     {
+       "epoch": 0.1,
+       "learning_rate": 4.440912368093335e-05,
+       "loss": 0.7507,
+       "step": 1700
+     },
+     {
+       "epoch": 0.11,
+       "learning_rate": 4.408140525660353e-05,
+       "loss": 0.7081,
+       "step": 1800
+     },
+     {
+       "epoch": 0.11,
+       "learning_rate": 4.375368683227371e-05,
+       "loss": 0.809,
+       "step": 1900
+     },
+     {
+       "epoch": 0.12,
+       "learning_rate": 4.3425968407943895e-05,
+       "loss": 0.7504,
+       "step": 2000
+     },
+     {
+       "epoch": 0.12,
+       "learning_rate": 4.3098249983614084e-05,
+       "loss": 0.764,
+       "step": 2100
+     },
+     {
+       "epoch": 0.13,
+       "learning_rate": 4.2770531559284266e-05,
+       "loss": 0.7374,
+       "step": 2200
+     },
+     {
+       "epoch": 0.14,
+       "learning_rate": 4.244281313495445e-05,
+       "loss": 0.786,
+       "step": 2300
+     },
+     {
+       "epoch": 0.14,
+       "learning_rate": 4.211509471062463e-05,
+       "loss": 0.8193,
+       "step": 2400
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 4.178737628629482e-05,
+       "loss": 0.6467,
+       "step": 2500
+     }
+   ],
+   "max_steps": 30516,
+   "num_train_epochs": 2,
+   "total_flos": 1.527980033728512e+16,
+   "trial_name": null,
+   "trial_params": null
+ }
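
trainer_state.json captures the run so far: 2,500 of 30,516 steps (epoch 0.147 of 2), loss logged every 100 steps, and a learning rate decaying roughly linearly from about 5e-5. A self-contained sketch that summarizes it without loading the model (path as in this commit):

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

done = state["global_step"] / state["max_steps"]
print(f"step {state['global_step']}/{state['max_steps']} ({done:.1%} of the run)")
for rec in state["log_history"][-3:]:  # last three logged intervals
    print(rec["step"], rec["loss"], rec["learning_rate"])

On a full checkout, training continues with trainer.train(resume_from_checkpoint="last-checkpoint"), which restores optimizer.pt, scheduler.pt, and rng_state.pth alongside the weights and resumes at global step 2500.
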
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50d8747e43721b80b6e9bd368539717424fbb7049d6db13269fc09fffe501769
+ size 3311
last-checkpoint/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fde73781876577ba51e23daaaa70298a6d5d24f8539c4cbbc62cc8be970fbbf
+ size 557969145
runs/Dec05_03-30-37_fbdce2302f52/1670211053.077461/events.out.tfevents.1670211053.fbdce2302f52.24.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:17196ec21a82f76e217c7ffce0ac112d7a1377e731f7e4e715396e0958931099
+ size 5371
runs/Dec05_03-30-37_fbdce2302f52/1670211070.5940251/events.out.tfevents.1670211070.fbdce2302f52.24.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6eeeeee803174737b1d3754f00658c4cff8ee63fea0060026cd174d92f1b22d4
+ size 5371
runs/Dec05_03-30-37_fbdce2302f52/events.out.tfevents.1670211053.fbdce2302f52.24.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:26ca8396fc1d5ed47cc18d718589ef3682bd76d405c05f1616f84f6bcb3e1620
+ size 13328
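
The three files under runs/ are TensorBoard event logs written during training (the two small files in the timestamped subdirectories record the hyperparameter dump; the larger one holds the scalar curves). They can be browsed with TensorBoard pointed at runs/, or read programmatically; a sketch using the tensorboard package's event loader (the scalar tag names are assumptions about what the HF Trainer logged, not taken from this commit):

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Dec05_03-30-37_fbdce2302f52")
acc.Reload()  # parses every events.out.tfevents.* file in the directory
tags = acc.Tags()["scalars"]  # e.g. ['train/loss', 'train/learning_rate'] (assumed)
for ev in acc.Scalars(tags[0])[:3]:
    print(ev.step, ev.value)
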
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "name_or_path": "facebook/bart-base",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:50d8747e43721b80b6e9bd368539717424fbb7049d6db13269fc09fffe501769
+ size 3311
vocab.json ADDED
The diff for this file is too large to render. See raw diff