initial commit

Files changed (14) hide show

README.md +262 -0
added_tokens.json +3 -0
all_results.json +13 -0
config.json +63 -0
eval_results.json +8 -0
merges.txt +0 -0
pytorch_model.bin +3 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +65 -0
train_results.json +8 -0
trainer_state.json +0 -0
training_args.bin +3 -0
vocab.json +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,265 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
+tags:
+- generated_from_trainer
+datasets:
+- ratishsp/newshead
+model-index:
+- name: Centrum
+  results: []
 ---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# Centrum
+Centrum is a pretrained model for multi-document summarization, trained with a centroid-based pretraining objective on the NewSHead dataset. It is initialized from allenai/led-large-16384. The details of the approach are described in the ACL 2023 paper "Multi-Document Summarization with Centroid-Based Pretraining" (Ratish Puduppully, Parag Jain, Nancy F. Chen and Mark Steedman). It achieves the following results on the evaluation set:
+- Loss: 3.3292
+## Model description
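+Centrum is an `LEDForConditionalGeneration` (Longformer Encoder-Decoder) checkpoint initialized from `allenai/led-large-16384`. Its tokenizer is extended with one added token, `<doc-sep>` (id 50265), giving a vocabulary size of 50266.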
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
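+The model was trained and evaluated on the NewSHead dataset (`ratishsp/newshead`). The run logs report 172,615 training samples and 2,522 evaluation samples.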
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 1
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 4
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 16
+- total_eval_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 10000
+- training_steps: 100000
+- mixed_precision_training: Native AMP
+- label_smoothing_factor: 0.1
+### Training results
+| Training Loss | Epoch | Step   | Validation Loss |
+|:-------------:|:-----:|:------:|:---------------:|
+| 3.7884        | 0.05  | 500    | 3.7054          |
+| 3.6593        | 0.09  | 1000   | 3.6245          |
+| 3.6425        | 0.14  | 1500   | 3.5841          |
+| 3.6008        | 0.19  | 2000   | 3.5561          |
+| 3.5645        | 0.23  | 2500   | 3.5372          |
+| 3.568         | 0.28  | 3000   | 3.5187          |
+| 3.5408        | 0.32  | 3500   | 3.5045          |
+| 3.5447        | 0.37  | 4000   | 3.4951          |
+| 3.5324        | 0.42  | 4500   | 3.4845          |
+| 3.5192        | 0.46  | 5000   | 3.4739          |
+| 3.4841        | 0.51  | 5500   | 3.4684          |
+| 3.4703        | 0.56  | 6000   | 3.4604          |
+| 3.4759        | 0.6   | 6500   | 3.4534          |
+| 3.4647        | 0.65  | 7000   | 3.4476          |
+| 3.4726        | 0.7   | 7500   | 3.4399          |
+| 3.4522        | 0.74  | 8000   | 3.4332          |
+| 3.4454        | 0.79  | 8500   | 3.4277          |
+| 3.4281        | 0.83  | 9000   | 3.4229          |
+| 3.4341        | 0.88  | 9500   | 3.4173          |
+| 3.4563        | 0.93  | 10000  | 3.4161          |
+| 3.4188        | 0.97  | 10500  | 3.4094          |
+| 3.3967        | 1.02  | 11000  | 3.4123          |
+| 3.3647        | 1.07  | 11500  | 3.4061          |
+| 3.3604        | 1.11  | 12000  | 3.4011          |
+| 3.3662        | 1.16  | 12500  | 3.4011          |
+| 3.3698        | 1.21  | 13000  | 3.3918          |
+| 3.3558        | 1.25  | 13500  | 3.3910          |
+| 3.3421        | 1.3   | 14000  | 3.3891          |
+| 3.3468        | 1.34  | 14500  | 3.3894          |
+| 3.3333        | 1.39  | 15000  | 3.3817          |
+| 3.3545        | 1.44  | 15500  | 3.3803          |
+| 3.3411        | 1.48  | 16000  | 3.3784          |
+| 3.3338        | 1.53  | 16500  | 3.3782          |
+| 3.3354        | 1.58  | 17000  | 3.3749          |
+| 3.3341        | 1.62  | 17500  | 3.3714          |
+| 3.3302        | 1.67  | 18000  | 3.3677          |
+| 3.3179        | 1.71  | 18500  | 3.3659          |
+| 3.3381        | 1.76  | 19000  | 3.3645          |
+| 3.3223        | 1.81  | 19500  | 3.3619          |
+| 3.3079        | 1.85  | 20000  | 3.3593          |
+| 3.3156        | 1.9   | 20500  | 3.3576          |
+| 3.3056        | 1.95  | 21000  | 3.3582          |
+| 3.3117        | 1.99  | 21500  | 3.3552          |
+| 3.2522        | 2.04  | 22000  | 3.3550          |
+| 3.2522        | 2.09  | 22500  | 3.3586          |
+| 3.2386        | 2.13  | 23000  | 3.3548          |
+| 3.2574        | 2.18  | 23500  | 3.3544          |
+| 3.239         | 2.22  | 24000  | 3.3566          |
+| 3.2468        | 2.27  | 24500  | 3.3528          |
+| 3.2264        | 2.32  | 25000  | 3.3511          |
+| 3.2501        | 2.36  | 25500  | 3.3482          |
+| 3.2204        | 2.41  | 26000  | 3.3506          |
+| 3.2302        | 2.46  | 26500  | 3.3526          |
+| 3.2353        | 2.5   | 27000  | 3.3492          |
+| 3.2494        | 2.55  | 27500  | 3.3452          |
+| 3.2423        | 2.6   | 28000  | 3.3455          |
+| 3.2233        | 2.64  | 28500  | 3.3447          |
+| 3.2498        | 2.69  | 29000  | 3.3420          |
+| 3.2175        | 2.73  | 29500  | 3.3457          |
+| 3.2398        | 2.78  | 30000  | 3.3402          |
+| 3.2242        | 2.83  | 30500  | 3.3421          |
+| 3.2185        | 2.87  | 31000  | 3.3457          |
+| 3.2274        | 2.92  | 31500  | 3.3419          |
+| 3.2251        | 2.97  | 32000  | 3.3449          |
+| 3.1507        | 3.01  | 32500  | 3.3518          |
+| 3.165         | 3.06  | 33000  | 3.3462          |
+| 3.1512        | 3.11  | 33500  | 3.3434          |
+| 3.1598        | 3.15  | 34000  | 3.3433          |
+| 3.1728        | 3.2   | 34500  | 3.3445          |
+| 3.1838        | 3.24  | 35000  | 3.3456          |
+| 3.1649        | 3.29  | 35500  | 3.3442          |
+| 3.1684        | 3.34  | 36000  | 3.3404          |
+| 3.1587        | 3.38  | 36500  | 3.3406          |
+| 3.1586        | 3.43  | 37000  | 3.3442          |
+| 3.1545        | 3.48  | 37500  | 3.3381          |
+| 3.1674        | 3.52  | 38000  | 3.3436          |
+| 3.1717        | 3.57  | 38500  | 3.3373          |
+| 3.147         | 3.62  | 39000  | 3.3408          |
+| 3.1462        | 3.66  | 39500  | 3.3374          |
+| 3.156         | 3.71  | 40000  | 3.3382          |
+| 3.1354        | 3.75  | 40500  | 3.3366          |
+| 3.1613        | 3.8   | 41000  | 3.3317          |
+| 3.143         | 3.85  | 41500  | 3.3347          |
+| 3.1667        | 3.89  | 42000  | 3.3353          |
+| 3.1597        | 3.94  | 42500  | 3.3341          |
+| 3.1566        | 3.99  | 43000  | 3.3357          |
+| 3.124         | 4.03  | 43500  | 3.3410          |
+| 3.1035        | 4.08  | 44000  | 3.3434          |
+| 3.0881        | 4.12  | 44500  | 3.3411          |
+| 3.1131        | 4.17  | 45000  | 3.3379          |
+| 3.1191        | 4.22  | 45500  | 3.3468          |
+| 3.1119        | 4.26  | 46000  | 3.3356          |
+| 3.0957        | 4.31  | 46500  | 3.3417          |
+| 3.1024        | 4.36  | 47000  | 3.3380          |
+| 3.1141        | 4.4   | 47500  | 3.3472          |
+| 3.0851        | 4.45  | 48000  | 3.3513          |
+| 3.1252        | 4.5   | 48500  | 3.3351          |
+| 3.1125        | 4.54  | 49000  | 3.3423          |
+| 3.1019        | 4.59  | 49500  | 3.3396          |
+| 3.1185        | 4.63  | 50000  | 3.3349          |
+| 3.1042        | 4.68  | 50500  | 3.3350          |
+| 3.1153        | 4.73  | 51000  | 3.3345          |
+| 3.1289        | 4.77  | 51500  | 3.3356          |
+| 3.1075        | 4.82  | 52000  | 3.3335          |
+| 3.1151        | 4.87  | 52500  | 3.3385          |
+| 3.094         | 4.91  | 53000  | 3.3292          |
+| 3.1272        | 4.96  | 53500  | 3.3349          |
+| 3.0847        | 5.01  | 54000  | 3.3407          |
+| 3.0662        | 5.05  | 54500  | 3.3378          |
+| 3.0345        | 5.1   | 55000  | 3.3481          |
+| 3.0611        | 5.14  | 55500  | 3.3410          |
+| 3.0566        | 5.19  | 56000  | 3.3424          |
+| 3.0413        | 5.24  | 56500  | 3.3466          |
+| 3.0291        | 5.28  | 57000  | 3.3453          |
+| 3.0569        | 5.33  | 57500  | 3.3491          |
+| 3.0645        | 5.38  | 58000  | 3.3378          |
+| 3.0646        | 5.42  | 58500  | 3.3434          |
+| 3.045         | 5.47  | 59000  | 3.3418          |
+| 3.0551        | 5.52  | 59500  | 3.3426          |
+| 3.0706        | 5.56  | 60000  | 3.3378          |
+| 3.0556        | 5.61  | 60500  | 3.3407          |
+| 3.0743        | 5.65  | 61000  | 3.3520          |
+| 3.0764        | 5.7   | 61500  | 3.3320          |
+| 3.0723        | 5.75  | 62000  | 3.3352          |
+| 3.0716        | 5.79  | 62500  | 3.3327          |
+| 3.0618        | 5.84  | 63000  | 3.3447          |
+| 3.0662        | 5.89  | 63500  | 3.3312          |
+| 3.0758        | 5.93  | 64000  | 3.3323          |
+| 3.0501        | 5.98  | 64500  | 3.3400          |
+| 2.978         | 6.03  | 65000  | 3.3473          |
+| 3.0131        | 6.07  | 65500  | 3.3440          |
+| 3.0212        | 6.12  | 66000  | 3.3401          |
+| 3.0095        | 6.16  | 66500  | 3.3361          |
+| 3.0118        | 6.21  | 67000  | 3.3352          |
+| 3.0249        | 6.26  | 67500  | 3.3398          |
+| 3.0107        | 6.3   | 68000  | 3.3444          |
+| 3.0175        | 6.35  | 68500  | 3.3490          |
+| 3.0241        | 6.4   | 69000  | 3.3402          |
+| 3.0094        | 6.44  | 69500  | 3.3437          |
+| 3.0286        | 6.49  | 70000  | 3.3355          |
+| 3.0391        | 6.54  | 70500  | 3.3385          |
+| 3.0243        | 6.58  | 71000  | 3.3395          |
+| 3.0232        | 6.63  | 71500  | 3.3370          |
+| 3.0168        | 6.67  | 72000  | 3.3458          |
+| 3.0432        | 6.72  | 72500  | 3.3400          |
+| 3.0121        | 6.77  | 73000  | 3.3420          |
+| 3.0137        | 6.81  | 73500  | 3.3436          |
+| 3.0333        | 6.86  | 74000  | 3.3362          |
+| 3.0194        | 6.91  | 74500  | 3.3355          |
+| 3.0198        | 6.95  | 75000  | 3.3434          |
+| 3.0105        | 7.0   | 75500  | 3.3346          |
+| 2.9833        | 7.04  | 76000  | 3.3492          |
+| 2.9876        | 7.09  | 76500  | 3.3351          |
+| 2.9918        | 7.14  | 77000  | 3.3466          |
+| 2.9983        | 7.18  | 77500  | 3.3422          |
+| 2.9893        | 7.23  | 78000  | 3.3364          |
+| 2.9946        | 7.28  | 78500  | 3.3365          |
+| 2.9851        | 7.32  | 79000  | 3.3402          |
+| 2.9797        | 7.37  | 79500  | 3.3450          |
+| 2.9888        | 7.42  | 80000  | 3.3423          |
+| 3.0182        | 7.46  | 80500  | 3.3429          |
+| 2.983         | 7.51  | 81000  | 3.3345          |
+| 2.9959        | 7.55  | 81500  | 3.3397          |
+| 2.9935        | 7.6   | 82000  | 3.3389          |
+| 3.0008        | 7.65  | 82500  | 3.3442          |
+| 2.9898        | 7.69  | 83000  | 3.3418          |
+| 2.9989        | 7.74  | 83500  | 3.3387          |
+| 2.985         | 7.79  | 84000  | 3.3482          |
+| 2.963         | 7.83  | 84500  | 3.3369          |
+| 3.0009        | 7.88  | 85000  | 3.3355          |
+| 2.9925        | 7.93  | 85500  | 3.3434          |
+| 2.9616        | 7.97  | 86000  | 3.3346          |
+| 2.9769        | 8.02  | 86500  | 3.3430          |
+| 2.9663        | 8.06  | 87000  | 3.3407          |
+| 2.9872        | 8.11  | 87500  | 3.3448          |
+| 2.9892        | 8.16  | 88000  | 3.3354          |
+| 2.9526        | 8.2   | 88500  | 3.3445          |
+| 2.9426        | 8.25  | 89000  | 3.3405          |
+| 2.9528        | 8.3   | 89500  | 3.3466          |
+| 2.9541        | 8.34  | 90000  | 3.3434          |
+| 2.9643        | 8.39  | 90500  | 3.3475          |
+| 2.9893        | 8.44  | 91000  | 3.3434          |
+| 2.9655        | 8.48  | 91500  | 3.3433          |
+| 2.9735        | 8.53  | 92000  | 3.3416          |
+| 2.9722        | 8.57  | 92500  | 3.3443          |
+| 2.9639        | 8.62  | 93000  | 3.3410          |
+| 2.972         | 8.67  | 93500  | 3.3407          |
+| 2.9586        | 8.71  | 94000  | 3.3393          |
+| 2.9591        | 8.76  | 94500  | 3.3412          |
+| 2.9523        | 8.81  | 95000  | 3.3411          |
+| 2.9572        | 8.85  | 95500  | 3.3393          |
+| 2.9435        | 8.9   | 96000  | 3.3414          |
+| 2.9667        | 8.95  | 96500  | 3.3392          |
+| 2.9824        | 8.99  | 97000  | 3.3428          |
+| 2.9265        | 9.04  | 97500  | 3.3417          |
+| 2.9409        | 9.08  | 98000  | 3.3435          |
+| 2.9387        | 9.13  | 98500  | 3.3425          |
+| 2.9635        | 9.18  | 99000  | 3.3420          |
+| 2.9527        | 9.22  | 99500  | 3.3421          |
+| 2.9755        | 9.27  | 100000 | 3.3430          |
+### Framework versions
+- Transformers 4.23.0.dev0
+- Pytorch 1.12.1
+- Datasets 2.6.1
+- Tokenizers 0.13.1

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<doc-sep>": 50265
+}

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 9.27,
+    "eval_loss": 3.329162836074829,
+    "eval_runtime": 58.6716,
+    "eval_samples": 2522,
+    "eval_samples_per_second": 42.985,
+    "eval_steps_per_second": 2.693,
+    "train_loss": 0.653541208190918,
+    "train_runtime": 35450.0434,
+    "train_samples": 172615,
+    "train_samples_per_second": 45.134,
+    "train_steps_per_second": 2.821
+}

config.json ADDED Viewed

	@@ -0,0 +1,63 @@

+{
+  "_name_or_path": "allenai/led-large-16384",
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "architectures": [
+    "LEDForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "attention_window": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_decoder_position_embeddings": 1024,
+  "max_encoder_position_embeddings": 4096,
+  "model_type": "led",
+  "num_hidden_layers": 12,
+  "output_past": false,
+  "pad_token_id": 1,
+  "prefix": " ",
+  "torch_dtype": "float32",
+  "transformers_version": "4.23.0.dev0",
+  "use_cache": true,
+  "vocab_size": 50266
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 9.27,
+    "eval_loss": 3.329162836074829,
+    "eval_runtime": 58.6716,
+    "eval_samples": 2522,
+    "eval_samples_per_second": 42.985,
+    "eval_steps_per_second": 2.693
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95ddb4ef9e51bc6de78c2c9c7ac0d92951df8d0c1b18c8ac5aa1352b7e052ddb
+size 1789277169

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,65 @@

+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "mask_token": {
+    "__type": "AddedToken",
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "model_max_length": 16384,
+  "name_or_path": "allenai/led-large-16384",
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "special_tokens_map_file": "/home/co-jai1/.cache/huggingface/hub/models--allenai--led-large-16384/snapshots/04472a9a5d3af2efe700dda11da6063c68cd27a4/special_tokens_map.json",
+  "tokenizer_class": "LEDTokenizer",
+  "trim_offsets": true,
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 9.27,
+    "train_loss": 0.653541208190918,
+    "train_runtime": 35450.0434,
+    "train_samples": 172615,
+    "train_samples_per_second": 45.134,
+    "train_steps_per_second": 2.821
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2cff5695acf51d4ce3eb7ca0ce822bec1f88009d90b7fa93d343f5b83056199
+size 3759

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff