bert-squad

Browse files

Files changed (8) hide show

README.md +208 -0
config.json +25 -0
pytorch_model.bin +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +57 -0
training_args.bin +3 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,208 @@

+---
+license: mit
+base_model: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext
+tags:
+- generated_from_trainer
+datasets:
+- squad
+model-index:
+- name: bert-squadv2
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# bert-squadv2
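+This model is a fine-tuned version of [microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext](https://huggingface.co/microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext) on the SQuAD dataset (`squad` on the Hugging Face Hub). Note that, despite the `v2` in the model name, the metadata of this card lists `squad`, not `squad_v2`.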
+It achieves the following results on the evaluation set:
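+- Loss: 1.1930 (validation loss at the final step, 750; the lowest validation loss in the training results table below is 1.1475, at step 500)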
+## Model description
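+A BERT-base model with a question-answering head (`BertForQuestionAnswering`): 12 hidden layers, 12 attention heads, hidden size 768, a vocabulary of 30,522 tokens, and at most 512 input positions. It uses an uncased tokenizer (`do_lower_case: true`) and was initialized from the PubMedBERT checkpoint named above. More information needed.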
+## Intended uses & limitations
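+The architecture targets extractive question answering: given a question and a context passage, the model predicts the answer span within the passage. Limitation: the model supports at most 512 positions (`max_position_embeddings`), but `tokenizer_config.json` leaves `model_max_length` at the library's "unset" placeholder value, so callers should truncate inputs explicitly (for example `truncation=True, max_length=512`). More information needed.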
+## Training and evaluation data
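+The model was trained and evaluated on the `squad` dataset listed in the metadata of this card. The training log shows 250 optimizer steps per epoch at a train batch size of 16, which suggests roughly 4,000 training examples per epoch, i.e. a subset of the full training split (assumption; to be confirmed by the author). The exact splits used are not documented. More information needed.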
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 3
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 5.9623        | 0.02  | 5    | 5.8084          |
+| 5.6934        | 0.04  | 10   | 5.4377          |
+| 5.2457        | 0.06  | 15   | 4.8548          |
+| 4.5796        | 0.08  | 20   | 4.2851          |
+| 4.1507        | 0.1   | 25   | 3.9911          |
+| 4.1134        | 0.12  | 30   | 3.7444          |
+| 3.8076        | 0.14  | 35   | 3.5019          |
+| 3.8445        | 0.16  | 40   | 3.0715          |
+| 3.0969        | 0.18  | 45   | 2.6475          |
+| 2.8899        | 0.2   | 50   | 2.5662          |
+| 2.8354        | 0.22  | 55   | 2.3382          |
+| 3.1775        | 0.24  | 60   | 2.2028          |
+| 2.3935        | 0.26  | 65   | 2.2038          |
+| 2.3994        | 0.28  | 70   | 1.9708          |
+| 2.2664        | 0.3   | 75   | 1.9092          |
+| 1.8134        | 0.32  | 80   | 1.9546          |
+| 2.1905        | 0.34  | 85   | 1.8623          |
+| 2.3941        | 0.36  | 90   | 1.7622          |
+| 1.8807        | 0.38  | 95   | 1.7976          |
+| 2.3562        | 0.4   | 100  | 1.7311          |
+| 2.1116        | 0.42  | 105  | 1.6848          |
+| 1.8022        | 0.44  | 110  | 1.6636          |
+| 2.0378        | 0.46  | 115  | 1.6401          |
+| 1.7313        | 0.48  | 120  | 1.6013          |
+| 1.9304        | 0.5   | 125  | 1.5312          |
+| 1.7668        | 0.52  | 130  | 1.4995          |
+| 1.908         | 0.54  | 135  | 1.5222          |
+| 1.9348        | 0.56  | 140  | 1.5180          |
+| 1.7307        | 0.58  | 145  | 1.4694          |
+| 1.9088        | 0.6   | 150  | 1.4597          |
+| 1.3283        | 0.62  | 155  | 1.4631          |
+| 1.6898        | 0.64  | 160  | 1.4715          |
+| 1.7079        | 0.66  | 165  | 1.4565          |
+| 1.6261        | 0.68  | 170  | 1.4246          |
+| 1.5628        | 0.7   | 175  | 1.4248          |
+| 1.7642        | 0.72  | 180  | 1.4261          |
+| 1.5168        | 0.74  | 185  | 1.4088          |
+| 1.5967        | 0.76  | 190  | 1.4028          |
+| 1.275         | 0.78  | 195  | 1.4294          |
+| 1.596         | 0.8   | 200  | 1.4128          |
+| 1.5765        | 0.82  | 205  | 1.4032          |
+| 1.6554        | 0.84  | 210  | 1.3599          |
+| 1.785         | 0.86  | 215  | 1.3221          |
+| 1.4147        | 0.88  | 220  | 1.3299          |
+| 1.4364        | 0.9   | 225  | 1.3510          |
+| 1.6059        | 0.92  | 230  | 1.2959          |
+| 1.305         | 0.94  | 235  | 1.2871          |
+| 1.4614        | 0.96  | 240  | 1.2986          |
+| 1.3531        | 0.98  | 245  | 1.3891          |
+| 1.3192        | 1.0   | 250  | 1.3526          |
+| 1.0726        | 1.02  | 255  | 1.3378          |
+| 1.1724        | 1.04  | 260  | 1.3207          |
+| 1.2818        | 1.06  | 265  | 1.3034          |
+| 1.1           | 1.08  | 270  | 1.2991          |
+| 1.0719        | 1.1   | 275  | 1.2799          |
+| 1.231         | 1.12  | 280  | 1.2880          |
+| 1.3378        | 1.14  | 285  | 1.3066          |
+| 1.0818        | 1.16  | 290  | 1.2954          |
+| 1.0873        | 1.18  | 295  | 1.2754          |
+| 1.1567        | 1.2   | 300  | 1.2741          |
+| 1.1031        | 1.22  | 305  | 1.2502          |
+| 1.1391        | 1.24  | 310  | 1.2674          |
+| 1.2142        | 1.26  | 315  | 1.2849          |
+| 0.9893        | 1.28  | 320  | 1.2841          |
+| 1.0846        | 1.3   | 325  | 1.2748          |
+| 1.2535        | 1.32  | 330  | 1.2628          |
+| 1.1309        | 1.34  | 335  | 1.2410          |
+| 0.9969        | 1.36  | 340  | 1.2267          |
+| 1.0932        | 1.38  | 345  | 1.2032          |
+| 1.4972        | 1.4   | 350  | 1.1923          |
+| 0.9547        | 1.42  | 355  | 1.1954          |
+| 1.1322        | 1.44  | 360  | 1.2043          |
+| 0.8833        | 1.46  | 365  | 1.2234          |
+| 0.7986        | 1.48  | 370  | 1.2600          |
+| 1.1929        | 1.5   | 375  | 1.2788          |
+| 0.9585        | 1.52  | 380  | 1.2554          |
+| 1.3862        | 1.54  | 385  | 1.2165          |
+| 1.1168        | 1.56  | 390  | 1.2064          |
+| 1.135         | 1.58  | 395  | 1.1976          |
+| 0.8741        | 1.6   | 400  | 1.1933          |
+| 1.3593        | 1.62  | 405  | 1.1857          |
+| 1.0084        | 1.64  | 410  | 1.1851          |
+| 0.9579        | 1.66  | 415  | 1.1728          |
+| 0.9541        | 1.68  | 420  | 1.1721          |
+| 1.2569        | 1.7   | 425  | 1.1773          |
+| 1.0629        | 1.72  | 430  | 1.1717          |
+| 1.1233        | 1.74  | 435  | 1.1671          |
+| 0.8304        | 1.76  | 440  | 1.1742          |
+| 0.8097        | 1.78  | 445  | 1.1861          |
+| 0.9703        | 1.8   | 450  | 1.1822          |
+| 1.1413        | 1.82  | 455  | 1.1909          |
+| 1.0977        | 1.84  | 460  | 1.1938          |
+| 1.0375        | 1.86  | 465  | 1.1839          |
+| 1.0726        | 1.88  | 470  | 1.1871          |
+| 1.1322        | 1.9   | 475  | 1.2020          |
+| 1.0286        | 1.92  | 480  | 1.2004          |
+| 0.9395        | 1.94  | 485  | 1.1981          |
+| 1.059         | 1.96  | 490  | 1.1772          |
+| 1.0722        | 1.98  | 495  | 1.1568          |
+| 0.8618        | 2.0   | 500  | 1.1475          |
+| 0.9305        | 2.02  | 505  | 1.1554          |
+| 0.8525        | 2.04  | 510  | 1.1740          |
+| 1.0687        | 2.06  | 515  | 1.1759          |
+| 0.8899        | 2.08  | 520  | 1.1647          |
+| 0.6881        | 2.1   | 525  | 1.1755          |
+| 0.8582        | 2.12  | 530  | 1.1920          |
+| 0.6645        | 2.14  | 535  | 1.1952          |
+| 0.6028        | 2.16  | 540  | 1.2121          |
+| 0.7364        | 2.18  | 545  | 1.2169          |
+| 0.5562        | 2.2   | 550  | 1.2278          |
+| 0.6175        | 2.22  | 555  | 1.2413          |
+| 0.5392        | 2.24  | 560  | 1.2466          |
+| 0.8727        | 2.26  | 565  | 1.2362          |
+| 0.6778        | 2.28  | 570  | 1.2253          |
+| 0.685         | 2.3   | 575  | 1.2254          |
+| 0.8991        | 2.32  | 580  | 1.2181          |
+| 1.0157        | 2.34  | 585  | 1.2044          |
+| 0.5054        | 2.36  | 590  | 1.1943          |
+| 0.8036        | 2.38  | 595  | 1.1950          |
+| 0.6207        | 2.4   | 600  | 1.2025          |
+| 0.6828        | 2.42  | 605  | 1.2178          |
+| 0.8008        | 2.44  | 610  | 1.2312          |
+| 0.739         | 2.46  | 615  | 1.2401          |
+| 0.5479        | 2.48  | 620  | 1.2459          |
+| 0.9443        | 2.5   | 625  | 1.2359          |
+| 0.7468        | 2.52  | 630  | 1.2264          |
+| 0.6803        | 2.54  | 635  | 1.2223          |
+| 0.8997        | 2.56  | 640  | 1.2208          |
+| 0.7044        | 2.58  | 645  | 1.2118          |
+| 0.707         | 2.6   | 650  | 1.2076          |
+| 0.7813        | 2.62  | 655  | 1.2072          |
+| 0.6376        | 2.64  | 660  | 1.2122          |
+| 0.8885        | 2.66  | 665  | 1.2141          |
+| 0.7359        | 2.68  | 670  | 1.2121          |
+| 0.6928        | 2.7   | 675  | 1.2113          |
+| 0.7706        | 2.72  | 680  | 1.2082          |
+| 0.884         | 2.74  | 685  | 1.2033          |
+| 0.6362        | 2.76  | 690  | 1.1991          |
+| 0.8517        | 2.78  | 695  | 1.1959          |
+| 0.7713        | 2.8   | 700  | 1.1954          |
+| 0.8654        | 2.82  | 705  | 1.1945          |
+| 0.6268        | 2.84  | 710  | 1.1923          |
+| 0.8246        | 2.86  | 715  | 1.1919          |
+| 0.646         | 2.88  | 720  | 1.1920          |
+| 0.8648        | 2.9   | 725  | 1.1922          |
+| 0.8398        | 2.92  | 730  | 1.1928          |
+| 0.6281        | 2.94  | 735  | 1.1931          |
+| 0.6319        | 2.96  | 740  | 1.1927          |
+| 0.6304        | 2.98  | 745  | 1.1932          |
+| 0.6554        | 3.0   | 750  | 1.1930          |
+### Framework versions
+- Transformers 4.34.1
+- Pytorch 2.1.0+cu118
+- Datasets 2.14.5
+- Tokenizers 0.14.1

config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext",
+  "architectures": [
+    "BertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.34.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73b4ff493bf402aa1dc0dce39c0676315521caa4a46127ee4f4bbee80dc63248
+size 435640934

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ac14c51a08af4744d839adcf6df410d7af5cfa1ea11e37d5b5aed558837bff8
+size 4536

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff