Upload 8 files

This is a high-performance named entity recognition (NER) model that achieves an overall F1-score of 0.97. It is optimized for identifying medical entities such as diseases, symptoms, procedures, and provider types, and it shows strong precision and recall across diverse entity types, making it well suited to healthcare text-processing tasks.

Files changed (8)

config.json +82 -0
optimizer.pt +3 -0
pytorch_model.bin +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +13 -0
training_args.bin +3 -0
vocab.txt +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForTokenClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "B-DISEASE",
+    "1": "B-DOCUMENT_TYPE",
+    "2": "B-DOS",
+    "3": "B-DOS_A",
+    "4": "B-DOS_D",
+    "5": "B-LAB",
+    "6": "B-MEDICINE",
+    "7": "B-PROCEDURE",
+    "8": "B-PROVIDER_INDIVIDUAL",
+    "9": "B-PROVIDER_ORG",
+    "10": "B-SECTION_HEADING",
+    "11": "B-SYMPTOM",
+    "12": "B-VISIT_TYPE",
+    "13": "I-DISEASE",
+    "14": "I-DOCUMENT_TYPE",
+    "15": "I-DOS",
+    "16": "I-DOS_A",
+    "17": "I-DOS_D",
+    "18": "I-LAB",
+    "19": "I-MEDICINE",
+    "20": "I-PROCEDURE",
+    "21": "I-PROVIDER_INDIVIDUAL",
+    "22": "I-PROVIDER_ORG",
+    "23": "I-SECTION_HEADING",
+    "24": "I-SYMPTOM",
+    "25": "I-VISIT_TYPE",
+    "26": "O"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "B-DISEASE": 0,
+    "B-DOCUMENT_TYPE": 1,
+    "B-DOS": 2,
+    "B-DOS_A": 3,
+    "B-DOS_D": 4,
+    "B-LAB": 5,
+    "B-MEDICINE": 6,
+    "B-PROCEDURE": 7,
+    "B-PROVIDER_INDIVIDUAL": 8,
+    "B-PROVIDER_ORG": 9,
+    "B-SECTION_HEADING": 10,
+    "B-SYMPTOM": 11,
+    "B-VISIT_TYPE": 12,
+    "I-DISEASE": 13,
+    "I-DOCUMENT_TYPE": 14,
+    "I-DOS": 15,
+    "I-DOS_A": 16,
+    "I-DOS_D": 17,
+    "I-LAB": 18,
+    "I-MEDICINE": 19,
+    "I-PROCEDURE": 20,
+    "I-PROVIDER_INDIVIDUAL": 21,
+    "I-PROVIDER_ORG": 22,
+    "I-SECTION_HEADING": 23,
+    "I-SYMPTOM": 24,
+    "I-VISIT_TYPE": 25,
+    "O": 26
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.0",
+  "vocab_size": 30522
+}

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:925cdb6b84985b8176010ee8912b923b5d0e97f1322ec4a7ab033574e8062766
+size 871329797

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02898b0130d0cd667f80eebe553712c895afdfa27f8641d6c7ea3adea25cf24f
+size 265569890

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:77b53a607245efc9146dd4ce91edbbd4f1bf7feeb8310d04d3a25343834daedd
+size 4024

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff