samant committed on
Commit
59c2c34
·
verified ·
1 Parent(s): d0312d4

Upload 8 files

Browse files

This is a high-performance named entity recognition (NER) model that achieves an overall F1-score of 0.97. It is optimized for identifying medical entities such as diseases, symptoms, procedures, and provider types, and its strong precision and recall across diverse entity types make it well suited to healthcare text-processing tasks.

config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "B-DISEASE",
13
+ "1": "B-DOCUMENT_TYPE",
14
+ "2": "B-DOS",
15
+ "3": "B-DOS_A",
16
+ "4": "B-DOS_D",
17
+ "5": "B-LAB",
18
+ "6": "B-MEDICINE",
19
+ "7": "B-PROCEDURE",
20
+ "8": "B-PROVIDER_INDIVIDUAL",
21
+ "9": "B-PROVIDER_ORG",
22
+ "10": "B-SECTION_HEADING",
23
+ "11": "B-SYMPTOM",
24
+ "12": "B-VISIT_TYPE",
25
+ "13": "I-DISEASE",
26
+ "14": "I-DOCUMENT_TYPE",
27
+ "15": "I-DOS",
28
+ "16": "I-DOS_A",
29
+ "17": "I-DOS_D",
30
+ "18": "I-LAB",
31
+ "19": "I-MEDICINE",
32
+ "20": "I-PROCEDURE",
33
+ "21": "I-PROVIDER_INDIVIDUAL",
34
+ "22": "I-PROVIDER_ORG",
35
+ "23": "I-SECTION_HEADING",
36
+ "24": "I-SYMPTOM",
37
+ "25": "I-VISIT_TYPE",
38
+ "26": "O"
39
+ },
40
+ "initializer_range": 0.02,
41
+ "label2id": {
42
+ "B-DISEASE": 0,
43
+ "B-DOCUMENT_TYPE": 1,
44
+ "B-DOS": 2,
45
+ "B-DOS_A": 3,
46
+ "B-DOS_D": 4,
47
+ "B-LAB": 5,
48
+ "B-MEDICINE": 6,
49
+ "B-PROCEDURE": 7,
50
+ "B-PROVIDER_INDIVIDUAL": 8,
51
+ "B-PROVIDER_ORG": 9,
52
+ "B-SECTION_HEADING": 10,
53
+ "B-SYMPTOM": 11,
54
+ "B-VISIT_TYPE": 12,
55
+ "I-DISEASE": 13,
56
+ "I-DOCUMENT_TYPE": 14,
57
+ "I-DOS": 15,
58
+ "I-DOS_A": 16,
59
+ "I-DOS_D": 17,
60
+ "I-LAB": 18,
61
+ "I-MEDICINE": 19,
62
+ "I-PROCEDURE": 20,
63
+ "I-PROVIDER_INDIVIDUAL": 21,
64
+ "I-PROVIDER_ORG": 22,
65
+ "I-SECTION_HEADING": 23,
66
+ "I-SYMPTOM": 24,
67
+ "I-VISIT_TYPE": 25,
68
+ "O": 26
69
+ },
70
+ "max_position_embeddings": 512,
71
+ "model_type": "distilbert",
72
+ "n_heads": 12,
73
+ "n_layers": 6,
74
+ "pad_token_id": 0,
75
+ "qa_dropout": 0.1,
76
+ "seq_classif_dropout": 0.2,
77
+ "sinusoidal_pos_embds": false,
78
+ "tie_weights_": true,
79
+ "torch_dtype": "float32",
80
+ "transformers_version": "4.28.0",
81
+ "vocab_size": 30522
82
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:925cdb6b84985b8176010ee8912b923b5d0e97f1322ec4a7ab033574e8062766
3
+ size 871329797
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02898b0130d0cd667f80eebe553712c895afdfa27f8641d6c7ea3adea25cf24f
3
+ size 265569890
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "DistilBertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77b53a607245efc9146dd4ce91edbbd4f1bf7feeb8310d04d3a25343834daedd
3
+ size 4024
vocab.txt ADDED
The diff for this file is too large to render. See raw diff