Upload 8 files
Browse files
This model is a high-performance named entity recognition (NER) model achieving an overall F1-score of 0.97. It is optimized for identifying medical entities such as diseases, symptoms, procedures, and provider types, with strong precision and recall across diverse entity types, making it ideal for healthcare text processing tasks.
- config.json +82 -0
- optimizer.pt +3 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +13 -0
- training_args.bin +3 -0
- vocab.txt +0 -0
config.json
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "distilbert-base-uncased",
|
3 |
+
"activation": "gelu",
|
4 |
+
"architectures": [
|
5 |
+
"DistilBertForTokenClassification"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.1,
|
8 |
+
"dim": 768,
|
9 |
+
"dropout": 0.1,
|
10 |
+
"hidden_dim": 3072,
|
11 |
+
"id2label": {
|
12 |
+
"0": "B-DISEASE",
|
13 |
+
"1": "B-DOCUMENT_TYPE",
|
14 |
+
"2": "B-DOS",
|
15 |
+
"3": "B-DOS_A",
|
16 |
+
"4": "B-DOS_D",
|
17 |
+
"5": "B-LAB",
|
18 |
+
"6": "B-MEDICINE",
|
19 |
+
"7": "B-PROCEDURE",
|
20 |
+
"8": "B-PROVIDER_INDIVIDUAL",
|
21 |
+
"9": "B-PROVIDER_ORG",
|
22 |
+
"10": "B-SECTION_HEADING",
|
23 |
+
"11": "B-SYMPTOM",
|
24 |
+
"12": "B-VISIT_TYPE",
|
25 |
+
"13": "I-DISEASE",
|
26 |
+
"14": "I-DOCUMENT_TYPE",
|
27 |
+
"15": "I-DOS",
|
28 |
+
"16": "I-DOS_A",
|
29 |
+
"17": "I-DOS_D",
|
30 |
+
"18": "I-LAB",
|
31 |
+
"19": "I-MEDICINE",
|
32 |
+
"20": "I-PROCEDURE",
|
33 |
+
"21": "I-PROVIDER_INDIVIDUAL",
|
34 |
+
"22": "I-PROVIDER_ORG",
|
35 |
+
"23": "I-SECTION_HEADING",
|
36 |
+
"24": "I-SYMPTOM",
|
37 |
+
"25": "I-VISIT_TYPE",
|
38 |
+
"26": "O"
|
39 |
+
},
|
40 |
+
"initializer_range": 0.02,
|
41 |
+
"label2id": {
|
42 |
+
"B-DISEASE": 0,
|
43 |
+
"B-DOCUMENT_TYPE": 1,
|
44 |
+
"B-DOS": 2,
|
45 |
+
"B-DOS_A": 3,
|
46 |
+
"B-DOS_D": 4,
|
47 |
+
"B-LAB": 5,
|
48 |
+
"B-MEDICINE": 6,
|
49 |
+
"B-PROCEDURE": 7,
|
50 |
+
"B-PROVIDER_INDIVIDUAL": 8,
|
51 |
+
"B-PROVIDER_ORG": 9,
|
52 |
+
"B-SECTION_HEADING": 10,
|
53 |
+
"B-SYMPTOM": 11,
|
54 |
+
"B-VISIT_TYPE": 12,
|
55 |
+
"I-DISEASE": 13,
|
56 |
+
"I-DOCUMENT_TYPE": 14,
|
57 |
+
"I-DOS": 15,
|
58 |
+
"I-DOS_A": 16,
|
59 |
+
"I-DOS_D": 17,
|
60 |
+
"I-LAB": 18,
|
61 |
+
"I-MEDICINE": 19,
|
62 |
+
"I-PROCEDURE": 20,
|
63 |
+
"I-PROVIDER_INDIVIDUAL": 21,
|
64 |
+
"I-PROVIDER_ORG": 22,
|
65 |
+
"I-SECTION_HEADING": 23,
|
66 |
+
"I-SYMPTOM": 24,
|
67 |
+
"I-VISIT_TYPE": 25,
|
68 |
+
"O": 26
|
69 |
+
},
|
70 |
+
"max_position_embeddings": 512,
|
71 |
+
"model_type": "distilbert",
|
72 |
+
"n_heads": 12,
|
73 |
+
"n_layers": 6,
|
74 |
+
"pad_token_id": 0,
|
75 |
+
"qa_dropout": 0.1,
|
76 |
+
"seq_classif_dropout": 0.2,
|
77 |
+
"sinusoidal_pos_embds": false,
|
78 |
+
"tie_weights_": true,
|
79 |
+
"torch_dtype": "float32",
|
80 |
+
"transformers_version": "4.28.0",
|
81 |
+
"vocab_size": 30522
|
82 |
+
}
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:925cdb6b84985b8176010ee8912b923b5d0e97f1322ec4a7ab033574e8062766
|
3 |
+
size 871329797
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02898b0130d0cd667f80eebe553712c895afdfa27f8641d6c7ea3adea25cf24f
|
3 |
+
size 265569890
|
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"clean_up_tokenization_spaces": true,
|
3 |
+
"cls_token": "[CLS]",
|
4 |
+
"do_lower_case": true,
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"model_max_length": 512,
|
7 |
+
"pad_token": "[PAD]",
|
8 |
+
"sep_token": "[SEP]",
|
9 |
+
"strip_accents": null,
|
10 |
+
"tokenize_chinese_chars": true,
|
11 |
+
"tokenizer_class": "DistilBertTokenizer",
|
12 |
+
"unk_token": "[UNK]"
|
13 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77b53a607245efc9146dd4ce91edbbd4f1bf7feeb8310d04d3a25343834daedd
|
3 |
+
size 4024
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|