ShubhamMhaske commited on
Commit
4a8e476
·
verified ·
1 Parent(s): 884e6e5

Upload 19 files

Browse files
models/biomedical-ner-all/.cache/huggingface/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *
models/biomedical-ner-all/.cache/huggingface/download/.gitattributes.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ 4a6f8b4e86851998a84353598f557e78c6a947de
3
+ 1754896842.240534
models/biomedical-ner-all/.cache/huggingface/download/README.md.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ 69f017ddfd0d1d8e218c6dae3be8a5152a240cfd
3
+ 1754896842.038518
models/biomedical-ner-all/.cache/huggingface/download/config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ fafdaa630753928d9007e2bc3ee977ac9716f970
3
+ 1754896841.0983357
models/biomedical-ner-all/.cache/huggingface/download/model.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ d744b846a71ce6ccdb49d7bfe5097eadc41e766ffd28481e1636ed796e820165
3
+ 1754896887.3323648
models/biomedical-ner-all/.cache/huggingface/download/pytorch_model.bin.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ b027673a3307002bc2c34795e627691e1a0b906ee3480036fb9a5b06d269f547
3
+ 1754896876.2208107
models/biomedical-ner-all/.cache/huggingface/download/special_tokens_map.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ a8b3208c2884c4efb86e49300fdd3dc877220cdf
3
+ 1754896841.0379386
models/biomedical-ner-all/.cache/huggingface/download/tokenizer.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ 05a08d51432c8ca05501d170fbc36142a93a163e
3
+ 1754896841.1887412
models/biomedical-ner-all/.cache/huggingface/download/tokenizer_config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ 732c0433b37b44b3c0b10c740c53ab79d649276e
3
+ 1754896841.0983357
models/biomedical-ner-all/.cache/huggingface/download/vocab.txt.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 015a4050c9ac99722e61c547aa9b4282bcbedc7f
2
+ fb140275c155a9c7c5a3b3e0e77a9e839594a938
3
+ 1754896841.3756618
models/biomedical-ner-all/.gitattributes ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
models/biomedical-ner-all/README.md ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ tags:
6
+ - Token Classification
7
+ co2_eq_emissions: 0.0279399890043426
8
+ widget:
9
+ - text: "CASE: A 28-year-old previously healthy man presented with a 6-week history of palpitations.
10
+ The symptoms occurred during rest, 2–3 times per week, lasted up to 30 minutes at a time and were associated with dyspnea.
11
+ Except for a grade 2/6 holosystolic tricuspid regurgitation murmur (best heard at the left sternal border with inspiratory accentuation), physical examination yielded unremarkable findings."
12
+ example_title: "example 1"
13
+ - text: "A 63-year-old woman with no known cardiac history presented with a sudden onset of dyspnea requiring intubation and ventilatory support out of hospital.
14
+ She denied preceding symptoms of chest discomfort, palpitations, syncope or infection.
15
+ The patient was afebrile and normotensive, with a sinus tachycardia of 140 beats/min."
16
+ example_title: "example 2"
17
+ - text: "A 48 year-old female presented with vaginal bleeding and abnormal Pap smears.
18
+ Upon diagnosis of invasive non-keratinizing SCC of the cervix, she underwent a radical hysterectomy with salpingo-oophorectomy which demonstrated positive spread to the pelvic lymph nodes and the parametrium.
19
+ Pathological examination revealed that the tumour also extensively involved the lower uterine segment."
20
+ example_title: "example 3"
21
+ ---
22
+
23
+ ## About the Model
24
+ An English Named Entity Recognition model, trained on the Maccrobat dataset to recognize biomedical entities (107 entities) in a given text corpus (case reports, etc.). This model was built on top of distilbert-base-uncased.
25
+
26
+ - Dataset: Maccrobat https://figshare.com/articles/dataset/MACCROBAT2018/9764942
27
+ - Carbon emission: 0.0279399890043426 kg
28
+ - Training time: 30.16527 minutes
29
+ - GPU used: 1 x GeForce RTX 3060 Laptop GPU
30
+
31
+ Check out the tutorial video for an explanation of this model and the corresponding Python library: https://youtu.be/xpiDPdBpS18
32
+
33
+ ## Usage
34
+ The easiest way is to use the Hugging Face Inference API; the second method is to use the pipeline object offered by the transformers library.
35
+ ```python
36
+ from transformers import pipeline
37
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
38
+
39
+ tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
40
+ model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
41
+
42
+ pipe = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple") # pass device=0 if using gpu
43
+ pipe("""The patient reported no recurrence of palpitations at follow-up 6 months after the ablation.""")
44
+ ```
45
+
46
+ ## Author
47
+ This model is part of the Research topic "AI in Biomedical field" conducted by Deepak John Reji and Shaina Raza. If you use this work (code, model or dataset), please star the repository at:
48
+ > https://github.com/dreji18/Bio-Epidemiology-NER
49
+
50
+ ## You can support me here :)
51
+ <a href="https://www.buymeacoffee.com/deepakjohnreji" target="_blank"><img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" style="height: 60px !important;width: 217px !important;" ></a>
models/biomedical-ner-all/config.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForTokenClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "O",
13
+ "1": "B-Activity",
14
+ "2": "B-Administration",
15
+ "3": "B-Age",
16
+ "4": "B-Area",
17
+ "5": "B-Biological_attribute",
18
+ "6": "B-Biological_structure",
19
+ "7": "B-Clinical_event",
20
+ "8": "B-Color",
21
+ "9": "B-Coreference",
22
+ "10": "B-Date",
23
+ "11": "B-Detailed_description",
24
+ "12": "B-Diagnostic_procedure",
25
+ "13": "B-Disease_disorder",
26
+ "14": "B-Distance",
27
+ "15": "B-Dosage",
28
+ "16": "B-Duration",
29
+ "17": "B-Family_history",
30
+ "18": "B-Frequency",
31
+ "19": "B-Height",
32
+ "20": "B-History",
33
+ "21": "B-Lab_value",
34
+ "22": "B-Mass",
35
+ "23": "B-Medication",
36
+ "24": "B-Non[biological](Detailed_description",
37
+ "25": "B-Nonbiological_location",
38
+ "26": "B-Occupation",
39
+ "27": "B-Other_entity",
40
+ "28": "B-Other_event",
41
+ "29": "B-Outcome",
42
+ "30": "B-Personal_[back](Biological_structure",
43
+ "31": "B-Personal_background",
44
+ "32": "B-Qualitative_concept",
45
+ "33": "B-Quantitative_concept",
46
+ "34": "B-Severity",
47
+ "35": "B-Sex",
48
+ "36": "B-Shape",
49
+ "37": "B-Sign_symptom",
50
+ "38": "B-Subject",
51
+ "39": "B-Texture",
52
+ "40": "B-Therapeutic_procedure",
53
+ "41": "B-Time",
54
+ "42": "B-Volume",
55
+ "43": "B-Weight",
56
+ "44": "I-Activity",
57
+ "45": "I-Administration",
58
+ "46": "I-Age",
59
+ "47": "I-Area",
60
+ "48": "I-Biological_attribute",
61
+ "49": "I-Biological_structure",
62
+ "50": "I-Clinical_event",
63
+ "51": "I-Color",
64
+ "52": "I-Coreference",
65
+ "53": "I-Date",
66
+ "54": "I-Detailed_description",
67
+ "55": "I-Diagnostic_procedure",
68
+ "56": "I-Disease_disorder",
69
+ "57": "I-Distance",
70
+ "58": "I-Dosage",
71
+ "59": "I-Duration",
72
+ "60": "I-Family_history",
73
+ "61": "I-Frequency",
74
+ "62": "I-Height",
75
+ "63": "I-History",
76
+ "64": "I-Lab_value",
77
+ "65": "I-Mass",
78
+ "66": "I-Medication",
79
+ "67": "I-Nonbiological_location",
80
+ "68": "I-Occupation",
81
+ "69": "I-Other_entity",
82
+ "70": "I-Other_event",
83
+ "71": "I-Outcome",
84
+ "72": "I-Personal_background",
85
+ "73": "I-Qualitative_concept",
86
+ "74": "I-Quantitative_concept",
87
+ "75": "I-Severity",
88
+ "76": "I-Shape",
89
+ "77": "I-Sign_symptom",
90
+ "78": "I-Subject",
91
+ "79": "I-Texture",
92
+ "80": "I-Therapeutic_procedure",
93
+ "81": "I-Time",
94
+ "82": "I-Volume",
95
+ "83": "I-Weight"
96
+ },
97
+ "initializer_range": 0.02,
98
+ "label2id": {
99
+ "B-Activity": 1,
100
+ "B-Administration": 2,
101
+ "B-Age": 3,
102
+ "B-Area": 4,
103
+ "B-Biological_attribute": 5,
104
+ "B-Biological_structure": 6,
105
+ "B-Clinical_event": 7,
106
+ "B-Color": 8,
107
+ "B-Coreference": 9,
108
+ "B-Date": 10,
109
+ "B-Detailed_description": 11,
110
+ "B-Diagnostic_procedure": 12,
111
+ "B-Disease_disorder": 13,
112
+ "B-Distance": 14,
113
+ "B-Dosage": 15,
114
+ "B-Duration": 16,
115
+ "B-Family_history": 17,
116
+ "B-Frequency": 18,
117
+ "B-Height": 19,
118
+ "B-History": 20,
119
+ "B-Lab_value": 21,
120
+ "B-Mass": 22,
121
+ "B-Medication": 23,
122
+ "B-Non[biological](Detailed_description": 24,
123
+ "B-Nonbiological_location": 25,
124
+ "B-Occupation": 26,
125
+ "B-Other_entity": 27,
126
+ "B-Other_event": 28,
127
+ "B-Outcome": 29,
128
+ "B-Personal_[back](Biological_structure": 30,
129
+ "B-Personal_background": 31,
130
+ "B-Qualitative_concept": 32,
131
+ "B-Quantitative_concept": 33,
132
+ "B-Severity": 34,
133
+ "B-Sex": 35,
134
+ "B-Shape": 36,
135
+ "B-Sign_symptom": 37,
136
+ "B-Subject": 38,
137
+ "B-Texture": 39,
138
+ "B-Therapeutic_procedure": 40,
139
+ "B-Time": 41,
140
+ "B-Volume": 42,
141
+ "B-Weight": 43,
142
+ "I-Activity": 44,
143
+ "I-Administration": 45,
144
+ "I-Age": 46,
145
+ "I-Area": 47,
146
+ "I-Biological_attribute": 48,
147
+ "I-Biological_structure": 49,
148
+ "I-Clinical_event": 50,
149
+ "I-Color": 51,
150
+ "I-Coreference": 52,
151
+ "I-Date": 53,
152
+ "I-Detailed_description": 54,
153
+ "I-Diagnostic_procedure": 55,
154
+ "I-Disease_disorder": 56,
155
+ "I-Distance": 57,
156
+ "I-Dosage": 58,
157
+ "I-Duration": 59,
158
+ "I-Family_history": 60,
159
+ "I-Frequency": 61,
160
+ "I-Height": 62,
161
+ "I-History": 63,
162
+ "I-Lab_value": 64,
163
+ "I-Mass": 65,
164
+ "I-Medication": 66,
165
+ "I-Nonbiological_location": 67,
166
+ "I-Occupation": 68,
167
+ "I-Other_entity": 69,
168
+ "I-Other_event": 70,
169
+ "I-Outcome": 71,
170
+ "I-Personal_background": 72,
171
+ "I-Qualitative_concept": 73,
172
+ "I-Quantitative_concept": 74,
173
+ "I-Severity": 75,
174
+ "I-Shape": 76,
175
+ "I-Sign_symptom": 77,
176
+ "I-Subject": 78,
177
+ "I-Texture": 79,
178
+ "I-Therapeutic_procedure": 80,
179
+ "I-Time": 81,
180
+ "I-Volume": 82,
181
+ "I-Weight": 83,
182
+ "O": 0
183
+ },
184
+ "max_position_embeddings": 512,
185
+ "model_type": "distilbert",
186
+ "n_heads": 12,
187
+ "n_layers": 6,
188
+ "pad_token_id": 0,
189
+ "qa_dropout": 0.1,
190
+ "seq_classif_dropout": 0.2,
191
+ "sinusoidal_pos_embds": false,
192
+ "tie_weights_": true,
193
+ "torch_dtype": "float32",
194
+ "transformers_version": "4.20.1",
195
+ "vocab_size": 30522
196
+ }
models/biomedical-ner-all/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d744b846a71ce6ccdb49d7bfe5097eadc41e766ffd28481e1636ed796e820165
3
+ size 265722260
models/biomedical-ner-all/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b027673a3307002bc2c34795e627691e1a0b906ee3480036fb9a5b06d269f547
3
+ size 265743541
models/biomedical-ner-all/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
models/biomedical-ner-all/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/biomedical-ner-all/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "cuda": 0,
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "name_or_path": "distilbert-base-uncased",
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "special_tokens_map_file": null,
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "DistilBertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
models/biomedical-ner-all/vocab.txt ADDED
The diff for this file is too large to render. See raw diff