Text Classification
spaCy
English
medical
tarak00003 committed on
Commit
f7eb132
1 Parent(s): 1e92129

Upload 3 files

Browse files
Files changed (3) hide show
  1. medicalsymptoms1.ipynb +483 -0
  2. medicalsymptoms1.py +88 -0
  3. symptomssingle.csv +0 -0
medicalsymptoms1.ipynb ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 8,
20
+ "metadata": {
21
+ "colab": {
22
+ "base_uri": "https://localhost:8080/"
23
+ },
24
+ "id": "8xtpBxD_mHlR",
25
+ "outputId": "be506f46-b4cf-42a1-c98e-c3ffe66d1254"
26
+ },
27
+ "outputs": [
28
+ {
29
+ "output_type": "stream",
30
+ "name": "stdout",
31
+ "text": [
32
+ "Accuracy: 0.0\n",
33
+ "Classification Report:\n",
34
+ " precision recall f1-score support\n",
35
+ "\n",
36
+ " Acanthosis nigricans 0.00 0.00 0.00 0.0\n",
37
+ " Acariasis 0.00 0.00 0.00 0.0\n",
38
+ " Acne 0.00 0.00 0.00 0.0\n",
39
+ " Acute bronchitis 0.00 0.00 0.00 1.0\n",
40
+ " Acute bronchospasm 0.00 0.00 0.00 1.0\n",
41
+ " Acute glaucoma 0.00 0.00 0.00 1.0\n",
42
+ " Acute pancreatitis 0.00 0.00 0.00 0.0\n",
43
+ " Acute stress reaction 0.00 0.00 0.00 1.0\n",
44
+ " Adjustment reaction 0.00 0.00 0.00 1.0\n",
45
+ " Alcohol intoxication 0.00 0.00 0.00 0.0\n",
46
+ " Alcohol withdrawal 0.00 0.00 0.00 1.0\n",
47
+ " Alcoholic liver disease 0.00 0.00 0.00 0.0\n",
48
+ " Allergy 0.00 0.00 0.00 0.0\n",
49
+ " Allergy to animals 0.00 0.00 0.00 1.0\n",
50
+ " Anemia due to chronic kidney disease 0.00 0.00 0.00 1.0\n",
51
+ " Anemia of chronic disease 0.00 0.00 0.00 1.0\n",
52
+ " Angina 0.00 0.00 0.00 0.0\n",
53
+ " Ankylosing spondylitis 0.00 0.00 0.00 0.0\n",
54
+ " Aphakia 0.00 0.00 0.00 0.0\n",
55
+ " Aphthous ulcer 0.00 0.00 0.00 1.0\n",
56
+ " Arthritis of the hip 0.00 0.00 0.00 1.0\n",
57
+ " Asthma 0.00 0.00 0.00 0.0\n",
58
+ " Atelectasis 0.00 0.00 0.00 0.0\n",
59
+ " Athlete's foot 0.00 0.00 0.00 1.0\n",
60
+ " Atonic bladder 0.00 0.00 0.00 0.0\n",
61
+ " Atrial fibrillation 0.00 0.00 0.00 0.0\n",
62
+ " Benign vaginal discharge (leukorrhea) 0.00 0.00 0.00 0.0\n",
63
+ " Bipolar disorder 0.00 0.00 0.00 1.0\n",
64
+ " Birth trauma 0.00 0.00 0.00 0.0\n",
65
+ " Bladder cancer 0.00 0.00 0.00 0.0\n",
66
+ " Breast cancer 0.00 0.00 0.00 1.0\n",
67
+ " Breast cyst 0.00 0.00 0.00 0.0\n",
68
+ " Bursitis 0.00 0.00 0.00 1.0\n",
69
+ " Carbon monoxide poisoning 0.00 0.00 0.00 0.0\n",
70
+ " Cellulitis or abscess of mouth 0.00 0.00 0.00 1.0\n",
71
+ " Cervicitis 0.00 0.00 0.00 0.0\n",
72
+ " Chalazion 0.00 0.00 0.00 0.0\n",
73
+ " Cholecystitis 0.00 0.00 0.00 0.0\n",
74
+ " Choledocholithiasis 0.00 0.00 0.00 0.0\n",
75
+ " Cholesteatoma 0.00 0.00 0.00 0.0\n",
76
+ " Chondromalacia of the patella 0.00 0.00 0.00 0.0\n",
77
+ " Chronic back pain 0.00 0.00 0.00 0.0\n",
78
+ " Chronic glaucoma 0.00 0.00 0.00 1.0\n",
79
+ " Chronic kidney disease 0.00 0.00 0.00 0.0\n",
80
+ " Chronic obstructive pulmonary disease (COPD) 0.00 0.00 0.00 0.0\n",
81
+ " Chronic otitis media 0.00 0.00 0.00 1.0\n",
82
+ " Chronic pain disorder 0.00 0.00 0.00 1.0\n",
83
+ " Chronic pancreatitis 0.00 0.00 0.00 1.0\n",
84
+ " Chronic rheumatic fever 0.00 0.00 0.00 0.0\n",
85
+ " Chronic ulcer 0.00 0.00 0.00 0.0\n",
86
+ " Cirrhosis 0.00 0.00 0.00 1.0\n",
87
+ " Cold sore 0.00 0.00 0.00 0.0\n",
88
+ " Colorectal cancer 0.00 0.00 0.00 0.0\n",
89
+ " Congenital rubella 0.00 0.00 0.00 1.0\n",
90
+ " Conjunctivitis due to allergy 0.00 0.00 0.00 1.0\n",
91
+ " Coronary atherosclerosis 0.00 0.00 0.00 1.0\n",
92
+ " Croup 0.00 0.00 0.00 0.0\n",
93
+ " Crushing injury 0.00 0.00 0.00 1.0\n",
94
+ " Cyst of the eyelid 0.00 0.00 0.00 1.0\n",
95
+ " Cystic Fibrosis 0.00 0.00 0.00 1.0\n",
96
+ " Cytomegalovirus infection 0.00 0.00 0.00 1.0\n",
97
+ " De Quervain disease 0.00 0.00 0.00 1.0\n",
98
+ " Degenerative disc disease 0.00 0.00 0.00 1.0\n",
99
+ " Dengue fever 0.00 0.00 0.00 0.0\n",
100
+ " Depression 0.00 0.00 0.00 0.0\n",
101
+ " Diabetes insipidus 0.00 0.00 0.00 1.0\n",
102
+ " Diaper rash 0.00 0.00 0.00 0.0\n",
103
+ " Dislocation of the ankle 0.00 0.00 0.00 0.0\n",
104
+ " Dislocation of the finger 0.00 0.00 0.00 1.0\n",
105
+ " Dislocation of the foot 0.00 0.00 0.00 1.0\n",
106
+ " Dislocation of the hip 0.00 0.00 0.00 1.0\n",
107
+ " Dislocation of the shoulder 0.00 0.00 0.00 0.0\n",
108
+ " Dissociative disorder 0.00 0.00 0.00 1.0\n",
109
+ " Down syndrome 0.00 0.00 0.00 1.0\n",
110
+ " Drug abuse (cocaine) 0.00 0.00 0.00 0.0\n",
111
+ " Drug reaction 0.00 0.00 0.00 1.0\n",
112
+ " Dry eye of unknown cause 0.00 0.00 0.00 0.0\n",
113
+ " Dyshidrosis 0.00 0.00 0.00 1.0\n",
114
+ " Ear drum damage 0.00 0.00 0.00 0.0\n",
115
+ " Ear wax impaction 0.00 0.00 0.00 1.0\n",
116
+ " Emphysema 0.00 0.00 0.00 0.0\n",
117
+ " Empyema 0.00 0.00 0.00 1.0\n",
118
+ " Encephalitis 0.00 0.00 0.00 0.0\n",
119
+ " Endocarditis 0.00 0.00 0.00 1.0\n",
120
+ " Endometrial hyperplasia 0.00 0.00 0.00 1.0\n",
121
+ " Esophageal cancer 0.00 0.00 0.00 0.0\n",
122
+ " Essential tremor 0.00 0.00 0.00 1.0\n",
123
+ " Factitious disorder 0.00 0.00 0.00 1.0\n",
124
+ " Fat embolism 0.00 0.00 0.00 1.0\n",
125
+ " Female genitalia infection 0.00 0.00 0.00 1.0\n",
126
+ " Fibroadenoma 0.00 0.00 0.00 1.0\n",
127
+ " Fibromyalgia 0.00 0.00 0.00 0.0\n",
128
+ " Floaters 0.00 0.00 0.00 0.0\n",
129
+ " Fluid overload 0.00 0.00 0.00 1.0\n",
130
+ " Foreign body in the eye 0.00 0.00 0.00 0.0\n",
131
+ " Foreign body in the throat 0.00 0.00 0.00 0.0\n",
132
+ " Foreign body in the vagina 0.00 0.00 0.00 0.0\n",
133
+ " Fracture of the ankle 0.00 0.00 0.00 1.0\n",
134
+ " Fracture of the arm 0.00 0.00 0.00 1.0\n",
135
+ " Fracture of the finger 0.00 0.00 0.00 0.0\n",
136
+ " Fracture of the hand 0.00 0.00 0.00 0.0\n",
137
+ " Fracture of the jaw 0.00 0.00 0.00 1.0\n",
138
+ " Fracture of the leg 0.00 0.00 0.00 0.0\n",
139
+ " Fracture of the patella 0.00 0.00 0.00 1.0\n",
140
+ " G6PD enzyme deficiency 0.00 0.00 0.00 0.0\n",
141
+ " Galactorrhea of unknown cause 0.00 0.00 0.00 0.0\n",
142
+ " Gallstone 0.00 0.00 0.00 0.0\n",
143
+ " Gastritis 0.00 0.00 0.00 0.0\n",
144
+ " Gastroduodenal ulcer 0.00 0.00 0.00 1.0\n",
145
+ " Gout 0.00 0.00 0.00 0.0\n",
146
+ " Granuloma inguinale 0.00 0.00 0.00 0.0\n",
147
+ " Gynecomastia 0.00 0.00 0.00 0.0\n",
148
+ " Hashimoto thyroiditis 0.00 0.00 0.00 1.0\n",
149
+ " Head and neck cancer 0.00 0.00 0.00 1.0\n",
150
+ " Heart attack 0.00 0.00 0.00 1.0\n",
151
+ " Heart contusion 0.00 0.00 0.00 0.0\n",
152
+ " Heart failure 0.00 0.00 0.00 1.0\n",
153
+ " Hemarthrosis 0.00 0.00 0.00 1.0\n",
154
+ " Hematoma 0.00 0.00 0.00 1.0\n",
155
+ " Hemolytic anemia 0.00 0.00 0.00 1.0\n",
156
+ " High blood pressure 0.00 0.00 0.00 0.0\n",
157
+ " Hirsutism 0.00 0.00 0.00 1.0\n",
158
+ " Human immunodeficiency virus infection (HIV) 0.00 0.00 0.00 1.0\n",
159
+ " Hydatidiform mole 0.00 0.00 0.00 1.0\n",
160
+ " Hydrocele of the testicle 0.00 0.00 0.00 0.0\n",
161
+ " Hydronephrosis 0.00 0.00 0.00 1.0\n",
162
+ " Hyperemesis gravidarum 0.00 0.00 0.00 0.0\n",
163
+ " Hypergammaglobulinemia 0.00 0.00 0.00 1.0\n",
164
+ " Hyperkalemia 0.00 0.00 0.00 0.0\n",
165
+ " Hypernatremia 0.00 0.00 0.00 1.0\n",
166
+ "Hypertrophic obstructive cardiomyopathy (HOCM) 0.00 0.00 0.00 1.0\n",
167
+ " Hyponatremia 0.00 0.00 0.00 0.0\n",
168
+ " Impetigo 0.00 0.00 0.00 1.0\n",
169
+ " Indigestion 0.00 0.00 0.00 1.0\n",
170
+ " Infectious gastroenteritis 0.00 0.00 0.00 1.0\n",
171
+ " Ingrown toe nail 0.00 0.00 0.00 1.0\n",
172
+ " Inguinal hernia 0.00 0.00 0.00 0.0\n",
173
+ " Injury of the ankle 0.00 0.00 0.00 0.0\n",
174
+ " Injury to the abdomen 0.00 0.00 0.00 1.0\n",
175
+ " Injury to the finger 0.00 0.00 0.00 1.0\n",
176
+ " Injury to the hip 0.00 0.00 0.00 1.0\n",
177
+ " Injury to the knee 0.00 0.00 0.00 0.0\n",
178
+ " Insect bite 0.00 0.00 0.00 0.0\n",
179
+ " Intestinal cancer 0.00 0.00 0.00 1.0\n",
180
+ " Intestinal malabsorption 0.00 0.00 0.00 1.0\n",
181
+ " Intestinal obstruction 0.00 0.00 0.00 0.0\n",
182
+ " Intracranial abscess 0.00 0.00 0.00 1.0\n",
183
+ " Irritable bowel syndrome 0.00 0.00 0.00 0.0\n",
184
+ " Kaposi sarcoma 0.00 0.00 0.00 1.0\n",
185
+ " Kidney cancer 0.00 0.00 0.00 1.0\n",
186
+ " Kidney stone 0.00 0.00 0.00 1.0\n",
187
+ " Knee ligament or meniscus tear 0.00 0.00 0.00 1.0\n",
188
+ " Lactose intolerance 0.00 0.00 0.00 1.0\n",
189
+ " Leishmaniasis 0.00 0.00 0.00 1.0\n",
190
+ " Lichen planus 0.00 0.00 0.00 1.0\n",
191
+ " Lipoma 0.00 0.00 0.00 1.0\n",
192
+ " Lung cancer 0.00 0.00 0.00 1.0\n",
193
+ " Lymphadenitis 0.00 0.00 0.00 0.0\n",
194
+ " Lymphangitis 0.00 0.00 0.00 1.0\n",
195
+ " Lymphogranuloma venereum 0.00 0.00 0.00 1.0\n",
196
+ " Magnesium deficiency 0.00 0.00 0.00 1.0\n",
197
+ " Malignant hypertension 0.00 0.00 0.00 1.0\n",
198
+ " Marijuana abuse 0.00 0.00 0.00 0.0\n",
199
+ " Mastoiditis 0.00 0.00 0.00 1.0\n",
200
+ " Meckel diverticulum 0.00 0.00 0.00 0.0\n",
201
+ " Migraine 0.00 0.00 0.00 1.0\n",
202
+ " Mitral valve disease 0.00 0.00 0.00 1.0\n",
203
+ " Molluscum contagiosum 0.00 0.00 0.00 1.0\n",
204
+ " Mononucleosis 0.00 0.00 0.00 0.0\n",
205
+ " Moyamoya disease 0.00 0.00 0.00 0.0\n",
206
+ " Mucositis 0.00 0.00 0.00 0.0\n",
207
+ " Mumps 0.00 0.00 0.00 1.0\n",
208
+ " Muscle spasm 0.00 0.00 0.00 1.0\n",
209
+ " Narcolepsy 0.00 0.00 0.00 0.0\n",
210
+ " Neonatal jaundice 0.00 0.00 0.00 1.0\n",
211
+ " Neurosis 0.00 0.00 0.00 0.0\n",
212
+ " Noninfectious gastroenteritis 0.00 0.00 0.00 0.0\n",
213
+ " Obstructive sleep apnea (OSA) 0.00 0.00 0.00 1.0\n",
214
+ " Onychomycosis 0.00 0.00 0.00 0.0\n",
215
+ " Open wound of the cheek 0.00 0.00 0.00 1.0\n",
216
+ " Open wound of the finger 0.00 0.00 0.00 0.0\n",
217
+ " Open wound of the hand 0.00 0.00 0.00 1.0\n",
218
+ " Open wound of the head 0.00 0.00 0.00 1.0\n",
219
+ " Open wound of the hip 0.00 0.00 0.00 0.0\n",
220
+ " Open wound of the mouth 0.00 0.00 0.00 1.0\n",
221
+ " Open wound of the neck 0.00 0.00 0.00 1.0\n",
222
+ " Open wound of the shoulder 0.00 0.00 0.00 0.0\n",
223
+ " Oral leukoplakia 0.00 0.00 0.00 0.0\n",
224
+ " Oral mucosal lesion 0.00 0.00 0.00 0.0\n",
225
+ " Oral thrush (yeast infection) 0.00 0.00 0.00 1.0\n",
226
+ " Osteoarthritis 0.00 0.00 0.00 0.0\n",
227
+ " Otitis externa (swimmer's ear) 0.00 0.00 0.00 0.0\n",
228
+ " Pancreatic cancer 0.00 0.00 0.00 1.0\n",
229
+ " Panic disorder 0.00 0.00 0.00 0.0\n",
230
+ " Parkinson disease 0.00 0.00 0.00 0.0\n",
231
+ " Paronychia 0.00 0.00 0.00 0.0\n",
232
+ " Patau syndrome 0.00 0.00 0.00 0.0\n",
233
+ " Pelvic fistula 0.00 0.00 0.00 1.0\n",
234
+ " Pelvic organ prolapse 0.00 0.00 0.00 0.0\n",
235
+ " Pemphigus 0.00 0.00 0.00 0.0\n",
236
+ " Pericarditis 0.00 0.00 0.00 1.0\n",
237
+ " Perirectal infection 0.00 0.00 0.00 1.0\n",
238
+ " Peritonsillar abscess 0.00 0.00 0.00 1.0\n",
239
+ " Personality disorder 0.00 0.00 0.00 0.0\n",
240
+ " Phimosis 0.00 0.00 0.00 1.0\n",
241
+ " Pilonidal cyst 0.00 0.00 0.00 1.0\n",
242
+ " Placental abruption 0.00 0.00 0.00 1.0\n",
243
+ " Pleural effusion 0.00 0.00 0.00 1.0\n",
244
+ " Pneumonia 0.00 0.00 0.00 0.0\n",
245
+ " Pneumothorax 0.00 0.00 0.00 1.0\n",
246
+ " Poisoning due to analgesics 0.00 0.00 0.00 1.0\n",
247
+ " Poisoning due to antidepressants 0.00 0.00 0.00 0.0\n",
248
+ " Polycystic ovarian syndrome (PCOS) 0.00 0.00 0.00 0.0\n",
249
+ " Premature ovarian failure 0.00 0.00 0.00 1.0\n",
250
+ " Premenstrual tension syndrome 0.00 0.00 0.00 0.0\n",
251
+ " Problem during pregnancy 0.00 0.00 0.00 0.0\n",
252
+ " Protein deficiency 0.00 0.00 0.00 0.0\n",
253
+ " Pseudohypoparathyroidism 0.00 0.00 0.00 1.0\n",
254
+ " Psoriasis 0.00 0.00 0.00 0.0\n",
255
+ " Psychotic disorder 0.00 0.00 0.00 1.0\n",
256
+ " Pulmonary embolism 0.00 0.00 0.00 0.0\n",
257
+ " Pulmonary eosinophilia 0.00 0.00 0.00 1.0\n",
258
+ " Pulmonary fibrosis 0.00 0.00 0.00 0.0\n",
259
+ " Pyelonephritis 0.00 0.00 0.00 0.0\n",
260
+ " Pyloric stenosis 0.00 0.00 0.00 1.0\n",
261
+ " Rabies 0.00 0.00 0.00 0.0\n",
262
+ " Reactive arthritis 0.00 0.00 0.00 1.0\n",
263
+ " Sarcoidosis 0.00 0.00 0.00 1.0\n",
264
+ " Scarlet fever 0.00 0.00 0.00 1.0\n",
265
+ " Sciatica 0.00 0.00 0.00 0.0\n",
266
+ " Scoliosis 0.00 0.00 0.00 1.0\n",
267
+ " Scurvy 0.00 0.00 0.00 1.0\n",
268
+ " Sebaceous cyst 0.00 0.00 0.00 0.0\n",
269
+ " Sepsis 0.00 0.00 0.00 1.0\n",
270
+ " Septic arthritis 0.00 0.00 0.00 1.0\n",
271
+ " Shingles (herpes zoster) 0.00 0.00 0.00 0.0\n",
272
+ " Sickle cell crisis 0.00 0.00 0.00 1.0\n",
273
+ " Sjogren syndrome 0.00 0.00 0.00 1.0\n",
274
+ " Skin pigmentation disorder 0.00 0.00 0.00 1.0\n",
275
+ " Smoking or tobacco addiction 0.00 0.00 0.00 1.0\n",
276
+ " Spermatocele 0.00 0.00 0.00 1.0\n",
277
+ " Spondylitis 0.00 0.00 0.00 0.0\n",
278
+ " Spondylolisthesis 0.00 0.00 0.00 1.0\n",
279
+ " Spondylosis 0.00 0.00 0.00 0.0\n",
280
+ " Sporotrichosis 0.00 0.00 0.00 1.0\n",
281
+ " Sprain or strain 0.00 0.00 0.00 0.0\n",
282
+ " Stenosis of the tear duct 0.00 0.00 0.00 1.0\n",
283
+ " Strep throat 0.00 0.00 0.00 1.0\n",
284
+ " Stress incontinence 0.00 0.00 0.00 1.0\n",
285
+ " Stroke 0.00 0.00 0.00 1.0\n",
286
+ " Subarachnoid hemorrhage 0.00 0.00 0.00 1.0\n",
287
+ " Subconjunctival hemorrhage 0.00 0.00 0.00 1.0\n",
288
+ " Tendinitis 0.00 0.00 0.00 1.0\n",
289
+ " Testicular torsion 0.00 0.00 0.00 1.0\n",
290
+ " Thoracic aortic aneurysm 0.00 0.00 0.00 1.0\n",
291
+ " Tietze syndrome 0.00 0.00 0.00 0.0\n",
292
+ " Tonsillar hypertrophy 0.00 0.00 0.00 1.0\n",
293
+ " Tonsillitis 0.00 0.00 0.00 0.0\n",
294
+ " Tooth abscess 0.00 0.00 0.00 0.0\n",
295
+ " Tooth disorder 0.00 0.00 0.00 0.0\n",
296
+ " Torticollis 0.00 0.00 0.00 1.0\n",
297
+ " Tourette syndrome 0.00 0.00 0.00 1.0\n",
298
+ " Toxoplasmosis 0.00 0.00 0.00 1.0\n",
299
+ " Tracheitis 0.00 0.00 0.00 1.0\n",
300
+ " Transient ischemic attack 0.00 0.00 0.00 0.0\n",
301
+ " Trichinosis 0.00 0.00 0.00 1.0\n",
302
+ " Trichomonas infection 0.00 0.00 0.00 1.0\n",
303
+ " Tricuspid valve disease 0.00 0.00 0.00 1.0\n",
304
+ " Turner syndrome 0.00 0.00 0.00 1.0\n",
305
+ " Urethral stricture 0.00 0.00 0.00 0.0\n",
306
+ " Urge incontinence 0.00 0.00 0.00 1.0\n",
307
+ " Urinary tract obstruction 0.00 0.00 0.00 0.0\n",
308
+ " Vaginal yeast infection 0.00 0.00 0.00 0.0\n",
309
+ " Vaginitis 0.00 0.00 0.00 0.0\n",
310
+ " Varicocele of the testicles 0.00 0.00 0.00 1.0\n",
311
+ " Viral exanthem 0.00 0.00 0.00 1.0\n",
312
+ " Viral warts 0.00 0.00 0.00 0.0\n",
313
+ " Vitamin A deficiency 0.00 0.00 0.00 1.0\n",
314
+ " Vitreous degeneration 0.00 0.00 0.00 0.0\n",
315
+ " Vulvar cancer 0.00 0.00 0.00 1.0\n",
316
+ " Vulvar disorder 0.00 0.00 0.00 1.0\n",
317
+ " Vulvodynia 0.00 0.00 0.00 1.0\n",
318
+ " West Nile virus 0.00 0.00 0.00 1.0\n",
319
+ " Whooping cough 0.00 0.00 0.00 0.0\n",
320
+ " Wilson disease 0.00 0.00 0.00 0.0\n",
321
+ "\n",
322
+ " accuracy 0.00 160.0\n",
323
+ " macro avg 0.00 0.00 0.00 160.0\n",
324
+ " weighted avg 0.00 0.00 0.00 160.0\n",
325
+ "\n"
326
+ ]
327
+ },
328
+ {
329
+ "output_type": "stream",
330
+ "name": "stderr",
331
+ "text": [
332
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
333
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
334
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
335
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
336
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
337
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
338
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
339
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
340
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
341
+ " _warn_prf(average, modifier, msg_start, len(result))\n",
342
+ "/usr/local/lib/python3.9/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n",
343
+ " _warn_prf(average, modifier, msg_start, len(result))\n"
344
+ ]
345
+ }
346
+ ],
347
+ "source": [
348
+ "import pandas as pd\n",
349
+ "import re\n",
350
+ "import spacy\n",
351
+ "from sklearn.model_selection import train_test_split\n",
352
+ "from sklearn.feature_extraction.text import TfidfVectorizer\n",
353
+ "from sklearn.pipeline import Pipeline\n",
354
+ "from sklearn.metrics import accuracy_score, classification_report\n",
355
+ "from sklearn.linear_model import LogisticRegression\n",
356
+ "\n",
357
+ "# Load the data\n",
358
+ "data = pd.read_csv('symptomssingle.csv')\n",
359
+ "\n",
360
+ "# Check for any missing values and remove them\n",
361
+ "data = data.dropna()\n",
362
+ "\n",
363
+ "# Define a function to separate symptoms and diseases from the text\n",
364
+ "def separate_symptoms_and_diseases(text):\n",
365
+ " symptoms = re.findall(r'{\"symptoms\":\"(.*?)\"}', text)\n",
366
+ " disease = re.sub(r'(?:{\"symptoms\":\".*?\"},?)+', '', text).strip()\n",
367
+ " disease = disease.replace('],', '').strip() # Remove '],' from the disease name\n",
368
+ " return symptoms, disease\n",
369
+ "\n",
370
+ "# Apply the function to the data\n",
371
+ "data['symptoms_and_diseases'] = data['data'].apply(separate_symptoms_and_diseases)\n",
372
+ "data[['symptoms', 'disease']] = pd.DataFrame(data['symptoms_and_diseases'].tolist(), index=data.index)\n",
373
+ "data = data.drop(columns=['data', 'symptoms_and_diseases'])\n",
374
+ "\n",
375
+ "# Load the spaCy model\n",
376
+ "nlp = spacy.load('en_core_web_sm')\n",
377
+ "\n",
378
+ "# Preprocessing function\n",
379
+ "def preprocess(symptoms):\n",
380
+ " processed_symptoms = []\n",
381
+ " for symptom in symptoms:\n",
382
+ " doc = nlp(symptom)\n",
383
+ " processed_symptom = ' '.join(token.lemma_.lower() for token in doc if not token.is_stop and token.is_alpha)\n",
384
+ " processed_symptoms.append(processed_symptom)\n",
385
+ " return ' '.join(processed_symptoms)\n",
386
+ "\n",
387
+ "# Preprocess the symptoms column\n",
388
+ "data['symptoms_preprocessed'] = data['symptoms'].apply(preprocess)\n",
389
+ "\n",
390
+ "\n",
391
+ "# Split the data into train and test sets\n",
392
+ "X_train, X_test, y_train, y_test = train_test_split(data['symptoms_preprocessed'], data['disease'], test_size=0.2, random_state=42)\n",
393
+ "\n",
394
+ "# Create a pipeline for text classification\n",
395
+ "pipeline = Pipeline([\n",
396
+ " ('tfidf', TfidfVectorizer(ngram_range=(1, 2))),\n",
397
+ " ('classifier', LogisticRegression(solver='liblinear', C=10))\n",
398
+ "])\n",
399
+ "\n",
400
+ "# Train the model\n",
401
+ "pipeline.fit(X_train, y_train)\n",
402
+ "\n",
403
+ "# Make predictions\n",
404
+ "y_pred = pipeline.predict(X_test)\n",
405
+ "\n",
406
+ "# Evaluate the model\n",
407
+ "print(\"Accuracy: \", accuracy_score(y_test, y_pred))\n",
408
+ "print(\"Classification Report:\\n\", classification_report(y_test, y_pred))\n"
409
+ ]
410
+ },
411
+ {
412
+ "cell_type": "code",
413
+ "source": [
414
+ "!pip install joblib\n",
415
+ "import joblib\n",
416
+ "\n",
417
+ "# Save the trained model\n",
418
+ "joblib.dump(pipeline, 'DiseasePredictionBasedonSymptoms.joblib')\n"
419
+ ],
420
+ "metadata": {
421
+ "colab": {
422
+ "base_uri": "https://localhost:8080/"
423
+ },
424
+ "id": "KGsxAjX2mNH6",
425
+ "outputId": "9cdcae24-8e5d-43f5-c321-40b3fedd2519"
426
+ },
427
+ "execution_count": 9,
428
+ "outputs": [
429
+ {
430
+ "output_type": "stream",
431
+ "name": "stdout",
432
+ "text": [
433
+ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
434
+ "Requirement already satisfied: joblib in /usr/local/lib/python3.9/dist-packages (1.1.1)\n"
435
+ ]
436
+ },
437
+ {
438
+ "output_type": "execute_result",
439
+ "data": {
440
+ "text/plain": [
441
+ "['DiseasePredictionBasedonSymptoms.joblib']"
442
+ ]
443
+ },
444
+ "metadata": {},
445
+ "execution_count": 9
446
+ }
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "source": [
452
+ "import joblib\n",
453
+ "\n",
454
+ "# Load the saved model\n",
455
+ "loaded_pipeline = joblib.load('DiseasePredictionBasedonSymptoms.joblib')\n",
456
+ "\n",
457
+ "# Make predictions using the loaded model (example)\n",
458
+ "sample_symptom = \"Skin Rash\"\n",
459
+ "processed_symptom = preprocess([sample_symptom])\n",
460
+ "prediction = loaded_pipeline.predict([processed_symptom])\n",
461
+ "\n",
462
+ "print(\"Predicted disease:\", prediction[0])\n"
463
+ ],
464
+ "metadata": {
465
+ "colab": {
466
+ "base_uri": "https://localhost:8080/"
467
+ },
468
+ "id": "JEuWqGV-mWew",
469
+ "outputId": "292c024d-e739-4c7a-c530-093edd85b08d"
470
+ },
471
+ "execution_count": 10,
472
+ "outputs": [
473
+ {
474
+ "output_type": "stream",
475
+ "name": "stdout",
476
+ "text": [
477
+ "Predicted disease: Contact dermatitis\n"
478
+ ]
479
+ }
480
+ ]
481
+ }
482
+ ]
483
+ }
medicalsymptoms1.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """medicalsymptoms1.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1uRT7zfEMnu-tq74GyZoUUtAb-In4XtX8
8
+ """
9
+
10
+ import pandas as pd
11
+ import re
12
+ import spacy
13
+ from sklearn.model_selection import train_test_split
14
+ from sklearn.feature_extraction.text import TfidfVectorizer
15
+ from sklearn.pipeline import Pipeline
16
+ from sklearn.metrics import accuracy_score, classification_report
17
+ from sklearn.linear_model import LogisticRegression
18
+
19
+ # Load the data
20
+ data = pd.read_csv('symptomssingle.csv')
21
+
22
+ # Check for any missing values and remove them
23
+ data = data.dropna()
24
+
25
+ # Define a function to separate symptoms and diseases from the text
26
def separate_symptoms_and_diseases(text):
    """Split one raw record into its symptom list and its disease name.

    The record is expected to contain zero or more ``{"symptoms":"..."}``
    fragments (optionally comma-separated) followed by the disease name.

    Args:
        text: Raw string taken from the ``data`` column.

    Returns:
        A ``(symptoms, disease)`` tuple: ``symptoms`` is the list of captured
        symptom strings in order of appearance, ``disease`` is whatever text
        remains once the symptom fragments and any ``'],'`` separator have
        been removed, stripped of surrounding whitespace.
    """
    # Braces are escaped so they are unambiguously literal characters rather
    # than relying on the regex engine's fallback for malformed quantifiers.
    symptoms = re.findall(r'\{"symptoms":"(.*?)"\}', text)

    # Drop every symptom fragment (and the comma that may follow it); what is
    # left over is the disease part of the record.
    disease = re.sub(r'(?:\{"symptoms":".*?"\},?)+', '', text).strip()

    # Remove the '],' that separates the symptom list from the disease name.
    disease = disease.replace('],', '').strip()
    return symptoms, disease
31
+
32
+ # Apply the function to the data
33
+ data['symptoms_and_diseases'] = data['data'].apply(separate_symptoms_and_diseases)
34
+ data[['symptoms', 'disease']] = pd.DataFrame(data['symptoms_and_diseases'].tolist(), index=data.index)
35
+ data = data.drop(columns=['data', 'symptoms_and_diseases'])
36
+
37
+ # Load the spaCy model
38
+ nlp = spacy.load('en_core_web_sm')
39
+
40
+ # Preprocessing function
41
def preprocess(symptoms):
    """Normalise a collection of symptom phrases into a single string.

    Each phrase is run through the module-level spaCy pipeline ``nlp``.
    Stop words and non-alphabetic tokens are discarded and every remaining
    token is replaced by its lower-cased lemma.

    Args:
        symptoms: Iterable of symptom strings. A single bare string is treated
            as one symptom; without this guard it would be iterated character
            by character and each character parsed as its own document.

    Returns:
        The processed phrases joined by single spaces. A phrase that is
        reduced to nothing still contributes an empty entry to the join.
    """
    if isinstance(symptoms, str):
        symptoms = [symptoms]

    return ' '.join(
        ' '.join(
            token.lemma_.lower()
            for token in nlp(symptom)
            if not token.is_stop and token.is_alpha
        )
        for symptom in symptoms
    )
48
+
49
+ # Preprocess the symptoms column
50
+ data['symptoms_preprocessed'] = data['symptoms'].apply(preprocess)
51
+
52
+
53
+ # Split the data into train and test sets
54
+ X_train, X_test, y_train, y_test = train_test_split(data['symptoms_preprocessed'], data['disease'], test_size=0.2, random_state=42)
55
+
56
+ # Create a pipeline for text classification
57
+ pipeline = Pipeline([
58
+ ('tfidf', TfidfVectorizer(ngram_range=(1, 2))),
59
+ ('classifier', LogisticRegression(solver='liblinear', C=10))
60
+ ])
61
+
62
+ # Train the model
63
+ pipeline.fit(X_train, y_train)
64
+
65
+ # Make predictions
66
+ y_pred = pipeline.predict(X_test)
67
+
68
# Evaluate the model
print("Accuracy: ", accuracy_score(y_test, y_pred))
# zero_division=0 keeps the reported value (0.0) for labels that have no
# predicted or no true samples, but stops scikit-learn from emitting an
# UndefinedMetricWarning for each of them.
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))
71
+
72
# NOTE: the notebook ran `!pip install joblib` here. That is IPython/Colab
# shell-escape syntax and a SyntaxError in a plain .py module, so install the
# dependency from the shell instead (`pip install joblib`) before running.
import joblib

# Save the trained model
joblib.dump(pipeline, 'DiseasePredictionBasedonSymptoms.joblib')
77
+
78
+ import joblib
79
+
80
+ # Load the saved model
81
+ loaded_pipeline = joblib.load('DiseasePredictionBasedonSymptoms.joblib')
82
+
83
+ # Make predictions using the loaded model (example)
84
+ sample_symptom = "Skin Rash"
85
+ processed_symptom = preprocess([sample_symptom])
86
+ prediction = loaded_pipeline.predict([processed_symptom])
87
+
88
+ print("Predicted disease:", prediction[0])
symptomssingle.csv ADDED
The diff for this file is too large to render. See raw diff