Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -63,39 +63,37 @@ def load_data():
|
|
63 |
# Encode diseases in a dictionary
|
64 |
disease_dict = {
|
65 |
'Fungal infection': 0, 'Allergy': 1, 'GERD': 2, 'Chronic cholestasis': 3, 'Drug Reaction': 4,
|
66 |
-
'Peptic ulcer
|
67 |
-
'Hypertension': 10, 'Migraine': 11, 'Cervical spondylosis': 12, 'Paralysis (brain hemorrhage)': 13,
|
68 |
-
'Jaundice': 14, 'Malaria': 15, 'Chicken pox': 16, 'Dengue': 17, 'Typhoid': 18,
|
69 |
-
'Hepatitis
|
70 |
-
'
|
71 |
-
'Heart attack':
|
72 |
-
'Hypoglycemia':
|
73 |
-
'(vertigo) Paroxysmal Positional Vertigo':
|
74 |
-
'Psoriasis':
|
75 |
}
|
76 |
|
77 |
# Replace prognosis values with numerical categories
|
78 |
df.replace({'prognosis': disease_dict}, inplace=True)
|
79 |
|
80 |
-
#
|
81 |
print("Unique values in prognosis after mapping:", df['prognosis'].unique())
|
82 |
|
83 |
# Ensure prognosis is purely numerical after mapping
|
84 |
if df['prognosis'].dtype == 'object': # Check for unmapped entries
|
85 |
raise ValueError(f"The prognosis contains unmapped values: {df['prognosis'].unique()}")
|
86 |
|
87 |
-
df['prognosis'] = df['prognosis'].astype(int) # Convert to integer
|
88 |
-
|
89 |
-
# Inference doesn't require fixing as copy=True defaults
|
90 |
-
df = df.infer_objects()
|
91 |
|
92 |
tr.replace({'prognosis': disease_dict}, inplace=True)
|
93 |
|
94 |
-
#
|
95 |
if tr['prognosis'].dtype == 'object':
|
96 |
raise ValueError(f"Testing data prognosis contains unmapped values: {tr['prognosis'].unique()}")
|
97 |
-
|
98 |
-
tr['prognosis'] = tr['prognosis'].astype(int) # Convert to integer
|
99 |
tr = tr.infer_objects() # Remove 'copy' argument
|
100 |
|
101 |
return df, tr, disease_dict
|
@@ -109,7 +107,7 @@ y_test = tr['prognosis']
|
|
109 |
|
110 |
# Encode the target variable with LabelEncoder if still in string format
|
111 |
le = LabelEncoder()
|
112 |
-
y_encoded = le.fit_transform(y) # Needs to be
|
113 |
|
114 |
def train_models():
|
115 |
models = {
|
|
|
63 |
# Encode diseases in a dictionary
|
64 |
disease_dict = {
|
65 |
'Fungal infection': 0, 'Allergy': 1, 'GERD': 2, 'Chronic cholestasis': 3, 'Drug Reaction': 4,
|
66 |
+
'Peptic ulcer diseae': 5, 'AIDS': 6, 'Diabetes ': 7, 'Gastroenteritis': 8, 'Bronchial Asthma': 9,
|
67 |
+
'Hypertension ': 10, 'Migraine': 11, 'Cervical spondylosis': 12, 'Paralysis (brain hemorrhage)': 13,
|
68 |
+
'Jaundice': 14, 'Malaria': 15, 'Chicken pox': 16, 'Dengue': 17, 'Typhoid': 18, 'Hepatitis A': 19,
|
69 |
+
'Hepatitis B': 20, 'Hepatitis C': 21, 'Hepatitis D': 22, 'Hepatitis E': 23, 'Alcoholic hepatitis': 24,
|
70 |
+
'Tuberculosis': 25, 'Common Cold': 26, 'Pneumonia': 27, 'Dimorphic hemorrhoids(piles)': 28,
|
71 |
+
'Heart attack': 29, 'Varicose veins': 30, 'Hypothyroidism': 31, 'Hyperthyroidism': 32,
|
72 |
+
'Hypoglycemia': 33, 'Osteoarthritis': 34, 'Arthritis': 35,
|
73 |
+
'(vertigo) Paroxysmal Positional Vertigo': 36, 'Acne': 37, 'Urinary tract infection': 38,
|
74 |
+
'Psoriasis': 39, 'Impetigo': 40
|
75 |
}
|
76 |
|
77 |
# Replace prognosis values with numerical categories
|
78 |
df.replace({'prognosis': disease_dict}, inplace=True)
|
79 |
|
80 |
+
# Check unique values in prognosis for debugging
|
81 |
print("Unique values in prognosis after mapping:", df['prognosis'].unique())
|
82 |
|
83 |
# Ensure prognosis is purely numerical after mapping
|
84 |
if df['prognosis'].dtype == 'object': # Check for unmapped entries
|
85 |
raise ValueError(f"The prognosis contains unmapped values: {df['prognosis'].unique()}")
|
86 |
|
87 |
+
df['prognosis'] = df['prognosis'].astype(int) # Convert to integer if necessary
|
88 |
+
df = df.infer_objects() # Remove 'copy' argument
|
|
|
|
|
89 |
|
90 |
tr.replace({'prognosis': disease_dict}, inplace=True)
|
91 |
|
92 |
+
# Fail fast if any testing-data prognosis value was left unmapped
|
93 |
if tr['prognosis'].dtype == 'object':
|
94 |
raise ValueError(f"Testing data prognosis contains unmapped values: {tr['prognosis'].unique()}")
|
95 |
+
|
96 |
+
tr['prognosis'] = tr['prognosis'].astype(int) # Convert to integer if necessary
|
97 |
tr = tr.infer_objects() # Remove 'copy' argument
|
98 |
|
99 |
return df, tr, disease_dict
|
|
|
107 |
|
108 |
# Encode the target variable with LabelEncoder if still in string format
|
109 |
le = LabelEncoder()
|
110 |
+
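y_encoded = le.fit_transform(y) # NOTE(review): load_data() casts 'prognosis' to int, so y is presumably already numeric here; LabelEncoder also accepts numeric labels - confirm this re-encoding is still needed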
|
111 |
|
112 |
def train_models():
|
113 |
models = {
|