# urinary / predict_risks_ft.py
# inspectrum's picture
# Update predict_risks_ft.py
# 0cc58fb verified
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, roc_curve,f1_score, accuracy_score,classification_report,confusion_matrix, ConfusionMatrixDisplay
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from scipy.stats import mannwhitneyu,chi2_contingency
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.feature_selection import SelectKBest, chi2, f_classif
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from joblib import dump, load
import tensorflow as tf
import tensorflow.keras.models
import tensorflow.keras.layers
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
import sys
import json
def process_data(input_data,
                 pipeline_ur='pipeline_ur.joblib', model_ur_ft='fine_tuned_model_ur.h5', pipeline_ur_ft='pipeline_ur_ft.joblib'):
    """Score one or more patient records for urinary-system risk.

    Args:
        input_data: A dict (one patient) or a list of dicts (several
            patients) holding the raw fields consumed by ``read_json``.
        pipeline_ur: Path handed to ``return_score`` for the feature pipeline.
        model_ur_ft: Path handed to ``return_score`` for the Keras model.
        pipeline_ur_ft: Path handed to ``return_score`` for the score pipeline.

    Returns:
        On success, a list with one result dict per input record, in input
        order. On any failure, a single dict ``{"error": "Processing failed: ..."}``
        (no exception escapes this function).
    """
    try:
        if isinstance(input_data, dict):
            pts = pd.DataFrame([input_data])
        elif isinstance(input_data, list):
            pts = pd.DataFrame(input_data)
        else:
            raise ValueError("Input must be a JSON object or list of objects.")
        pts = read_json(pts)
        print("Processed DataFrame columns:", pts.columns)  # Debug columns
        # Pass the processed DataFrame, not raw input
        pts_score = return_score(pts, pipeline_ur, model_ur_ft, pipeline_ur_ft)
        res = []
        # display_score() only looks at the first row of the frame it gets,
        # so feed it one single-row slice per patient; otherwise every record
        # after the first would be silently dropped from the response.
        for pos in range(len(pts_score)):
            (guid, rec_ur, expl_ur, diagnosis_ur, mcb_ur,
             final_score, nn_score, ds_score) = display_score(pts_score.iloc[[pos]])
            res.append({
                "GUID_Клиент": guid,
                "urinary_risk": rec_ur,
                "urinary_expl": expl_ur,
                "urinary_diagnosis": diagnosis_ur,
                "urinary_mcb": mcb_ur,
                "urinary_fscore": final_score,
                "urinary_nn": nn_score,
                "urinary_score": ds_score,
            })
        return res
    except Exception as e:
        # Top-level boundary: callers receive an error payload instead of a traceback.
        return {"error": f"Processing failed: {str(e)}"}
# Prepare patient records for scoring (works on a DataFrame; no JSON is read here)
def read_json(pts, reference_year=2024):
    """Turn raw patient records into the feature frame used for scoring.

    Despite the name, no JSON is parsed here: ``pts`` is already a DataFrame.
    The function encodes dipstick results, casts lab values to float, rescales
    several of them, derives extra features, renames columns to the names the
    scoring functions expect and fills missing lab values with fixed defaults.

    Args:
        pts: DataFrame with one row per patient. It must contain every column
            listed in ``numeric_columns`` below plus ``smoking``, ``gender``,
            ``bithdate`` and ``ecg_description``.
        reference_year: Year from which the birth year is subtracted to get
            ``anchor_age``. Defaults to 2024, the value previously hard-coded.

    Returns:
        A new DataFrame; the frame passed in is not modified. The frame is
        also printed to stdout.

    Raises:
        KeyError: If a required column is absent.
        ValueError: If a value in a numeric column cannot be cast to float.
    """
    # Work on a copy so the caller's frame is left untouched.
    pts = pts.copy()

    # preparation of data
    # Binary flags are taken from the raw strings before they are made numeric.
    pts['Nitrite,urine,bin'] = pts['urine_nit'].apply(lambda x: 1 if x == 'Pos' else 0)
    # A missing bilirubin result counts as negative, matching the default of 0
    # that dic_analysis assigns to this flag.
    pts['Bilirubin,urine,pos'] = pts['urine_bil'].apply(
        lambda x: 0 if (x == 'Neg' or pd.isna(x)) else 1)
    # Strip the unit suffix from string values only; numeric or missing values
    # pass through (the .str accessor raises on a non-string column).
    pts['urine_ugb'] = pts['urine_ugb'].apply(
        lambda x: x.replace('umol/L', '') if isinstance(x, str) else x)
    pts = pts.replace({'Neg': 0, 'Trace': 0.01, 'Pos': 1})
    numeric_columns = ['bmi', 'blood_pressure_1',
                       'blood_pressure_2', 'blood_сholesterol',
                       'blood_glucose', 'blood_wbc', 'blood_rbc', 'blood_hgb', 'blood_hct',
                       'blood_plt', 'blood_lym', 'blood_gra', 'blood_mid', 'blood_esr',
                       'urine_sg', 'urine_pH',
                       'urine_pro', 'urine_glu', 'urine_ket', 'urine_ugb', 'urine_bil',
                       'urine_bld_ery', 'urine_nit', 'urine_leu']
    pts[numeric_columns] = pts[numeric_columns].astype(float)

    # Unit conversion: each listed column is multiplied by its factor.
    # NOTE(review): the factors look like SI -> conventional-unit conversions
    # (target column names below end in mg/dL and g/dL) - confirm with the data source.
    convert = {
        'blood_glucose': 18.018,
        'blood_сholesterol': 38.67,
        'blood_hgb': 0.1,
        'urine_ugb': 0.058,
        'urine_bil': 0.0585,
        'urine_pro': 100,
        'urine_ket': 10.41,
        'urine_glu': 18.018,
    }
    for col, factor in convert.items():
        if col in pts.columns:
            pts[col] = pts[col] * factor

    # Absolute counts from the total WBC count and the percentage columns.
    pts['Absolute Lymphocyte Count,blood, (k/ul)'] = pts['blood_wbc'] * pts['blood_lym'] / 100
    pts['Granulocyte Count,blood,#/uL'] = pts['blood_wbc'] * pts['blood_gra'] / 100

    # categorical features
    high_pressure = (pts['blood_pressure_1'] >= 140) | (pts['blood_pressure_2'] >= 90)
    pts['hypertension'] = high_pressure.astype(int)
    pts['smoke'] = pts['smoking'].apply(lambda x: 1 if x == 'Да' else 0)
    pts['gender_bin'] = pts['gender'].apply(lambda x: 1 if x == 'Мужской' else 0)
    pts['godРождения'] = pd.to_datetime(pts['bithdate']).dt.year
    pts['anchor_age'] = reference_year - pts['godРождения']
    # NaN != 0 is True, so a missing measurement must be excluded explicitly;
    # otherwise it would be reported as a positive finding.
    pts['RBC,urine,pos'] = pts['urine_bld_ery'].apply(
        lambda x: 1 if (pd.notna(x) and x != 0) else 0)
    pts['WBC,urine,pos'] = pts['urine_leu'].apply(
        lambda x: 1 if (pd.notna(x) and x != 0) else 0)

    # ecg: flag descriptions containing any of the marker phrases.
    ishemia = ['депрессия сегмента ST', 'нарушения коронарного кровообращения', 'ишемии миокарда', 'прирост з r']
    necrosis = ['рубцов', 'зубец Q', 'зубец QS']

    def _mentions(text, markers):
        # A missing (non-string) description cannot contain a marker.
        return 1 if isinstance(text, str) and any(el in text for el in markers) else 0

    pts['ecg_ischemia'] = pts['ecg_description'].apply(lambda x: _mentions(x, ishemia))
    pts['ecg_necrosis'] = pts['ecg_description'].apply(lambda x: _mentions(x, necrosis))

    # rename raw keys to the feature names used by the scoring code
    dic = {'blood_glucose': 'Glucose, Blood,mg/dL',
           'blood_сholesterol': 'Cholesterol, Total, blood,mg/dL',
           'blood_wbc': 'WBC Count,blood,(K/uL)',
           'blood_hgb': 'Hemoglobin,blood,g/dL',
           'blood_hct': 'Hematocrit,blood,%',
           'blood_plt': 'Platelet Count,blood,K/uL',
           'blood_esr': 'Sedimentation Rate,blood,mm/hr',
           'urine_sg': 'Specific Gravity,urine, ',
           'urine_pH': 'pH,urine,units',
           'urine_pro': 'Protein,urine,mg/dL',
           'urine_glu': 'Glucose, Urine,mg/dL',
           'urine_ket': 'Ketone,urine,mg/dL',
           'urine_ugb': 'Urobilinogen,urine,mg/dL',
           'bmi': 'BMI'}
    pts = pts.rename(columns=dic)

    # work with nans: per-column defaults for values that are still missing
    dic_analysis = {'Cholesterol, Total, blood,mg/dL': 130,
                    'Glucose, Blood,mg/dL': 70,
                    'Glucose, Urine,mg/dL': 0,
                    'Hematocrit,blood,%': 34,
                    'Hemoglobin,blood,g/dL': 12,
                    'Ketone,urine,mg/dL': 0,
                    'Platelet Count,blood,K/uL': 150,
                    'Protein,urine,mg/dL': 0,
                    'Sedimentation Rate,blood,mm/hr': 0,
                    'Specific Gravity,urine, ': 1.001,
                    'Urobilinogen,urine,mg/dL': 0,
                    'pH,urine,units': 5,
                    'WBC Count,blood,(K/uL)': 4,
                    'Absolute Lymphocyte Count,blood, (k/ul)': 1.2,
                    'Granulocyte Count,blood,#/uL': 2.2,
                    'Bilirubin,urine,pos': 0,
                    'WBC,urine,pos': 0,
                    'RBC,urine,pos': 0}
    pts = pts.fillna(dic_analysis)
    print(pts)
    return pts
# return risk score
def urinary_score(df):
    """Compute the rule-based urinary risk score for every row of ``df``.

    Each rule that fires adds its weight to the row's score:
    ESR > 20 (+1), blood WBC > 11 (+1), urine specific gravity > 1.035 (+1),
    urine pH < 5 (+1), urine protein > 20 (+7), urine WBC flag == 1 (+6),
    urine RBC flag == 1 (+6), nitrite flag == 1 (+4.4).

    Args:
        df: DataFrame containing the eight feature columns referenced below.

    Returns:
        A list with one score per row, in row order. Rows are read by
        position, so the frame may carry any index.
    """
    y_score = []
    for pos in range(len(df)):
        # Positional access: label-based df.loc[i, ...] breaks as soon as the
        # index is not exactly 0..n-1 (e.g. after filtering or slicing).
        row = df.iloc[pos]
        n = 0
        if row['Sedimentation Rate,blood,mm/hr'] > 20:
            n += 1
        if row['WBC Count,blood,(K/uL)'] > 11:
            n += 1
        if row['Specific Gravity,urine, '] > 1.035:
            n += 1
        if row['pH,urine,units'] < 5:
            n += 1
        if row['Protein,urine,mg/dL'] > 20:
            n += 7
        if row['WBC,urine,pos'] == 1:
            n += 6
        if row['RBC,urine,pos'] == 1:
            n += 6
        if row['Nitrite,urine,bin'] == 1:
            n += 4.4
        y_score.append(n)
    return y_score
def dia_score(df):
    """Compute the rule-based diabetes score for every row of ``df``.

    Weights: blood glucose > 126 (+5) or else >= 110 (+3.2); urine ketone > 3
    (+1.2); urine glucose > 20 (+2.4); BMI >= 30 (+4.3) or else > 25 (+2).

    Args:
        df: DataFrame containing the four feature columns referenced below.

    Returns:
        A list with one score per row, in row order. Rows are read by
        position, so the frame may carry any index.
    """
    y_score = []
    for pos in range(len(df)):
        # Positional access: label-based df.loc[i, ...] breaks as soon as the
        # index is not exactly 0..n-1 (e.g. after filtering or slicing).
        row = df.iloc[pos]
        n = 0
        blood_glucose = row['Glucose, Blood,mg/dL']
        if blood_glucose > 126:
            n += 5
        elif blood_glucose >= 110:
            n += 3.2
        if row['Ketone,urine,mg/dL'] > 3:
            n += 1.2
        if row['Glucose, Urine,mg/dL'] > 20:
            n += 2.4
        bmi = row['BMI']
        if bmi >= 30:
            n += 4.3
        elif bmi > 25:
            n += 2
        y_score.append(n)
    return y_score
def return_score(pts, pipeline_ur, model_ur_ft, pipeline_ur_ft):
    """Attach rule-based, neural-network and combined urinary scores to ``pts``.

    Args:
        pts: Feature DataFrame containing the columns listed in ``cols_ur``
            (except ``diabetes``, which is derived here) plus the columns
            read by ``dia_score`` and ``urinary_score``.
        pipeline_ur: Path of a joblib file whose object exposes ``transform``;
            applied to the model input features.
        model_ur_ft: Path of the Keras model file passed to ``load_model``.
        pipeline_ur_ft: Path of a joblib file whose object exposes
            ``transform``; applied to the ``score`` / ``neuronet`` pair.

    Returns:
        The same ``pts`` object, modified in place, with the added columns
        ``dia_score``, ``diabetes``, ``score``, ``neuronet``, ``score_scaled``,
        ``nn_scaled``, ``comb_score_ur``, ``ur_score`` and ``ur_nn``.
    """
    # Loading the models and pipelines
    # NOTE(security): joblib files are pickle-based, so loading one can run
    # arbitrary code. Only point these arguments at trusted artefacts.
    pipeline_ur = load(pipeline_ur)
    model_ur = load_model(model_ur_ft)
    pipeline_ur2 = load(pipeline_ur_ft)

    pts['dia_score'] = dia_score(pts)
    pts['diabetes'] = pts['dia_score'].apply(lambda x: 1 if x >= 6 else 0)

    # Urinary: column order is kept exactly as the loaded pipeline receives it.
    cols_ur = ['Cholesterol, Total, blood,mg/dL', 'Glucose, Blood,mg/dL',
               'Glucose, Urine,mg/dL', 'Granulocyte Count,blood,#/uL',
               'Hematocrit,blood,%', 'Hemoglobin,blood,g/dL', 'Ketone,urine,mg/dL',
               'Platelet Count,blood,K/uL', 'Protein,urine,mg/dL',
               'Sedimentation Rate,blood,mm/hr', 'Specific Gravity,urine, ',
               'Urobilinogen,urine,mg/dL', 'pH,urine,units', 'Absolute Lymphocyte Count,blood, (k/ul)',
               'WBC Count,blood,(K/uL)', 'RBC,urine,pos', 'WBC,urine,pos',
               'Bilirubin,urine,pos', 'Nitrite,urine,bin', 'gender_bin', 'anchor_age',
               'BMI', 'hypertension', 'smoke', 'diabetes']
    pts_nn = pts[cols_ur]
    pts_nn = pipeline_ur.transform(pts_nn)

    pts['score'] = urinary_score(pts)
    # Flatten the prediction so one value lands in each row of the column.
    # Assumes the model emits a single output per sample - TODO confirm.
    pts['neuronet'] = np.asarray(model_ur.predict(pts_nn)).reshape(-1)

    # When no rule fired but the network output is above 0.5, the network
    # output is divided by 10. Applied through a mask so every patient is
    # treated alike, not only the row labelled 0.
    damp = (pts['score'] == 0) & (pts['neuronet'] > 0.5)
    pts.loc[damp, 'neuronet'] = pts.loc[damp, 'neuronet'] / 10

    cols = ['score', 'neuronet']
    pts[['score_scaled', 'nn_scaled']] = pipeline_ur2.transform(pts[cols])
    pts['comb_score_ur'] = pts['score_scaled'] + pts['nn_scaled']
    pts['ur_score'] = pts['score']
    pts['ur_nn'] = pts['neuronet']
    return pts
# return risk and explanation
def _urinary_findings(pts):
    """Return the newline-terminated findings that apply to the first row of ``pts``."""
    checks = (
        ('Sedimentation Rate,blood,mm/hr', lambda v: v > 20, 'Повышена СОЭ\n'),
        ('WBC Count,blood,(K/uL)', lambda v: v > 11, 'Лейкоцитоз\n'),
        ('Specific Gravity,urine, ', lambda v: v > 1.035, 'Повышена плотность мочи\n'),
        ('pH,urine,units', lambda v: v < 5, 'Снижен pH мочи\n'),
        ('Protein,urine,mg/dL', lambda v: v > 20, 'Протеинурия\n'),
        ('WBC,urine,pos', lambda v: v == 1, 'Лейкоцитурия\n'),
        ('RBC,urine,pos', lambda v: v == 1, 'Эритроцитурия\n'),
        ('Nitrite,urine,bin', lambda v: v == 1, 'Обнаружены нитриты в моче\n'),
    )
    return ''.join(text for column, fires, text in checks
                   if fires(pts[column].iloc[0]))


def display_score(pts):
    """Build the risk verdict and explanation for the first row of ``pts``.

    The verdict depends on the sign of ``comb_score_ur``:
    greater than 0 gives the high-risk text plus diagnosis and code ``N39.9``
    and the list of findings; less than 0 gives the no-risk text with empty
    explanation; anything else gives the moderate-risk text plus findings.

    Args:
        pts: Scored DataFrame with ``GUID``, ``comb_score_ur``, ``ur_nn``,
            ``ur_score`` and the eight lab/flag columns used for findings.
            Only the first row is read.

    Returns:
        Tuple ``(guid, rec_ur, expl_ur, diagnosis_ur, mcb_ur, final_score,
        nn_score, ds_score)``.
    """
    guid = pts['GUID'].iloc[0]
    final_score = pts['comb_score_ur'].iloc[0]
    nn_score = pts['ur_nn'].iloc[0]
    ds_score = pts['ur_score'].iloc[0]

    # Urinary
    rec_ur = ''
    expl_ur = ''
    diagnosis_ur = ''
    mcb_ur = ''
    if final_score > 0:
        rec_ur = 'Высокий риск ЗМПС\n'
        diagnosis_ur = 'Расстройство мочевыделительной системы неуточненное\n'
        mcb_ur = 'N39.9\n'
        expl_ur = _urinary_findings(pts)
    elif final_score < 0:
        # Findings are deliberately not listed when no risk is reported.
        rec_ur = 'Нет риска ЗМПС\n'
    else:
        rec_ur = 'Умеренный риск ЗМПС\n'
        expl_ur = _urinary_findings(pts)
    return guid, rec_ur, expl_ur, diagnosis_ur, mcb_ur, final_score, nn_score, ds_score