Spaces:
Running
Running
import numpy as np | |
import pandas as pd | |
from sklearn.metrics import roc_auc_score, roc_curve,f1_score, accuracy_score,classification_report,confusion_matrix, ConfusionMatrixDisplay | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.model_selection import train_test_split | |
from scipy.stats import mannwhitneyu,chi2_contingency | |
from sklearn.preprocessing import LabelEncoder,StandardScaler | |
from sklearn.feature_selection import SelectKBest, chi2, f_classif | |
from imblearn.over_sampling import SMOTE | |
from imblearn.pipeline import Pipeline as ImbPipeline | |
from joblib import dump, load | |
import tensorflow as tf | |
import tensorflow.keras.models | |
import tensorflow.keras.layers | |
from tensorflow.keras.models import Sequential, load_model | |
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization | |
import sys | |
import json | |
def process_data(input_data,
                 pipeline_ur='pipeline_ur.joblib',
                 model_ur_ft='fine_tuned_model_ur.h5',
                 pipeline_ur_ft='pipeline_ur_ft.joblib'):
    """Score urinary-tract risk for one patient record or a list of records.

    Args:
        input_data: A dict (one record) or a list of dicts (several records).
        pipeline_ur: Path passed to ``return_score`` for the feature pipeline.
        model_ur_ft: Path passed to ``return_score`` for the Keras model.
        pipeline_ur_ft: Path passed to ``return_score`` for the score pipeline.

    Returns:
        On success, a list with one result dict per input record (keys
        ``GUID_Клиент``, ``urinary_risk``, ``urinary_expl``,
        ``urinary_diagnosis``, ``urinary_mcb``, ``urinary_fscore``,
        ``urinary_nn``, ``urinary_score``).
        On any failure, a dict ``{"error": "Processing failed: ..."}``;
        no exception propagates to the caller.
    """
    try:
        if isinstance(input_data, dict):
            frame = pd.DataFrame([input_data])
        elif isinstance(input_data, list):
            frame = pd.DataFrame(input_data)
        else:
            raise ValueError("Input must be a JSON object or list of objects.")
        if len(frame) == 0:
            raise ValueError("Input list is empty.")

        res = []
        # display_score() only reports the first row of the frame it is given,
        # so each record is scored on its own single-row frame; otherwise every
        # record after the first would be silently dropped from the result.
        # NOTE: return_score() loads the model files on each call, so a batch
        # of N records costs N loads.
        for position in range(len(frame)):
            # Slice from the combined frame (instead of rebuilding from the
            # raw record) so a key missing in one record still exists as NaN.
            single = frame.iloc[[position]].reset_index(drop=True)
            pts = read_json(single)
            print("Processed DataFrame columns:", pts.columns)  # Debug columns
            # Pass the processed DataFrame, not raw input
            pts_score = return_score(pts, pipeline_ur, model_ur_ft, pipeline_ur_ft)
            (guid, rec_ur, expl_ur, diagnosis_ur, mcb_ur,
             final_score, nn_score, ds_score) = display_score(pts_score)
            res.append({
                "GUID_Клиент": guid,
                "urinary_risk": rec_ur,
                "urinary_expl": expl_ur,
                "urinary_diagnosis": diagnosis_ur,
                "urinary_mcb": mcb_ur,
                "urinary_fscore": final_score,
                "urinary_nn": nn_score,
                "urinary_score": ds_score,
            })
        return res
    except Exception as e:
        # Top-level boundary: callers receive an error payload, not a traceback.
        return {"error": f"Processing failed: {str(e)}"}
# read json
def read_json(pts, reference_year=2024):
    """Turn raw patient records into the feature frame used for scoring.

    Steps, in order: derive binary urine flags, map dipstick strings
    ('Neg'/'Trace'/'Pos') to numbers, cast lab columns to float, rescale
    selected columns by fixed factors, derive counts and categorical flags,
    rename columns to the model's feature names, and fill missing values
    with fixed defaults.

    Args:
        pts: DataFrame with one row per patient holding the raw input keys
            read below (e.g. 'urine_nit', 'blood_wbc', 'bithdate',
            'ecg_description'). The frame is not modified.
        reference_year: Year from which 'anchor_age' is computed as
            ``reference_year - birth year``. Defaults to 2024.

    Returns:
        A new DataFrame with the derived and renamed feature columns.

    Raises:
        KeyError: If a required input column is absent.
        ValueError: If a lab column holds a value that cannot be cast to float.
    """
    # Work on a copy so the caller's frame is not left half-modified.
    pts = pts.copy()

    # preparation of data
    # These flags are read from the raw strings, before 'Neg'/'Pos' are mapped
    # to numbers further down.
    pts['Nitrite,urine,bin'] = (pts['urine_nit'] == 'Pos').astype(int)
    raw_bil = pts['urine_bil']
    # A missing bilirubin reading counts as negative, matching the default of
    # 0 listed for this flag in the fill table below.
    pts['Bilirubin,urine,pos'] = ((raw_bil != 'Neg') & raw_bil.notna()).astype(int)
    # Strip the unit suffix from string cells only; numeric or missing cells
    # pass through unchanged.
    pts['urine_ugb'] = pts['urine_ugb'].map(
        lambda v: v.replace('umol/L', '') if isinstance(v, str) else v)
    pts = pts.replace({'Neg': 0, 'Trace': 0.01, 'Pos': 1})

    # NOTE(review): the key 'blood_сholesterol' appears to contain a Cyrillic
    # 'с' - it must stay byte-identical to the incoming JSON key.
    numeric_columns = ['bmi', 'blood_pressure_1',
                       'blood_pressure_2', 'blood_сholesterol',
                       'blood_glucose', 'blood_wbc', 'blood_rbc', 'blood_hgb', 'blood_hct',
                       'blood_plt', 'blood_lym', 'blood_gra', 'blood_mid', 'blood_esr',
                       'urine_sg', 'urine_pH',
                       'urine_pro', 'urine_glu', 'urine_ket', 'urine_ugb', 'urine_bil',
                       'urine_bld_ery', 'urine_nit', 'urine_leu']
    pts[numeric_columns] = pts[numeric_columns].astype(float)

    # conversion
    # Fixed multipliers bringing inputs to the units named in the renamed
    # columns (presumably SI -> conventional units; confirm with data owner).
    convert = {
        'blood_glucose': 18.018,
        'blood_сholesterol': 38.67,
        'blood_hgb': 0.1,
        'urine_ugb': 0.058,
        'urine_bil': 0.0585,
        'urine_pro': 100,
        'urine_ket': 10.41,
        'urine_glu': 18.018
    }
    for col, factor in convert.items():
        if col in pts.columns:
            pts[col] = pts[col] * factor

    # Absolute counts from the WBC count and the percentage differentials.
    pts['Absolute Lymphocyte Count,blood, (k/ul)'] = pts['blood_wbc'] * pts['blood_lym'] / 100
    pts['Granulocyte Count,blood,#/uL'] = pts['blood_wbc'] * pts['blood_gra'] / 100

    # categorical features
    pts['hypertension'] = ((pts['blood_pressure_1'] >= 140)
                           | (pts['blood_pressure_2'] >= 90)).astype(int)
    pts['smoke'] = (pts['smoking'] == 'Да').astype(int)
    pts['gender_bin'] = (pts['gender'] == 'Мужской').astype(int)
    pts['godРождения'] = pd.to_datetime(pts['bithdate']).dt.year
    pts['anchor_age'] = reference_year - pts['godРождения']

    # A missing reading must not count as positive: NaN != 0 is True, so the
    # missing case is excluded explicitly.
    ery = pts['urine_bld_ery']
    pts['RBC,urine,pos'] = ((ery != 0) & ery.notna()).astype(int)
    leu = pts['urine_leu']
    pts['WBC,urine,pos'] = ((leu != 0) & leu.notna()).astype(int)

    # ecg
    def _mentions(text, markers):
        # A missing or non-text description carries no findings.
        return int(isinstance(text, str) and any(m in text for m in markers))

    ishemia = ['депрессия сегмента ST', 'нарушения коронарного кровообращения', 'ишемии миокарда', 'прирост з r']
    pts['ecg_ischemia'] = pts['ecg_description'].apply(lambda x: _mentions(x, ishemia))
    necrosis = ['рубцов', 'зубец Q', 'зубец QS']
    pts['ecg_necrosis'] = pts['ecg_description'].apply(lambda x: _mentions(x, necrosis))

    # rename
    dic = {'blood_glucose': 'Glucose, Blood,mg/dL',
           'blood_сholesterol': 'Cholesterol, Total, blood,mg/dL',
           'blood_wbc': 'WBC Count,blood,(K/uL)',
           'blood_hgb': 'Hemoglobin,blood,g/dL',
           'blood_hct': 'Hematocrit,blood,%',
           'blood_plt': 'Platelet Count,blood,K/uL',
           'blood_esr': 'Sedimentation Rate,blood,mm/hr',
           'urine_sg': 'Specific Gravity,urine, ',
           'urine_pH': 'pH,urine,units',
           'urine_pro': 'Protein,urine,mg/dL',
           'urine_glu': 'Glucose, Urine,mg/dL',
           'urine_ket': 'Ketone,urine,mg/dL',
           'urine_ugb': 'Urobilinogen,urine,mg/dL',
           'bmi': 'BMI'}
    pts.rename(columns=dic, inplace=True)

    # work with nans
    # Per-column defaults substituted for values still missing at this point.
    dic_analysis = {'Cholesterol, Total, blood,mg/dL': 130,
                    'Glucose, Blood,mg/dL': 70,
                    'Glucose, Urine,mg/dL': 0,
                    'Hematocrit,blood,%': 34,
                    'Hemoglobin,blood,g/dL': 12,
                    'Ketone,urine,mg/dL': 0,
                    'Platelet Count,blood,K/uL': 150,
                    'Protein,urine,mg/dL': 0,
                    'Sedimentation Rate,blood,mm/hr': 0,
                    'Specific Gravity,urine, ': 1.001,
                    'Urobilinogen,urine,mg/dL': 0,
                    'pH,urine,units': 5,
                    'WBC Count,blood,(K/uL)': 4,
                    'Absolute Lymphocyte Count,blood, (k/ul)': 1.2,
                    'Granulocyte Count,blood,#/uL': 2.2,
                    'Bilirubin,urine,pos': 0,
                    'WBC,urine,pos': 0,
                    'RBC,urine,pos': 0}
    pts = pts.fillna(dic_analysis)
    print(pts)
    return pts
# return risk score
def urinary_score(df):
    """Compute the rule-based urinary risk score for every row of ``df``.

    Each rule that fires adds its points to the row's total:
    ESR > 20 (+1), blood WBC > 11 (+1), urine specific gravity > 1.035 (+1),
    urine pH < 5 (+1), urine protein > 20 (+7), urine WBC flag == 1 (+6),
    urine RBC flag == 1 (+6), nitrite flag == 1 (+4.4).

    Args:
        df: DataFrame containing the eight feature columns named below.
            Any index is accepted; rows are visited in positional order.

    Returns:
        A list with one numeric score per row, in row order.
    """
    # (column, predicate, points) - evaluated top to bottom for every row.
    rules = (
        ('Sedimentation Rate,blood,mm/hr', lambda v: v > 20, 1),
        ('WBC Count,blood,(K/uL)', lambda v: v > 11, 1),
        ('Specific Gravity,urine, ', lambda v: v > 1.035, 1),
        ('pH,urine,units', lambda v: v < 5, 1),
        ('Protein,urine,mg/dL', lambda v: v > 20, 7),
        ('WBC,urine,pos', lambda v: v == 1, 6),
        ('RBC,urine,pos', lambda v: v == 1, 6),
        ('Nitrite,urine,bin', lambda v: v == 1, 4.4),
    )
    y_score = []
    # Iterate rows themselves rather than labels 0..n-1, so frames whose
    # index is not a default RangeIndex are scored instead of raising KeyError.
    for _, row in df.iterrows():
        n = 0
        for column, fires, points in rules:
            if fires(row[column]):
                n += points
        y_score.append(n)
    return y_score
def dia_score(df):
    """Compute the rule-based diabetes score for every row of ``df``.

    Points per row: blood glucose > 126 (+5) or otherwise >= 110 (+3.2);
    urine ketone > 3 (+1.2); urine glucose > 20 (+2.4);
    BMI >= 30 (+4.3) or otherwise > 25 (+2).

    Args:
        df: DataFrame containing 'Glucose, Blood,mg/dL', 'Ketone,urine,mg/dL',
            'Glucose, Urine,mg/dL' and 'BMI'. Any index is accepted; rows are
            visited in positional order.

    Returns:
        A list with one numeric score per row, in row order.
    """
    y_score = []
    # Iterate rows themselves rather than labels 0..n-1, so frames whose
    # index is not a default RangeIndex are scored instead of raising KeyError.
    for _, row in df.iterrows():
        n = 0

        blood_glucose = row['Glucose, Blood,mg/dL']
        if blood_glucose > 126:
            n += 5
        elif blood_glucose >= 110:
            n += 3.2

        if row['Ketone,urine,mg/dL'] > 3:
            n += 1.2
        if row['Glucose, Urine,mg/dL'] > 20:
            n += 2.4

        bmi = row['BMI']
        if bmi >= 30:
            n += 4.3
        elif bmi > 25:
            n += 2

        y_score.append(n)
    return y_score
def return_score(pts, pipeline_ur, model_ur_ft, pipeline_ur_ft):
    """Attach rule-based, neural-network and combined urinary scores to ``pts``.

    Args:
        pts: Feature DataFrame holding the columns listed in ``cols_ur``
            (except 'diabetes', which is derived here) plus the columns read
            by ``dia_score`` and ``urinary_score``. Modified in place.
        pipeline_ur: Path of the joblib file whose ``transform`` prepares the
            model input.
        model_ur_ft: Path of the Keras model file whose ``predict`` is used.
        pipeline_ur_ft: Path of the joblib file whose ``transform`` scales the
            ('score', 'neuronet') pair.

    Returns:
        The same DataFrame with added columns: 'dia_score', 'diabetes',
        'score', 'neuronet', 'score_scaled', 'nn_scaled', 'comb_score_ur',
        'ur_score' and 'ur_nn'.
    """
    # Loading the models and pipelines
    # NOTE(security): joblib.load unpickles its input - only pass paths to
    # trusted artifact files.
    feature_pipeline = load(pipeline_ur)
    model_ur = load_model(model_ur_ft)
    score_pipeline = load(pipeline_ur_ft)

    pts['dia_score'] = dia_score(pts)
    pts['diabetes'] = pts['dia_score'].apply(lambda x: 1 if x >= 6 else 0)

    # Urinary
    cols_ur = ['Cholesterol, Total, blood,mg/dL', 'Glucose, Blood,mg/dL',
               'Glucose, Urine,mg/dL', 'Granulocyte Count,blood,#/uL',
               'Hematocrit,blood,%', 'Hemoglobin,blood,g/dL', 'Ketone,urine,mg/dL',
               'Platelet Count,blood,K/uL', 'Protein,urine,mg/dL',
               'Sedimentation Rate,blood,mm/hr', 'Specific Gravity,urine, ',
               'Urobilinogen,urine,mg/dL', 'pH,urine,units', 'Absolute Lymphocyte Count,blood, (k/ul)',
               'WBC Count,blood,(K/uL)', 'RBC,urine,pos', 'WBC,urine,pos',
               'Bilirubin,urine,pos', 'Nitrite,urine,bin', 'gender_bin', 'anchor_age',
               'BMI', 'hypertension', 'smoke', 'diabetes']
    pts_nn = feature_pipeline.transform(pts[cols_ur])

    pts['score'] = urinary_score(pts)
    pts['neuronet'] = model_ur.predict(pts_nn)

    # When no rule fired (score == 0) but the network output exceeds 0.5, the
    # network output is divided by 10. Applied through a mask so every row is
    # treated alike and no particular index label (such as 0) is required.
    dampen = (pts['score'] == 0) & (pts['neuronet'] > 0.5)
    pts.loc[dampen, 'neuronet'] = pts.loc[dampen, 'neuronet'] / 10

    cols = ['score', 'neuronet']
    pts[['score_scaled', 'nn_scaled']] = score_pipeline.transform(pts[cols])
    pts['comb_score_ur'] = pts['score_scaled'] + pts['nn_scaled']
    pts['ur_score'] = pts['score']
    pts['ur_nn'] = pts['neuronet']
    return pts
# return risk and explanation
def display_score(pts):
    """Build the risk label and explanation text for the first row of ``pts``.

    The sign of 'comb_score_ur' selects the branch: above zero is reported as
    high risk (with a diagnosis and code), below zero as no risk, and anything
    else as moderate risk. High and moderate risk also list every abnormal
    finding, one per line.

    Returns:
        Tuple ``(guid, rec_ur, expl_ur, diagnosis_ur, mcb_ur, final_score,
        nn_score, ds_score)``.
    """
    def first(column):
        # Value of the given column in the first row.
        return pts[column].iloc[0]

    # (column, predicate, message) - checked in this order.
    findings = (
        ('Sedimentation Rate,blood,mm/hr', lambda v: v > 20, 'Повышена СОЭ\n'),
        ('WBC Count,blood,(K/uL)', lambda v: v > 11, 'Лейкоцитоз\n'),
        ('Specific Gravity,urine, ', lambda v: v > 1.035, 'Повышена плотность мочи\n'),
        ('pH,urine,units', lambda v: v < 5, 'Снижен pH мочи\n'),
        ('Protein,urine,mg/dL', lambda v: v > 20, 'Протеинурия\n'),
        ('WBC,urine,pos', lambda v: v == 1, 'Лейкоцитурия\n'),
        ('RBC,urine,pos', lambda v: v == 1, 'Эритроцитурия\n'),
        ('Nitrite,urine,bin', lambda v: v == 1, 'Обнаружены нитриты в моче\n'),
    )

    def explain():
        # Concatenate the message of every finding whose predicate holds.
        lines = []
        for column, is_abnormal, message in findings:
            if is_abnormal(first(column)):
                lines.append(message)
        return ''.join(lines)

    guid = first('GUID')
    final_score = first('comb_score_ur')
    nn_score = first('ur_nn')
    ds_score = first('ur_score')

    # Urinary
    combined = first('comb_score_ur')
    expl_ur = ''
    diagnosis_ur = ''
    mcb_ur = ''
    if combined > 0:
        rec_ur = 'Высокий риск ЗМПС\n'
        diagnosis_ur = 'Расстройство мочевыделительной системы неуточненное\n'
        mcb_ur = 'N39.9\n'
        expl_ur = explain()
    elif combined < 0:
        rec_ur = 'Нет риска ЗМПС\n'
    else:
        rec_ur = 'Умеренный риск ЗМПС\n'
        expl_ur = explain()

    return guid, rec_ur, expl_ur, diagnosis_ur, mcb_ur, final_score, nn_score, ds_score