# -*- coding: utf-8 -*-
"""
Created on Tue Dec 26 21:49:46 2023

@author: admin

Data preparation, target (de)normalisation, metric printing and plotting
helpers for a concentration-prediction workflow.
"""

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Factor applied to ``y / AMT`` inside the log by ``process_train_data_DNN``
# (output_type 2) and divided back out by ``turn_back_DNN``.
_DNN_RATIO_SCALE = 5

# Divisor applied by ``turn_back`` for output_type 1.
# NOTE(review): ``process_train_data`` applies no matching factor when it
# builds the target, so this looks like an empirical calibration -- confirm.
_TURN_BACK_TYPE1_DIVISOR = 1.25


def preprocess_data(filepath, form):
    """Load an Excel sheet and keep the rows with ``TAD >= 4``.

    Parameters
    ----------
    filepath : path or file-like accepted by ``pandas.read_excel``.
    form : ``0`` returns every remaining row, ``1`` returns the rows whose
        ``form`` column equals 1, any other value returns the rows whose
        ``form`` column equals 2.

    Returns
    -------
    pandas.DataFrame
    """
    df = pd.read_excel(filepath)
    df = df[df['TAD'] >= 4]
    if form == 0:
        return df
    wanted_form = 1 if form == 1 else 2
    return df[df['form'] == wanted_form]


def _read_columns(df):
    """Read the positional columns shared by both training-data builders.

    Returns ``(y, amt, bsa, t, form)``: ``y`` (column 3) and ``amt``
    (column 2) are 1-D; ``bsa`` (column 6), ``t`` (column 1) and ``form``
    (column 4) are reshaped to ``(n, 1)`` so they can be concatenated as
    feature columns.
    """
    y = df.iloc[:, 3].values
    amt = np.reshape(df.iloc[:, 2].values, (-1))
    bsa, t, form = (
        np.reshape(df.iloc[:, idx].values, (-1, 1)) for idx in (6, 1, 4)
    )
    return y, amt, bsa, t, form


def _feature_matrix(bsa, amt, t, form, output_type):
    """Assemble the model input matrix for the given ``output_type``.

    Output types 1 and 2 use ``[BSA**(1/3), BSA, BSA**3, AMT, t, form]``;
    every other value uses ``[BSA, AMT, t, form]``.
    """
    amt_col = np.reshape(amt, (-1, 1))
    if output_type in (1, 2):
        columns = (np.power(bsa, 1 / 3), bsa, np.power(bsa, 3),
                   amt_col, t, form)
    else:
        columns = (bsa, amt_col, t, form)
    return np.concatenate(columns, axis=1)


def _min_max_scale(k):
    """Min-max scale ``k``; return ``(scaled, max_k, min_k)``.

    If every entry of ``k`` is equal the division is 0/0 and the scaled
    values are NaN.
    """
    max_k = np.max(k)
    min_k = np.min(k)
    return (k - min_k) / (max_k - min_k), max_k, min_k


def _denormalise(data, max_k, min_k):
    """Flatten ``data`` and undo the min-max scaling of ``_min_max_scale``."""
    return np.reshape(data, -1) * (max_k - min_k) + min_k


def process_train_data(df, form_type, output_type):
    """Build model inputs and targets from a preprocessed frame.

    Columns are read by position: 1 -> t, 2 -> AMT, 3 -> y, 4 -> form,
    6 -> BSA.

    Parameters
    ----------
    df : pandas.DataFrame
    form_type : unused; kept so existing callers keep working.
    output_type : ``1`` -> target is ``-log(y)``; ``2`` -> target is
        ``-log(y / AMT)``; anything else -> the raw ``y`` is returned as the
        target.

    Returns
    -------
    tuple
        ``(train_in, target, max_k, min_k, AMT)``.  For output types 1 and 2
        ``target`` is the min-max scaled ``k`` with shape ``(n, 1)``;
        otherwise it is ``y`` with shape ``(n, 1)`` and ``max_k``/``min_k``
        are still taken from ``-log(y / AMT)``.  ``AMT`` is 1-D.
    """
    y, amt, bsa, t, form = _read_columns(df)

    k_train = -np.log(y) if output_type == 1 else -np.log(y / amt)
    scaled_k, max_k, min_k = _min_max_scale(k_train)
    train_in = _feature_matrix(bsa, amt, t, form, output_type)

    if output_type in (1, 2):
        target = np.reshape(scaled_k, (-1, 1))
    else:
        target = np.reshape(y, (-1, 1))
    return train_in, target, max_k, min_k, amt


def process_train_data_DNN(df, form_type, output_type):
    """Variant of ``process_train_data`` with a scaled type-2 target.

    Differences from ``process_train_data``:

    * output_type 2 uses ``-log(y * 5 / AMT)`` as the target;
    * for output types 1 and 2 the scaled target is returned 1-D;
    * for any other output type SIX values are returned (see below).

    Parameters
    ----------
    df : pandas.DataFrame, read by position (1 -> t, 2 -> AMT, 3 -> y,
        4 -> form, 6 -> BSA).
    form_type : unused; kept so existing callers keep working.
    output_type : ``1``, ``2`` or anything else, as described above.

    Returns
    -------
    tuple
        Output types 1 and 2:
        ``(train_in, scaled_k, max_k, min_k, AMT)``.
        Otherwise:
        ``(train_in, y, max_k, min_k, max_AMT, min_AMT)`` with ``y`` shaped
        ``(n, 1)``.
    """
    y, amt, bsa, t, form = _read_columns(df)

    if output_type == 1:
        k_train = -np.log(y)
    elif output_type == 2:
        k_train = -np.log(y * _DNN_RATIO_SCALE / amt)
    else:
        k_train = -np.log(y / amt)
    scaled_k, max_k, min_k = _min_max_scale(k_train)
    train_in = _feature_matrix(bsa, amt, t, form, output_type)

    if output_type in (1, 2):
        return train_in, scaled_k, max_k, min_k, amt
    # Historical return shape for the raw-target case: the AMT range
    # replaces the AMT vector, giving six values instead of five.
    return (train_in, np.reshape(y, (-1, 1)), max_k, min_k,
            np.max(amt), np.min(amt))


def turn_back_DNN(data, max_k, min_k, train_data, output_type, amt_col=3):
    """Map outputs scaled by ``process_train_data_DNN`` back to ``y``.

    Parameters
    ----------
    data : array-like of scaled predictions (flattened for types 1 and 2).
    max_k, min_k : scaling bounds returned by ``process_train_data_DNN``.
    train_data : 2-D input matrix; only read for output_type 2.
    output_type : ``1`` -> ``exp(-k)``; ``2`` -> ``AMT * exp(-k) / 5``;
        anything else -> ``data`` converted to float, unchanged otherwise.
    amt_col : column of ``train_data`` holding AMT.  The default 3 matches
        the six-column layout built for output types 1 and 2.
    """
    if output_type == 1:
        return np.exp(-_denormalise(data, max_k, min_k))
    if output_type == 2:
        k = _denormalise(data, max_k, min_k)
        amt = train_data[:, amt_col]
        return amt * np.exp(-k) / _DNN_RATIO_SCALE
    return data / 1.


def turn_back(data, max_k, min_k, train_data, output_type, amt_col=2):
    """Map outputs scaled by ``process_train_data`` back to ``y``.

    Parameters
    ----------
    data : array-like of scaled predictions (flattened for types 1 and 2).
    max_k, min_k : scaling bounds returned by ``process_train_data``.
    train_data : 2-D input matrix; only read for output_type 2.
    output_type : ``1`` -> ``exp(-k) / 1.25``; ``2`` -> ``AMT * exp(-k)``;
        anything else -> ``data`` converted to float, unchanged otherwise.
    amt_col : column of ``train_data`` holding AMT.  The default 2 keeps
        the historical behaviour.
        NOTE(review): ``process_train_data`` puts AMT at column 3 for output
        types 1 and 2 (column 2 is ``BSA**3``), so the default only fits a
        different input layout -- confirm with callers and pass
        ``amt_col=3`` if ``train_data`` comes from ``process_train_data``.
    """
    if output_type == 1:
        k = _denormalise(data, max_k, min_k)
        return np.exp(-k) / _TURN_BACK_TYPE1_DIVISOR
    if output_type == 2:
        k = _denormalise(data, max_k, min_k)
        amt = train_data[:, amt_col]
        return amt * np.exp(-k)
    return data / 1.


def result_output(train_y, y_train_pre):
    """Print MSE, RMSE, R-squared and MAE of predictions against targets.

    Parameters
    ----------
    train_y : array-like of reference values.
    y_train_pre : array-like of predicted values.
    """
    mse = mean_squared_error(train_y, y_train_pre)
    # ``squared=False`` was deprecated in scikit-learn 1.4 and removed in
    # 1.6.  Averaging the per-output square roots gives the same number and
    # works on old and new releases alike.
    per_output_mse = mean_squared_error(train_y, y_train_pre,
                                        multioutput='raw_values')
    rmse = np.mean(np.sqrt(per_output_mse))
    r2 = r2_score(train_y, y_train_pre)
    mae = mean_absolute_error(train_y, y_train_pre)

    print('train_MSE:', mse)
    print('train_RMSE:', rmse)
    print('train_R-squared:', r2)
    print('train_MAE:', mae)


def one_hot_encode(values, num_classes=10):
    """One-hot encode values from ``[0, 1]`` into equal-width bins.

    Parameters
    ----------
    values : array-like of numbers, expected to lie in ``[0, 1]``.
    num_classes : number of bins / width of each one-hot vector.

    Returns
    -------
    numpy.ndarray
        Shape ``values.shape + (num_classes,)``.

    Values below 0 are placed in the first bin and values at or above 1 in
    the last bin, instead of wrapping around through negative indexing or
    raising ``IndexError``.
    """
    # Multiply instead of dividing by the rounded reciprocal 1/num_classes.
    scaled = np.asarray(values, dtype=float) * num_classes
    # Work out the bin of every value, clamping the boundary cases.
    categories = np.clip(np.floor(scaled), 0, num_classes - 1).astype(int)
    # Row i of the identity matrix is the one-hot vector of class i.
    return np.eye(num_classes)[categories]


def _share_within(pred, actual, tolerance):
    """Fraction of entries with ``|pred - actual| <= tolerance * actual``."""
    hits = np.abs(pred - actual) <= tolerance * actual
    # Plain-int division keeps ZeroDivisionError for empty input.
    return int(np.sum(hits)) / len(actual)


def cal_accuracy(y_pred, test_y):
    """Print the percentage of predictions within 20% and 30% of the truth.

    Parameters
    ----------
    y_pred : array-like of predictions.
    test_y : array-like of reference values.

    Both inputs are flattened first, so ``(n,)`` and ``(n, 1)`` arrays can
    be mixed.

    Raises
    ------
    ValueError
        If the two inputs do not hold the same number of values.
    ZeroDivisionError
        If the inputs are empty.
    """
    pred = np.ravel(np.asarray(y_pred))
    actual = np.ravel(np.asarray(test_y))
    if pred.shape != actual.shape:
        raise ValueError(
            'y_pred and test_y must contain the same number of values'
        )

    within_20_percent = _share_within(pred, actual, 0.20)
    within_30_percent = _share_within(pred, actual, 0.30)
    print("within_20_percent:", within_20_percent * 100)
    print("within_30_percent:", within_30_percent * 100)


def draw_acc(train_y, y_train_pre, txt=None, axis_max=100):
    """Scatter measured against predicted values with 20%/30% bands.

    Parameters
    ----------
    train_y : measured values (y axis).
    y_train_pre : predicted values (x axis).
    txt : optional output path; when given the figure is also saved there
        as a 300 dpi TIFF.
    axis_max : right end of the tolerance bands and of the x axis.  The
        default 100 reproduces the original fixed range.
    """
    fig, ax = plt.subplots()

    ax.scatter(y_train_pre, train_y, s=10, label='Observations')
    ax.set_xlabel('Predicted Concentration')
    ax.set_ylabel('Measured Concentration')
    ax.grid(True)

    x = np.linspace(0, axis_max, 500)
    bands = (
        ('20%', 'blue', x * 0.8, x * 1.2),
        ('30%', 'red', x * 0.7, x * 1.3),
    )
    # Draw all boundary lines first and the shaded areas afterwards so the
    # artist and legend order stays as it was.
    for name, colour, lower, upper in bands:
        ax.plot(x, upper, color=colour, linestyle='--',
                label=name + ' Upper Bound')
        ax.plot(x, lower, color=colour, linestyle='--',
                label=name + ' Lower Bound')
    for _, colour, lower, upper in bands:
        ax.fill_between(x, lower, upper, color=colour, alpha=0.1)

    ax.set_xlim([-5, axis_max])
    ax.legend()
    fig.set_facecolor('white')

    if txt is not None:
        fig.savefig(txt, dpi=300, format='tif')
    # Show the chart after the optional save.
    plt.show()