# -*- coding: utf-8 -*-
"""
Created on Tue Dec 26 21:49:46 2023

@author: admin
"""
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt


def preprocess_data(filepath, form):
    """Load the Excel sheet and keep the rows selected by *form*.

    Rows whose ``TAD`` value is below 4 are always dropped first.

    Args:
        filepath: Path (or anything ``pandas.read_excel`` accepts) of the sheet.
        form: ``1`` keeps rows whose ``form`` column equals 1, ``0`` keeps
            every remaining row, any other value keeps rows whose ``form``
            column equals 2.

    Returns:
        The filtered ``DataFrame``.
    """
    table = pd.read_excel(filepath)
    table = table[table['TAD'] >= 4]

    # Looked up unconditionally so a sheet without a 'form' column fails the
    # same way no matter which selection was requested.
    form_codes = table['form']

    if form == 1:
        return table[form_codes == 1]
    if form == 0:
        return table
    return table[form_codes == 2]
    
def process_train_data(df, form_type, output_type):
    """Build a feature matrix and a training target from a dosing table.

    Columns are addressed by position. The names used below (``t``, ``AMT``,
    ``y``, ``form``, ``BSA``) are the ones the code has always used for
    positions 1, 2, 3, 4 and 6; the sheet's real headers are not visible
    here, so their meaning is presumed from those names.

    Args:
        df: Table with at least 7 columns.
        form_type: Unused; kept so existing calls keep working.
        output_type: ``1`` -> target is ``-log(y)``; ``2`` -> target is
            ``-log(y / AMT)``; anything else -> the raw ``y`` column is
            returned as the target (``max_k``/``min_k`` are then still
            computed from ``-log(y / AMT)``).

    Returns:
        ``(features, target, max_k, min_k, AMT)`` where

        * ``features`` is ``[BSA**(1/3), BSA, BSA**3, AMT, t, form]`` for
          ``output_type`` 1 or 2 and ``[BSA, AMT, t, form]`` otherwise;
        * ``target`` is an ``(n, 1)`` array: the min-max scaled ``k`` for
          ``output_type`` 1 or 2, the raw ``y`` otherwise;
        * ``max_k`` / ``min_k`` are the extremes of the unscaled ``k``;
        * ``AMT`` is the 1-D dose column.
    """
    y = df.iloc[:, 3].values
    AMT = np.reshape(df.iloc[:, 2].values, (-1))
    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
    AMT1 = np.reshape(AMT, (-1, 1))

    if output_type == 1:
        k_train = -(np.log(y))
    else:
        k_train = -(np.log(y / AMT))

    max_k = np.max(k_train)
    min_k = np.min(k_train)

    # Min-max scale k. When every k is identical the range is zero and the
    # plain formula would yield 0/0 = NaN; the centred values are already
    # all zero in that case, which maps back to min_k on de-normalisation.
    k_range = max_k - min_k
    centred = k_train - min_k
    train_out_normalized = centred / k_range if k_range != 0 else centred
    train_out_normalized = np.reshape(train_out_normalized, (-1, 1))

    if output_type == 1 or output_type == 2:
        train_in_normalized = np.concatenate(
            (np.power(BSA, 1/3), BSA, np.power(BSA, 3), AMT1, t, form),
            axis=1,
        )
        return train_in_normalized, train_out_normalized, max_k, min_k, AMT

    train_in_normalized = np.concatenate((BSA, AMT1, t, form), axis=1)
    return train_in_normalized, np.reshape(y, (-1, 1)), max_k, min_k, AMT
    
def process_train_data_DNN(df, form_type, output_type):
    """Build a feature matrix and a training target for the DNN variant.

    Columns are addressed by position. The names used below (``t``, ``AMT``,
    ``y``, ``form``, ``BSA``) are the ones the code has always used for
    positions 1, 2, 3, 4 and 6; the sheet's real headers are not visible
    here, so their meaning is presumed from those names.

    Args:
        df: Table with at least 7 columns.
        form_type: Unused; kept so existing calls keep working.
        output_type: ``1`` -> target is ``-log(y)``; ``2`` -> target is
            ``-log(5 * y / AMT)``; anything else -> the raw ``y`` column is
            returned as the target (``max_k``/``min_k`` are then still
            computed from ``-log(y / AMT)``).

    Returns:
        For ``output_type`` 1 or 2, a 5-tuple
        ``(features, target, max_k, min_k, AMT)`` where ``features`` is
        ``[BSA**(1/3), BSA, BSA**3, AMT, t, form]``, ``target`` is the 1-D
        min-max scaled ``k`` and ``AMT`` is the 1-D dose column.

        For any other ``output_type``, a 6-tuple
        ``(features, y, max_k, min_k, max_AMT, min_AMT)`` where ``features``
        is ``[BSA, AMT, t, form]`` and ``y`` has shape ``(n, 1)``. The
        differing arity is long-standing behaviour and is kept on purpose.
    """
    y = df.iloc[:, 3].values
    AMT = np.reshape(df.iloc[:, 2].values, (-1))
    t = np.reshape(df.iloc[:, 1].values, (-1, 1))
    form = np.reshape(df.iloc[:, 4].values, (-1, 1))
    BSA = np.reshape(df.iloc[:, 6].values, (-1, 1))
    AMT1 = np.reshape(AMT, (-1, 1))

    if output_type == 1:
        k_train = -(np.log(y))
    elif output_type == 2:
        # The factor 5 is undone by the matching "/ 5" in turn_back_DNN.
        k_train = -(np.log(y * 5 / AMT))
    else:
        k_train = -(np.log(y / AMT))

    max_k = np.max(k_train)
    min_k = np.min(k_train)

    if output_type == 1 or output_type == 2:
        # Min-max scale k. When every k is identical the range is zero and
        # the plain formula would yield 0/0 = NaN; the centred values are
        # already all zero then, which maps back to min_k on inversion.
        k_range = max_k - min_k
        centred = k_train - min_k
        train_out_normalized = centred / k_range if k_range != 0 else centred

        train_in_normalized = np.concatenate(
            (np.power(BSA, 1/3), BSA, np.power(BSA, 3), AMT1, t, form),
            axis=1,
        )
        return train_in_normalized, train_out_normalized, max_k, min_k, AMT

    max_AMT = np.max(AMT)
    min_AMT = np.min(AMT)
    train_in_normalized = np.concatenate((BSA, AMT1, t, form), axis=1)
    return (train_in_normalized, np.reshape(y, (-1, 1)),
            max_k, min_k, max_AMT, min_AMT)
    
    
def turn_back_DNN(data, max_k, min_k, train_data, output_type):
    """Map DNN outputs back to the original ``y`` scale.

    Inverse of the target encoding done by ``process_train_data_DNN``.

    Args:
        data: Model outputs; flattened to 1-D for ``output_type`` 1 and 2.
        max_k: Largest unscaled ``k`` seen when the target was built.
        min_k: Smallest unscaled ``k`` seen when the target was built.
        train_data: Feature matrix whose column 3 holds ``AMT``. Only read
            when ``output_type`` is 2.
        output_type: ``1`` -> ``exp(-k)``; ``2`` -> ``AMT * exp(-k) / 5``;
            anything else -> ``data`` returned as-is (as floats).

    Returns:
        The de-normalised predictions.
    """
    if output_type == 1 or output_type == 2:
        # Undo the min-max scaling to recover k.
        k = np.reshape(data, -1) * (max_k - min_k) + min_k
        if output_type == 1:
            return np.exp(-k)
        AMT = train_data[:, 3]
        # "/ 5" undoes the factor 5 applied inside the log when the
        # output_type 2 target was built.
        return AMT * np.exp(-k) / 5
    return data / 1.
  

def turn_back(data, max_k, min_k, train_data, output_type):
    """Map model outputs back to the original ``y`` scale.

    Args:
        data: Model outputs; flattened to 1-D for ``output_type`` 1 and 2.
        max_k: Largest unscaled ``k`` seen when the target was built.
        min_k: Smallest unscaled ``k`` seen when the target was built.
        train_data: 2-D array whose column 2 is used as ``AMT``. Only read
            when ``output_type`` is 2.
        output_type: ``1`` -> ``exp(-k) / 1.25``; ``2`` -> ``AMT * exp(-k)``;
            anything else -> ``data`` returned as-is (as floats).

    Returns:
        The de-normalised predictions.
    """
    if output_type == 1 or output_type == 2:
        # Undo the min-max scaling to recover k.
        k = np.reshape(data, -1) * (max_k - min_k) + min_k
        if output_type == 1:
            # NOTE(review): process_train_data encodes this mode as -log(y),
            # whose exact inverse is exp(-k). The extra "/ 1.25" has no
            # counterpart there -- presumably an empirical correction; confirm.
            return np.exp(-k) / 1.25
        # NOTE(review): column 2 is where AMT sits in the raw table read by
        # process_train_data, but in the feature matrices that function
        # returns AMT is column 3 (6-column layout) or column 1 (4-column
        # layout). Confirm which array callers pass here.
        AMT = train_data[:, 2]
        return AMT * np.exp(-k)
    return data / 1.

def result_output(train_y, y_train_pre):
    """Print MSE, RMSE, R-squared and MAE for a set of predictions.

    Args:
        train_y: Observed values.
        y_train_pre: Predicted values, same shape as ``train_y``.

    Returns:
        None. The four metrics are written to stdout.
    """
    mse = mean_squared_error(train_y, y_train_pre)
    # The ``squared=False`` keyword was deprecated in scikit-learn 1.4 and
    # removed in 1.6. Taking the root of each output's MSE and averaging
    # gives the same value it used to return, on every version.
    per_output_mse = mean_squared_error(
        train_y, y_train_pre, multioutput='raw_values'
    )
    rmse = np.mean(np.sqrt(per_output_mse))
    r2 = r2_score(train_y, y_train_pre)
    mae = mean_absolute_error(train_y, y_train_pre)

    print('train_MSE:', mse)
    print('train_RMSE:', rmse)
    print('train_R-squared:', r2)
    print('train_MAE:', mae)

def one_hot_encode(values, num_classes=10):
    """One-hot encode values by equal-width bin over the unit interval.

    Each value is divided by the bin width ``1 / num_classes`` and floored
    to get its bin index; a value that lands exactly on index
    ``num_classes`` (i.e. the upper edge) is placed in the last bin.
    Inputs are not range-checked.

    Args:
        values: NumPy array of numbers to encode.
        num_classes: Number of bins / length of each one-hot vector.

    Returns:
        Array with one extra trailing axis of length ``num_classes``
        holding the one-hot rows.
    """
    bin_width = 1 / num_classes

    # Bin index of every value.
    bin_index = np.floor(values / bin_width).astype(int)

    # Boundary case: fold the upper edge into the last bin.
    on_upper_edge = bin_index == num_classes
    bin_index[on_upper_edge] = num_classes - 1

    # Row i of the identity matrix is the one-hot vector for class i.
    identity_rows = np.eye(num_classes)
    return identity_rows[bin_index]


def cal_accuracy(y_pred, test_y):
    """Print the percentage of predictions within 20% and 30% of the truth.

    A prediction counts as a hit for tolerance ``p`` when
    ``abs(pred - actual) <= p * actual``.

    Args:
        y_pred: Predicted values.
        test_y: Observed values; its length is the denominator.

    Returns:
        None. Both percentages are written to stdout.
    """
    def _share_within(tolerance):
        # Fraction of (actual, pred) pairs whose error is inside the band.
        hits = sum(
            abs(predicted - observed) <= tolerance * observed
            for observed, predicted in zip(test_y, y_pred)
        )
        return hits / len(test_y)

    share_20 = _share_within(0.20)
    share_30 = _share_within(0.30)

    print("within_20_percent:", share_20 * 100)
    print("within_30_percent:", share_30 * 100)
    
def draw_acc(train_y, y_train_pre, txt=None):
    """Scatter measured against predicted values with 20% / 30% bands.

    Predictions go on the x axis and measurements on the y axis. The dashed
    bounds and shaded bands are multiples of the x (predicted) value over
    the fixed range 0..100.

    Args:
        train_y: Measured values (y axis).
        y_train_pre: Predicted values (x axis).
        txt: Optional output path. When given, the figure is saved there as
            a 300-dpi TIFF before being shown.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots()

    # Scatter plot: actual vs predicted drug concentrations.
    ax.scatter(y_train_pre, train_y, s=10, label='Observations')

    ax.set_xlabel('Predicted Concentration')
    ax.set_ylabel('Measured Concentration')
    ax.grid(True)

    # x grid for the tolerance lines and areas.
    x = np.linspace(0, 100, 500)

    # (lower factor, upper factor, colour, legend tag) for each band.
    bands = (
        (0.8, 1.2, 'blue', '20%'),
        (0.7, 1.3, 'red', '30%'),
    )

    # Dashed bound lines first (upper, then lower, per band) so the legend
    # order stays: 20% upper, 20% lower, 30% upper, 30% lower.
    for lower, upper, color, tag in bands:
        ax.plot(x, x * upper, color=color, linestyle='--',
                label=f'{tag} Upper Bound')
        ax.plot(x, x * lower, color=color, linestyle='--',
                label=f'{tag} Lower Bound')

    # Lightly shade the area between each pair of bounds.
    for lower, upper, color, _ in bands:
        ax.fill_between(x, x * lower, x * upper, color=color, alpha=0.1)

    ax.set_xlim([-5, 100])
    ax.legend()
    fig.set_facecolor('white')

    # Save before showing: once the window is closed the figure may be empty.
    if txt is not None:
        fig.savefig(txt, dpi=300, format='tif')

    # Then display the chart.
    plt.show()