import numpy as np import pandas as pd from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression from sklearn.metrics import accuracy_score import pickle heart_data = pd.read_csv(r'C:\Student\data/Copy of heart_disease_data.csv') # print first 5 rows of the dataset heart_data.head() heart_data.tail() # number of rows and columns in the dataset heart_data.shape # getting some info about the data heart_data.info() # checking for missing values heart_data.isnull().sum() # statistical measures about the data heart_data.describe() # checking the distribution of Target Variable heart_data['target'].value_counts() # 1 --> Defective Heart #0 --> Healthy Heart #Splitting the Features and Target X = heart_data.drop(columns='target', axis=1) Y = heart_data['target'] print(X) print(Y) #Splitting the Data into Training data & Test Data X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2) print(X.shape, X_train.shape, X_test.shape) #Model Training #Logistic Regression model = LogisticRegression() # training the LogisticRegression model with Training data model.fit(X_train, Y_train) #Model Evaluation # Save the trained model to a pickle file with open('heart_disease_model.pkl', 'wb') as model_file: pickle.dump(model, model_file) print("Model trained and saved as 'heart_disease_model.pkl'") # Load the saved model from the pickle file with open('heart_disease_model.pkl', 'rb') as model_file: loaded_model = pickle.load(model_file) #Accuracy Score # accuracy on training data X_train_prediction = model.predict(X_train) training_data_accuracy = accuracy_score(X_train_prediction, Y_train) print('Accuracy on Training data : ', training_data_accuracy) # accuracy on test data X_test_prediction = model.predict(X_test) test_data_accuracy = accuracy_score(X_test_prediction, Y_test) print('Accuracy on Test data : ', test_data_accuracy) #Building a Predictive System input_data = (99,1,4,300,600,1,2,500,1,7.2,0,2,5) # change the input data to a numpy array input_data_as_numpy_array= np.asarray(input_data) # reshape the numpy array as we are predicting for only on instance input_data_reshaped = input_data_as_numpy_array.reshape(1,-1) prediction = model.predict(input_data_reshaped) print(prediction) if (prediction[0]== 0): print('The Person does not have a Heart Disease') else: print('The Person has Heart Disease')