In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the dataset CSV from the Kaggle input directory into a DataFrame.
csv_path = "/kaggle/input/personal-key-indicators-of-heart-disease/2020/heart_2020_cleaned.csv"
df = pd.read_csv(csv_path)

In [None]:
# Number of missing values in each column (displayed as the cell output).
df.isna().sum()

In [None]:
# One-hot encode the categorical features: each listed column is replaced by
# one indicator column per distinct value.
categorical_columns = [
    'Smoking', 'AlcoholDrinking', 'Sex', 'AgeCategory', 'Race', 'Diabetic',
    'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer',
]
df = pd.get_dummies(df, columns=categorical_columns)

In [None]:
# Divide BMI by its own square and store the result back in the column.
# NOTE(review): x / x**2 is algebraically 1/x, so this replaces BMI with its
# reciprocal, and running the cell a second time flips it back. Confirm that
# this transform is intended.
bmi = df['BMI']
df['BMI'] = bmi.div(bmi.pow(2))

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the numeric features to the [0, 1] range.
# 'Stroke' and 'DiffWalking' are deliberately left out here: at this point
# they still hold the strings 'No'/'Yes' (they are mapped to 0/1 in a later
# cell), and MinMaxScaler cannot convert those strings to floats.
numerical_columns = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']
scaler = MinMaxScaler()
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])

In [None]:
# Print every column name followed by the distinct values it contains.
for column, values in df.items():
    print(column, values.unique())

In [None]:
# Convert the 'No'/'Yes' string columns to 0/1.
# Values outside the mapping (including values that are already 0/1 if this
# cell is re-run) become NaN, because Series.map returns NaN for unmapped keys.
yes_no_to_int = {'No': 0, 'Yes': 1}
for binary_column in ('Stroke', 'DiffWalking'):
    df[binary_column] = df[binary_column].map(yes_no_to_int)

In [None]:
# Min-max scale the numeric features (now including the 0/1 'DiffWalking'
# column) to the [0, 1] range.
numerical_columns = ['BMI', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'SleepTime']
scaler = MinMaxScaler().fit(df[numerical_columns])
df[numerical_columns] = scaler.transform(df[numerical_columns])

In [None]:
def _zscore(column):
    """Standardize one column: subtract its mean and divide by its std."""
    return (column - column.mean()) / column.std()


# Drop every row in which at least one numeric feature lies more than three
# standard deviations away from that feature's mean.
z_scores = df[numerical_columns].apply(_zscore)
outliers = z_scores.abs().gt(3)
df = df.loc[~outliers.any(axis=1)]

In [None]:
# Re-check the per-column missing-value counts after the transformations above.
print(df.isna().sum())

In [None]:
# Separate the target column from the feature matrix.
y = df['HeartDisease']
X = df.drop('HeartDisease', axis=1)

In [None]:
from sklearn.model_selection import train_test_split # Add this import statement
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible between runs.
split = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split

# Logistic regression


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Logistic regression classifier with scikit-learn's default hyper-parameters.
# NOTE: the name `model` is reassigned to Keras models further down.
model = LogisticRegression()

In [None]:
# Train the logistic regression on the training split.
model.fit(X=X_train, y=y_train)

In [None]:
# Predict class labels for the held-out test rows.
y_pred = model.predict(X=X_test)

In [None]:
# Fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

# KNN


In [None]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier that votes among the 5 closest training rows.
knn_model = KNeighborsClassifier(n_neighbors=5)

In [None]:
# Fit the KNN classifier on the training split.
knn_model.fit(X=X_train, y=y_train)

In [None]:
# Predict on the test split and report the KNN accuracy.
knn_y_pred = knn_model.predict(X=X_test)
knn_accuracy = accuracy_score(y_true=y_test, y_pred=knn_y_pred)
print("KNN Accuracy:", knn_accuracy)

# Naive Bayes


In [None]:
from sklearn.naive_bayes import GaussianNB
# Gaussian Naive Bayes classifier with default settings.
nb_model = GaussianNB()

In [None]:
# Fit the Naive Bayes classifier on the training split.
nb_model.fit(X=X_train, y=y_train)

In [None]:
# Predict class labels for the test rows with the Naive Bayes model.
nb_y_pred = nb_model.predict(X=X_test)

In [None]:
# Report the Naive Bayes accuracy on the test split.
nb_accuracy = accuracy_score(y_true=y_test, y_pred=nb_y_pred)
print("Naive Bayes Accuracy:", nb_accuracy)

# Decision Tree


In [None]:
from sklearn.tree import DecisionTreeClassifier
# Decision tree classifier; the fixed random_state makes the fitted tree
# reproducible between runs.
dt_model = DecisionTreeClassifier(random_state=42)

In [None]:
# Fit the decision tree on the training split.
dt_model.fit(X=X_train, y=y_train)

In [None]:
# Predict class labels for the test rows with the decision tree.
dt_y_pred = dt_model.predict(X=X_test)

In [None]:
# Report the decision tree accuracy on the test split.
dt_accuracy = accuracy_score(y_true=y_test, y_pred=dt_y_pred)
print("accuracy:", dt_accuracy)

# Random forests


In [None]:
from sklearn.tree import DecisionTreeClassifier
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

In [None]:
dt_y_pred = dt_model.predict(X_test)

# Evaluate the Decision Tree model
dt_accuracy = accuracy_score(y_test, dt_y_pred)
print("Decision Tree Accuracy:", dt_accuracy)

# LSTM


In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Keras works on plain float32 arrays, so convert both feature tables.
X_train_array = X_train.to_numpy().astype(np.float32)
X_test_array = X_test.to_numpy().astype(np.float32)

# Encode the class labels as integers; the encoder is fitted on the training
# labels only and then reused for the test labels.
label_encoder = LabelEncoder().fit(y_train)
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [None]:
# Insert a middle axis of length 1 so each row becomes a one-step sequence:
# (samples, features) -> (samples, 1, features), the layout the LSTM is
# declared with below.
X_train_reshaped = X_train_array[:, np.newaxis, :]
X_test_reshaped = X_test_array[:, np.newaxis, :]

In [None]:
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Stacked LSTM binary classifier: three LSTM layers (128 -> 64 -> 32 units)
# followed by two dense layers and a single sigmoid output unit, with 20%
# dropout between most layers. Each input is a sequence of length 1.
# NOTE: this rebinds `model`, which earlier held the logistic regression.
model = Sequential([
    LSTM(units=128, input_shape=(1, X_train_array.shape[1]),
         return_sequences=True),
    Dropout(0.2),
    LSTM(units=64, return_sequences=True),
    Dropout(0.2),
    LSTM(units=32, return_sequences=False),
    Dropout(0.2),
    Dense(units=64, activation='relu'),
    Dropout(0.2),
    Dense(units=32, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

In [None]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture and parameter counts of the LSTM model.
model.summary()

In [None]:
# Train for 30 epochs in mini-batches of 32; the last 10% of the training
# data is held out by Keras for validation.
model.fit(
    x=X_train_reshaped,
    y=y_train_encoded,
    batch_size=32,
    epochs=30,
    validation_split=0.1,
)

In [None]:
# The sigmoid output is a probability; threshold it at 0.5 to get 0/1 labels.
y_pred_proba = model.predict(X_test_reshaped)
y_pred = np.greater(y_pred_proba, 0.5).astype(int)

In [None]:
# Accuracy of the thresholded LSTM predictions against the encoded test labels.
accuracy = accuracy_score(y_true=y_test_encoded, y_pred=y_pred)
print("Accuracy:", accuracy)

In [None]:
import matplotlib.pyplot as plt

In [None]:
import pickle

In [None]:
# Serialize the current `model` object to model.pkl in the working directory.
# NOTE(review): `model` is the Keras LSTM at this point; pickling Keras models
# is not supported by every Keras/TensorFlow version. Consider Keras' own
# model.save(...) and confirm this file can be loaded back.
with open('model.pkl', mode='wb') as model_file:
    pickle.dump(model, model_file)

# CNN

#### `Problem somewhere in this section (cause not yet identified)`


In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam

In [None]:
# Re-split the data 80/10/10 into train / validation / test: first hold out
# 20% of the rows, then cut that hold-out in half.
# NOTE: this rebinds X_train, X_test, y_train and y_test from the earlier split.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, random_state=42, test_size=0.2)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, random_state=42, test_size=0.5)

In [None]:
# Number of feature columns; used as the input length of the CNN.
num_features = len(X_train.columns)

In [None]:
# 1-D CNN binary classifier: two Conv1D + max-pooling stages, then a dense
# layer and a single sigmoid output. Each sample is declared as a sequence of
# `num_features` steps with one channel.
# NOTE: this rebinds `model`, which previously held the LSTM.
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu',
                 input_shape=(num_features, 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [None]:
# Adam with an explicit learning rate of 0.001; binary cross-entropy matches
# the single sigmoid output unit.
cnn_optimizer = Adam(learning_rate=0.001)
model.compile(
    loss='binary_crossentropy',
    optimizer=cnn_optimizer,
    metrics=['accuracy'],
)

In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Standardize every feature column to zero mean and unit variance.
# NOTE: this rebinds `scaler`, which earlier held a MinMaxScaler.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# to_categorical expects integer class indices, but the target column is never
# converted to numbers in this notebook (presumably it still holds 'No'/'Yes'
# strings; the LSTM section needs a LabelEncoder for it). Convert the labels
# to integer codes first; categories are taken in sorted order, so already
# numeric 0/1 labels keep their values.
y_codes = pd.Categorical(y).codes
y_encoded = tf.keras.utils.to_categorical(y_codes)
# NOTE(review): X_scaled and y_encoded are not used by any later cell in the
# part of the notebook visible here.

In [None]:
# The CNN was declared with input_shape=(num_features, 1), so feed it float32
# arrays with an explicit trailing channel axis rather than raw DataFrames
# (same float32 conversion as the LSTM section).
X_train_cnn = X_train.values.astype(np.float32)[..., np.newaxis]
X_val_cnn = X_val.values.astype(np.float32)[..., np.newaxis]

# binary_crossentropy needs numeric 0/1 targets, but the label column is never
# converted to numbers in this notebook (presumably it still holds 'No'/'Yes').
# Encode train and validation labels against the same sorted class list so
# both splits share one encoding; already numeric 0/1 labels are unchanged.
label_classes = np.unique(y)
y_train_cnn = pd.Categorical(
    y_train, categories=label_classes).codes.astype(np.float32)
y_val_cnn = pd.Categorical(
    y_val, categories=label_classes).codes.astype(np.float32)

# Train for 10 epochs in mini-batches of 32, validating on the 10% split.
history = model.fit(X_train_cnn, y_train_cnn, epochs=10,
                    batch_size=32, validation_data=(X_val_cnn, y_val_cnn))