|
|
|
import joblib |
|
import pandas as pd |
|
from sklearn.preprocessing import StandardScaler |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.preprocessing import LabelEncoder |
|
from sklearn.linear_model import LogisticRegression |
|
|
|
|
|
# Load the Titanic training set from the current working directory.
train_data = pd.read_csv("Titanic_train.csv")

# Remove the 'Cabin' column from the frame entirely.
train_data = train_data.drop(columns='Cabin')

# Fill missing 'Age' values with the column mean and missing 'Embarked'
# values with the most frequent entry. The result is assigned back to the
# column instead of calling fillna(inplace=True) on the selected Series:
# that chained in-place form emits a FutureWarning on pandas 2.x and stops
# updating the parent frame once Copy-on-Write is the default.
train_data['Age'] = train_data['Age'].fillna(train_data['Age'].mean())
train_data['Embarked'] = train_data['Embarked'].fillna(
    train_data['Embarked'].mode()[0]
)
|
|
|
|
|
# Columns whose values are replaced by integer codes below.
object_list = ['Sex', 'Embarked']

# A single encoder is refitted for each column in turn, so once the loop
# finishes `le` only holds the classes of the last column processed.
le = LabelEncoder()

# NOTE(review): the source had lost its indentation; the mapping and both
# prints are assumed to belong to the loop body (one report per column).
for column in object_list:
    encoded_values = le.fit_transform(train_data[column])
    train_data[column] = encoded_values

    # Pair every original label with the integer code the encoder gave it.
    class_codes = le.transform(le.classes_)
    le_name_mapping = dict(zip(le.classes_, class_codes))

    print('Feature: ', column)
    print('Mapping: ', le_name_mapping)
|
|
|
|
|
# Feature matrix: every remaining column except the target and the
# 'PassengerId', 'Name' and 'Ticket' columns.
x = train_data.drop(columns=['Survived', 'PassengerId', 'Name', 'Ticket'])

# Target vector.
y = train_data['Survived']

# Hold out 20% of the rows. The fixed random_state makes the shuffle, and
# therefore the fitted model, reproducible from one run to the next.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
|
|
|
|
|
# Standardise the training features; the scaling statistics are learned
# from the training split only.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)

# Fit a logistic regression with default settings on the scaled features.
titanic_model = LogisticRegression()
titanic_model.fit(x_train_scaled, y_train)

# Persist the fitted model. Because it was trained on standardised inputs,
# the fitted scaler is saved next to it so that whoever loads the model can
# apply the same transformation before calling predict().
joblib.dump(titanic_model, 'titanic_model.joblib')
joblib.dump(scaler, 'titanic_scaler.joblib')

print("Model trained successfully!!")
|
|
|
|