# sheetalW1402's picture
# files
# 51dc7d6
#importing libraries
import joblib
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
# Load the training data.
train_data = pd.read_csv("Titanic_train.csv")

# To inspect missing values while exploring interactively:
# train_data.isnull().sum()

# Handle missing values: 'Cabin' is dropped entirely, 'Age' is filled with the
# column mean and 'Embarked' with its most frequent value.
# The results are assigned back instead of calling fillna(inplace=True) on a
# column selection: that chained in-place form is deprecated in pandas 2.x and
# silently leaves the frame unchanged when Copy-on-Write is enabled.
train_data = train_data.drop(columns='Cabin')
train_data['Age'] = train_data['Age'].fillna(train_data['Age'].mean())
train_data['Embarked'] = train_data['Embarked'].fillna(
    train_data['Embarked'].mode()[0]
)
# Categorical columns that are replaced by integer codes.
object_list = ['Sex', 'Embarked']

# A single encoder is refitted for every column, so after the loop `le` only
# holds the classes of the last column in object_list.
le = LabelEncoder()
for column in object_list:
    train_data[column] = le.fit_transform(train_data[column])
    # LabelEncoder numbers the entries of classes_ 0..n-1 in order, so the
    # label -> code mapping is just the enumeration; plain ints keep the
    # printed mapping readable regardless of the NumPy version.
    le_name_mapping = {label: code for code, label in enumerate(le.classes_)}
    print('Feature: ', column)
    print('Mapping: ', le_name_mapping)
# Feature selection: everything except the target and the 'PassengerId',
# 'Name' and 'Ticket' columns is used as model input.
x = train_data.drop(columns=['Survived', 'PassengerId', 'Name', 'Ticket'])
y = train_data['Survived']

# Hold out 20% of the rows; a fixed seed makes the split (and therefore the
# trained model and the reported score) reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same statistics to
# the held-out rows.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Create and train the model.
titanic_model = LogisticRegression()
titanic_model.fit(x_train_scaled, y_train)

# Report accuracy on the held-out split so it is actually put to use.
print(f"Hold-out accuracy: {titanic_model.score(x_test_scaled, y_test):.3f}")

# Save the model. It was fitted on standardized features, so the fitted scaler
# is saved next to it: inference code must apply the scaler to raw features
# before calling the model.
joblib.dump(titanic_model, 'titanic_model.joblib')
joblib.dump(scaler, 'titanic_scaler.joblib')
print("Model trained successfully!!")