Spaces:

sks01dev
/

Customer-Conversion-Prediction

Sleeping

Upload 15 files

d64524a verified about 2 months ago

1.64 kB

	#!/usr/bin/env python
	# coding: utf-8

	# This is a starter notebook for the updated module 5 of ML Zoomcamp.
	#
	# The code is based on modules 3 and 4. NOTE: this header originally cited the [telco customer churn](https://www.kaggle.com/datasets/blastchar/telco-customer-churn) dataset, but the script below actually loads the course lead-scoring CSV (see `load_data`).

	# Import the necessary libraries
	import numpy as np
	import pandas as pd
	import sklearn
	import pickle
	from sklearn.linear_model import LogisticRegression
	from sklearn.pipeline import make_pipeline
	from sklearn.feature_extraction import DictVectorizer


	# Print the installed versions of the main libraries, one per line,
	# formatted as `name==version`.
	print(f'pandas=={pd.__version__}')
	print(f'numpy=={np.__version__}')
	print(f'sklearn=={sklearn.__version__}')


	# Load the data
	def load_data(data_url=None):
	    """Read the lead-scoring CSV into a pandas DataFrame.

	    Args:
	        data_url: Optional URL or local path of the CSV file. When omitted
	            (or ``None``), the course lead-scoring dataset hosted on GitHub
	            is downloaded.

	    Returns:
	        A ``pandas.DataFrame`` with the parsed contents of the CSV.
	    """
	    if data_url is None:
	        data_url = "https://raw.githubusercontent.com/alexeygrigorev/datasets/master/course_lead_scoring.csv"
	    return pd.read_csv(data_url)



	def train_model(df):
	    """Fit a DictVectorizer + logistic-regression pipeline on the lead data.

	    Args:
	        df: DataFrame holding the feature columns ``lead_source``,
	            ``number_of_courses_viewed`` and ``annual_income`` together with
	            the target column ``converted``. It is not modified.

	    Returns:
	        The fitted scikit-learn pipeline.
	    """
	    categorical = ['lead_source']
	    numeric = ['number_of_courses_viewed', 'annual_income']

	    # Fill missing values on a copy of the feature columns so the caller's
	    # DataFrame is left untouched.
	    features = df[categorical + numeric].copy()
	    features[categorical] = features[categorical].fillna('NA')
	    features[numeric] = features[numeric].fillna(0)

	    # One dict per row is the input format DictVectorizer expects.
	    train_dict = features.to_dict(orient='records')

	    # Preprocessing with DictVectorizer, then the logistic regression model.
	    pipeline = make_pipeline(
	        DictVectorizer(),
	        LogisticRegression(solver='liblinear'),
	    )

	    # The target variable.
	    y_train = df['converted']

	    pipeline.fit(train_dict, y_train)
	    return pipeline


	def save_model(filename, model):
	    """Pickle ``model`` to ``filename`` and print a confirmation message.

	    Args:
	        filename: Path of the output file; it is opened in binary write
	            mode, so an existing file is overwritten.
	        model: The object to serialize with ``pickle``.
	    """
	    with open(filename, 'wb') as f_out:
	        pickle.dump(model, f_out)

	    # Report only after the file has been written and closed.
	    print(f"Model saved to {filename}")


	# Run the training workflow end to end: load the dataset, fit the
	# pipeline on it, and pickle the fitted pipeline to the relative path
	# 'model.bin'.
	df = load_data()
	pipeline = train_model(df)
	save_model('model.bin', pipeline)