import numpy as np
import pandas as pd
import streamlit as st
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor

st.title(":red[Welcome to my ML Project]")

# --- Load data ---------------------------------------------------------------
# NOTE(review): Streamlit re-executes the script on each widget interaction,
# so the CSV is re-read and the model refit every time. Consider moving this
# section into a cached function (e.g. st.cache_resource) -- confirm the
# installed Streamlit version supports it first.
df = pd.read_csv("tips.csv")

# "total_bill" is the regression target; pop() removes it from df in place,
# so every remaining column is treated as a feature.
y = df.pop("total_bill")
x = df

X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.15, random_state=30
)

# --- Fit the preprocessing on the training split only --------------------------
numerical_data = X_train.select_dtypes("number")
cat_data = X_train.select_dtypes("object")

encoder = OneHotEncoder(sparse_output=False)
X_train_cat = pd.DataFrame(
    encoder.fit_transform(cat_data),
    columns=encoder.get_feature_names_out(),
)

scaler = StandardScaler()
res = scaler.fit_transform(numerical_data)
X_train_num = pd.DataFrame(res, columns=numerical_data.columns)

# Both frames were just built with a fresh default index, so the column-wise
# concat lines rows up positionally.
Final_X_train_data = pd.concat([X_train_cat, X_train_num], axis=1)

# --- Apply the fitted transformers to the test split ---------------------------
# Select by the training column names (instead of re-inferring by dtype) so
# the test features are guaranteed to match what the transformers were fit on.
X_test_num = X_test[numerical_data.columns]
X_test_cat = X_test[cat_data.columns]

X_test_num_trans = scaler.transform(X_test_num)
res1 = pd.DataFrame(X_test_num_trans, columns=X_test_num.columns)

X_test_cat_trans = encoder.transform(X_test_cat)
res2 = pd.DataFrame(X_test_cat_trans, columns=encoder.get_feature_names_out())

Final_X_test = pd.concat([res2, res1], axis=1)

# --- Train and evaluate ---------------------------------------------------------
regression = KNeighborsRegressor()
regression.fit(Final_X_train_data, y_train)

y_pred = regression.predict(Final_X_test)

# The original computed this score and threw it away; keep it and show it so
# the held-out error is actually visible in the app.
test_mse = mean_squared_error(y_test, y_pred)
st.write(f"Test MSE: {test_mse:.2f}")

# --- Collect the customer's details from the UI ---------------------------------
# A tip cannot be negative; a float min_value keeps this a float input.
tip = st.number_input("Enter Customer Tip", min_value=0.0)

sex = ["Female", "Male"]
select_sex = st.selectbox("Select Customer Gender", sex)

smoker = ["No", "Yes"]
select_smoker = st.selectbox("Select Customer Smoker or not", smoker)

day = ["Sun", "Sat", "Fri", "Thur"]
select_day = st.selectbox("select day", day)

time = ["Dinner", "Lunch"]
select_time = st.selectbox("Select time", time)

# Party size is a whole number of at least one person. Integer min_value and
# step make the widget return an int and reject 0, negatives and fractions
# (the unconstrained input defaulted to 0.0).
size = st.number_input("Enter size", min_value=1, step=1)

def fun(query_point):
    """Return the model's prediction for the first row of ``query_point``.

    ``query_point`` is handed unchanged to ``regression.predict``, so it must
    already be preprocessed (encoded and scaled) the same way as the training
    features. Only the first element of the prediction array is returned.
    """
    res = regression.predict(query_point)[0]
    return res


# Build, preprocess and score the query only once the user asks for it.
if st.button("Predict Total Bill"):
    query_point = pd.DataFrame(
        [
            {
                "tip": tip,
                "sex": select_sex,
                "smoker": select_smoker,
                "day": select_day,
                "time": select_time,
                "size": size,
            }
        ]
    )

    # Pick columns by the names the transformers were fitted on rather than
    # by dtype, so selection and column order always match training.
    # (feature_names_in_ is set because both transformers are fitted on
    # DataFrames -- assumes the CSV header gives string column names.)
    cat_query_point = query_point[list(encoder.feature_names_in_)]
    num_query_point = query_point[list(scaler.feature_names_in_)]

    cat_query_point_trans = pd.DataFrame(
        encoder.transform(cat_query_point),
        columns=encoder.get_feature_names_out(),
    )
    num_query_point_trans = pd.DataFrame(
        scaler.transform(num_query_point),
        columns=num_query_point.columns,
    )

    # Same column layout as the training matrix: encoded categoricals first,
    # then the scaled numeric columns.
    final_query_point = pd.concat(
        [cat_query_point_trans, num_query_point_trans], axis=1
    )

    st.write(fun(final_query_point))