# tips_2 / app.py
# Sowmith22's picture
# Upload 3 files
# 87d5aaa verified
import numpy as np
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
st.title(":red[Welcome to my ML Project]")

# Load the dataset. `total_bill` is the regression target; pop() removes it
# from the frame in place, so everything left in `df` is a feature.
df = pd.read_csv("tips.csv")
y = df.pop("total_bill")
x = df

X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.15, random_state=30
)

# Preprocessors are fitted on the training split only and reused as-is for
# the test split and for the user-supplied query further down the script.
encoder = OneHotEncoder(sparse_output=False)
scaler = StandardScaler()

cat_data = X_train.select_dtypes("object")
numerical_data = X_train.select_dtypes("number")

# Each frame is rebuilt from the transformer's array output without passing
# an index, so both get a fresh default index and concatenate row-for-row.
X_train_cat = pd.DataFrame(
    encoder.fit_transform(cat_data),
    columns=encoder.get_feature_names_out(),
)
X_train_num = pd.DataFrame(
    scaler.fit_transform(numerical_data),
    columns=numerical_data.columns,
)
Final_X_train_data = pd.concat([X_train_cat, X_train_num], axis=1)

# Apply the already-fitted transformers to the held-out split, keeping the
# same column layout: encoded categoricals first, scaled numerics second.
X_test_cat = X_test.select_dtypes("object")
X_test_num = X_test.select_dtypes("number")
Final_X_test = pd.concat(
    [
        pd.DataFrame(
            encoder.transform(X_test_cat),
            columns=encoder.get_feature_names_out(),
        ),
        pd.DataFrame(
            scaler.transform(X_test_num),
            columns=X_test_num.columns,
        ),
    ],
    axis=1,
)

# KNN regressor with the library's default hyper-parameters.
regression = KNeighborsRegressor()
regression.fit(Final_X_train_data, y_train)

# Hold-out error is computed for reference only; nothing displays it.
y_pred = regression.predict(Final_X_test)
test_mse = mean_squared_error(y_test, y_pred)
# --- Prediction inputs -------------------------------------------------------
# The values collected here (tip, select_sex, select_smoker, select_day,
# select_time, size) are read by the "Predict Total Bill" handler below.

# A tip cannot be negative; without a lower bound the widget would accept any
# float and pass it straight to the scaler and regressor.
tip = st.number_input("Enter Customer Tip", min_value=0.0)

sex_options = ["Female", "Male"]
select_sex = st.selectbox("Select Customer Gender", sex_options)

smoker_options = ["No", "Yes"]
select_smoker = st.selectbox("Select Customer Smoker or not", smoker_options)

day_options = ["Sun", "Sat", "Fri", "Thur"]
select_day = st.selectbox("select day", day_options)

# Named `time_options` rather than `time` so the list does not shadow the
# standard-library module name.
time_options = ["Dinner", "Lunch"]
select_time = st.selectbox("Select time", time_options)

# Party size is a whole number of at least one person. Integer `min_value`
# and `step` make the widget return an int and reject 0, negatives, and
# fractional sizes.
size = st.number_input("Enter size", min_value=1, step=1)
if st.button("Predict Total Bill"):
    # Assemble the widget values into a one-row frame so the fitted
    # transformers can be applied to it the same way as to the data splits.
    # NOTE(review): key order presumably mirrors the feature column order of
    # tips.csv -- confirm against the file.
    query_point = pd.DataFrame(
        [
            {
                "tip": tip,
                "sex": select_sex,
                "smoker": select_smoker,
                "day": select_day,
                "time": select_time,
                "size": size,
            }
        ]
    )

    num_query_point = query_point.select_dtypes("number")
    cat_query_point = query_point.select_dtypes("object")

    # Reuse the encoder/scaler fitted on the training split; nothing is
    # re-fitted on the single query row.
    encoded_part = pd.DataFrame(
        encoder.transform(cat_query_point),
        columns=encoder.get_feature_names_out(),
    )
    scaled_part = pd.DataFrame(
        scaler.transform(num_query_point),
        columns=X_test_num.columns,
    )

    # Encoded categoricals first, scaled numerics second -- the same layout
    # the regressor was fitted on.
    final_query_point = pd.concat([encoded_part, scaled_part], axis=1)

    # predict() returns one value per input row; there is exactly one row.
    predicted_bill = regression.predict(final_query_point)[0]
    st.write(predicted_bill)