Spaces:

crudcook
/

Medical_Insurance_Cost_Prediction

Sleeping

App Files Files Community

Medical_Insurance_Cost_Prediction / app.py

Rahul-Crudcook

Upload 3 files

fc8f2da verified 8 months ago

raw

history blame contribute delete

4.26 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler, LabelEncoder
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.metrics import mean_squared_error, r2_score
	from tensorflow.keras.models import Sequential
	from tensorflow.keras.layers import Dense, Dropout
	from tensorflow.keras.optimizers import Adam
	import matplotlib.pyplot as plt

	# Load and display dataset
	@st.cache_data
	def load_data():
	    """Read the insurance dataset from ``insurance.csv``.

	    The path is relative, so the CSV has to be reachable from the
	    directory the app is started in.

	    Returns:
	        The DataFrame produced by ``pd.read_csv``.
	    """
	    return pd.read_csv("insurance.csv")

	# Fetch the dataset, then render the page title followed by the first
	# rows of the table (DataFrame.head()).
	data = load_data()
	st.title("Medical Insurance Cost Prediction with Hybrid Model")
	st.write("Dataset preview:")
	preview_rows = data.head()
	st.write(preview_rows)

	# Preprocessing and Feature Engineering
	st.subheader("Data Preprocessing and Feature Engineering")

	# Interaction terms: age and BMI multiplied by a 0/1 smoker flag.
	# The flag is computed once, while 'smoker' still holds its original
	# string values (it is label-encoded further down).
	smoker_flag = (data['smoker'] == 'yes').astype(int)
	data['age_smoker'] = data['age'] * smoker_flag
	data['bmi_smoker'] = data['bmi'] * smoker_flag

	# Encode categorical variables as integer codes. The one encoder is
	# refitted for every column, so afterwards it only remembers the classes
	# of the last column it saw ('region').
	label_encoder = LabelEncoder()
	for column in ('sex', 'smoker', 'region'):
	    data[column] = label_encoder.fit_transform(data[column])

	# Select features. The explicit copy gives X its own storage, so later
	# column assignments cannot trigger pandas' SettingWithCopyWarning or
	# write through to `data`.
	feature_columns = ['age', 'sex', 'bmi', 'children', 'smoker', 'region', 'age_smoker', 'bmi_smoker']
	scaled_columns = ['age', 'bmi', 'children', 'age_smoker', 'bmi_smoker']
	X = data[feature_columns].copy()
	y = data['charges']

	# Split data before scaling so the held-out rows never influence the
	# scaler's statistics.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
	X_train = X_train.copy()
	X_test = X_test.copy()

	# Standardize numerical features: fit on the training rows only, then
	# apply the same transformation to the test rows. X itself stays in
	# raw units.
	scaler = StandardScaler()
	X_train[scaled_columns] = scaler.fit_transform(X_train[scaled_columns])
	X_test[scaled_columns] = scaler.transform(X_test[scaled_columns])

	# Define the neural network model
	def create_neural_network(input_dim=None):
	    """Build and compile the feed-forward regression network.

	    Architecture: Dense(128, relu) -> Dropout(0.3) -> Dense(64, relu)
	    -> Dense(1), compiled with Adam (learning rate 0.001) and MSE loss.

	    Args:
	        input_dim: Number of input features. When omitted, the column
	            count of the module-level ``X_train`` is used, which keeps
	            the zero-argument call working.

	    Returns:
	        The compiled, untrained ``Sequential`` model.
	    """
	    if input_dim is None:
	        input_dim = X_train.shape[1]

	    model = Sequential([
	        Dense(128, activation='relu', input_shape=(input_dim,)),
	        Dropout(0.3),
	        Dense(64, activation='relu'),
	        Dense(1),
	    ])
	    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
	    return model

	st.subheader("Training the Neural Network")


	# Streamlit re-executes the script on every widget interaction. Without
	# caching, each click would retrain both models, and the unseeded network
	# would give different predictions every time. max_entries=1 keeps only
	# the latest model in memory.
	@st.cache_resource(max_entries=1)
	def _train_neural_network(features, targets):
	    """Fit the neural network on the given data; cached across reruns."""
	    model = create_neural_network()
	    model.fit(features, targets, epochs=50, batch_size=32, validation_split=0.2, verbose=1)
	    return model


	@st.cache_resource(max_entries=1)
	def _train_random_forest(features, targets):
	    """Fit the random forest on the given data; cached across reruns."""
	    forest = RandomForestRegressor(n_estimators=200, max_depth=12, random_state=42)
	    forest.fit(features, targets)
	    return forest


	nn_model = _train_neural_network(X_train, y_train)

	# Generate predictions from the neural network for train and test sets
	nn_train_pred = nn_model.predict(X_train).flatten()
	nn_test_pred = nn_model.predict(X_test).flatten()

	# Add NN predictions as a new feature for Random Forest.
	# NOTE(review): the training-set predictions come from the same rows the
	# network was fitted on; out-of-fold predictions would be a cleaner input
	# for the second-stage model.
	X_train_rf = X_train.copy()
	X_test_rf = X_test.copy()
	X_train_rf['nn_pred'] = nn_train_pred
	X_test_rf['nn_pred'] = nn_test_pred

	# Train a Random Forest on this new feature set
	st.subheader("Training the Random Forest with Neural Network Predictions")
	rf_model = _train_random_forest(X_train_rf, y_train)
	final_predictions = rf_model.predict(X_test_rf)

	# Model evaluation on the held-out rows. r2 is the coefficient of
	# determination scaled to a percentage.
	rmse = np.sqrt(mean_squared_error(y_test, final_predictions))
	r2 = r2_score(y_test, final_predictions) * 100
	st.write(f"RMSE (Root Mean Squared Error): {rmse:.2f}")
	st.write(f"R² (Accuracy): {r2:.2f}%")

	# Plot actual vs predicted values
	st.subheader("Actual vs Predicted Values")
	# Draw on an explicit Figure/Axes pair instead of pyplot's implicit global
	# figure, and hand that Figure to Streamlit.
	fig, ax = plt.subplots(figsize=(10, 5))
	ax.plot(y_test.values, label="Actual Values", color='blue')
	ax.plot(final_predictions, label="Predicted Values", color='orange')
	ax.set_xlabel("Sample Index")
	ax.set_ylabel("Insurance Charges")
	ax.legend()
	st.pyplot(fig)
	# The script runs again on every interaction; close the figure so open
	# figures do not pile up in pyplot's registry.
	plt.close(fig)

	# Prediction on new data
	st.subheader("Predict on New Data")

	# Columns the scaler was fitted on, in the order used at fit time.
	scaled_input_columns = ['age', 'bmi', 'children', 'age_smoker', 'bmi_smoker']
	interaction_columns = ('age_smoker', 'bmi_smoker')

	# scaler.mean_ holds the raw-unit mean of each column the scaler was fitted
	# on, so it yields sensible defaults whether or not X has been standardized.
	raw_means = dict(zip(scaled_input_columns, scaler.mean_))

	input_data = {}
	for col in X.columns:
	    if col in interaction_columns:
	        # Derived below from the other inputs rather than typed in.
	        continue
	    if col in raw_means:
	        # Continuous feature entered in raw units; scaled before prediction.
	        input_data[col] = st.number_input(f"Enter {col}:", value=float(raw_means[col]))
	    else:
	        # Label-encoded categorical feature: only whole codes that occur in
	        # the data are accepted; the most frequent code is the default.
	        codes = X[col].astype(int)
	        input_data[col] = st.number_input(
	            f"Enter {col}:",
	            min_value=int(codes.min()),
	            max_value=int(codes.max()),
	            value=int(codes.mode().iloc[0]),
	            step=1,
	        )

	# Rebuild the interaction features the same way as for training
	# (value * smoker flag) so they cannot contradict age/bmi/smoker.
	# NOTE(review): assumes the encoded smoker code 1 corresponds to 'yes',
	# i.e. the raw column only contains 'no'/'yes' -- confirm against the data.
	input_data['age_smoker'] = input_data['age'] * input_data['smoker']
	input_data['bmi_smoker'] = input_data['bmi'] * input_data['smoker']

	if st.button("Predict Insurance Charge"):
	    # Pin the column order to X's so it matches what the models were fitted on.
	    input_df = pd.DataFrame([input_data], columns=list(X.columns)).astype(float)
	    input_df[scaled_input_columns] = scaler.transform(input_df[scaled_input_columns])

	    # Use neural network to predict intermediate feature
	    nn_feature = nn_model.predict(input_df).flatten()
	    input_df['nn_pred'] = nn_feature

	    # Predict final charge using Random Forest
	    final_prediction = rf_model.predict(input_df)
	    st.write(f"Predicted Insurance Charge: ${final_prediction[0]:.2f}")