Spaces:

Xiangliyao
/

classification-test

Sleeping

narinsak unawong

Update app.py

6df879d verified 3 months ago

2.74 kB

	import streamlit as st
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.compose import ColumnTransformer
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.metrics import accuracy_score

	# --- Data and model setup ---
	# Feature columns consumed by the classifier, grouped by how they are preprocessed.
	# Declared before loading so the cleaning step can reference them.
	numerical_features = ['Culmen Length (mm)', 'Culmen Depth (mm)', 'Flipper Length (mm)', 'Body Mass (g)']
	categorical_features = ['Island', 'Sex']

	# Load the raw data. The path is relative, so 'penguins_lter.csv' must be
	# reachable from the app's working directory.
	penguins = pd.read_csv('penguins_lter.csv')

	# Drop a row only when a column the model actually uses (features or the
	# 'Species' label) is missing. A blanket dropna() would also throw away rows
	# whose *unused* columns are blank.
	# NOTE(review): the raw LTER export presumably carries such mostly-empty
	# columns (e.g. free-text notes) - confirm against the CSV.
	penguins_cleaned = penguins.dropna(subset=numerical_features + categorical_features + ['Species'])
	penguins_cleaned = penguins_cleaned.drop_duplicates()

	# Preprocessing: standardise numeric columns, one-hot encode categorical ones.
	# handle_unknown='ignore' lets a category unseen during fit pass through
	# transform instead of raising.
	numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
	categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])
	preprocessor = ColumnTransformer(transformers=[
	    ('num', numerical_transformer, numerical_features),
	    ('cat', categorical_transformer, categorical_features),
	])

	# Full model: preprocessing followed by a k-nearest-neighbours classifier
	# with scikit-learn's default settings. It is fitted later, on demand.
	pipeline = Pipeline(steps=[
	    ('preprocessor', preprocessor),
	    ('classifier', KNeighborsClassifier()),
	])


	# Streamlit app: page title.
	st.title("Penguin Species Classification")

	# Optional preview of the cleaned data; the table is written only while the
	# checkbox is ticked, so the call must live inside the `if` body.
	if st.checkbox("Show Dataset"):
	    st.write(penguins_cleaned)

	# Input form: one widget per model feature, rendered in the order listed.
	st.header("Enter Penguin Features:")

	# Numeric measurements; min_value=0.0 stops the widgets accepting negatives.
	culmen_length = st.number_input("Culmen Length (mm)", min_value=0.0)
	culmen_depth = st.number_input("Culmen Depth (mm)", min_value=0.0)
	flipper_length = st.number_input("Flipper Length (mm)", min_value=0.0)
	body_mass = st.number_input("Body Mass (g)", min_value=0.0)

	# Categorical choices are limited to the values present in the cleaned data.
	island_options = penguins_cleaned['Island'].unique()
	island = st.selectbox("Island", island_options)
	sex_options = penguins_cleaned['Sex'].unique()
	sex = st.selectbox("Sex", sex_options)

	# Collect the entered values under the column names the pipeline expects,
	# then wrap each value in a list to form a single-row DataFrame.
	input_row = {
	    'Culmen Length (mm)': culmen_length,
	    'Culmen Depth (mm)': culmen_depth,
	    'Flipper Length (mm)': flipper_length,
	    'Body Mass (g)': body_mass,
	    'Island': island,
	    'Sex': sex,
	}
	input_data = pd.DataFrame({column: [value] for column, value in input_row.items()})

	# Make Prediction: everything below runs only when the button is pressed,
	# so the whole sequence must be nested inside the `if` body.
	if st.button('Predict'):
	    # Train on exactly the columns the preprocessor consumes, so the training
	    # frame and the single-row input frame share the same schema. The
	    # preprocessor drops every other column anyway, so results are unchanged.
	    X = penguins_cleaned[numerical_features + categorical_features]
	    y = penguins_cleaned['Species']

	    # 80/20 split with a fixed seed, so the same rows are used for training
	    # on every click.
	    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
	    pipeline.fit(X_train, y_train)

	    # Classify the values entered in the form.
	    prediction = pipeline.predict(input_data)
	    st.write(f"Predicted Species: {prediction[0]}")

	    # Score the held-out 20% so the split is actually used and the user can
	    # judge how far to trust the prediction.
	    holdout_accuracy = accuracy_score(y_test, pipeline.predict(X_test))
	    st.write(f"Hold-out accuracy: {holdout_accuracy:.2%}")