"""Streamlit app that predicts a penguin's species with a k-nearest-neighbours model.

The app reads ``penguins_lter.csv`` from the working directory, lets the user
pick feature values in the sidebar, fits a scikit-learn pipeline (scaling +
one-hot encoding + ``KNeighborsClassifier``) on 80% of the cleaned rows and
shows the predicted species for the selected feature values.
"""

import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

DATA_PATH = 'penguins_lter.csv'
TARGET_COLUMN = 'Species'
NUMERICAL_FEATURES = [
    'Culmen Length (mm)',
    'Culmen Depth (mm)',
    'Flipper Length (mm)',
    'Body Mass (g)',
]
CATEGORICAL_FEATURES = ['Island', 'Sex']
# Column order of the single-row prediction frame (same as the sidebar order).
INPUT_COLUMNS = ['Island', *NUMERICAL_FEATURES, 'Sex']
REQUIRED_COLUMNS = [TARGET_COLUMN, *INPUT_COLUMNS]


def _load_penguins(path: str) -> pd.DataFrame:
    """Read the CSV at *path* and return the rows usable for modelling.

    Shows a Streamlit error and stops the script run when the file is
    missing, when a required column is absent, or when no complete row
    remains after cleaning.
    """
    try:
        frame = pd.read_csv(path)
    except FileNotFoundError:
        st.error("Error: penguins_lter.csv not found. Please make sure the file is in the same directory as the app.")
        st.stop()

    missing = [name for name in REQUIRED_COLUMNS if name not in frame.columns]
    if missing:
        st.error(f"Error: required column(s) missing from {path}: {', '.join(missing)}")
        st.stop()

    # Only require values in the columns the model actually uses; a blanket
    # dropna() would also discard rows whose only gap is in an unused column.
    frame = frame.dropna(subset=REQUIRED_COLUMNS)
    frame = frame.drop_duplicates()

    if frame.empty:
        st.error(f"Error: {path} contains no complete rows to train on.")
        st.stop()
    return frame


def _feature_slider(frame: pd.DataFrame, column: str) -> float:
    """Render a sidebar slider for *column* spanning its min..max, defaulting to its mean."""
    values = frame[column]
    return st.sidebar.slider(
        column,
        float(values.min()),
        float(values.max()),
        float(values.mean()),
    )


def _read_user_input(frame: pd.DataFrame) -> pd.DataFrame:
    """Render the sidebar widgets and return the selection as a one-row DataFrame."""
    st.sidebar.header('Input Features')
    selection = {'Island': st.sidebar.selectbox('Island', frame['Island'].unique())}
    for column in NUMERICAL_FEATURES:
        selection[column] = _feature_slider(frame, column)
    selection['Sex'] = st.sidebar.selectbox('Sex', frame['Sex'].unique())
    return pd.DataFrame({column: [selection[column]] for column in INPUT_COLUMNS})


def _fit_pipeline(frame: pd.DataFrame) -> Pipeline:
    """Fit the preprocessing + KNN pipeline on the training split of *frame*."""
    # Select the feature columns explicitly so the training frame has the
    # same schema as the frame later passed to predict().
    X = frame[INPUT_COLUMNS]
    y = frame[TARGET_COLUMN]
    # The 20% hold-out is not used below; the split is kept so the model is
    # trained on the same deterministic 80% sample (random_state=42).
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    numerical_transformer = Pipeline(steps=[('scaler', StandardScaler())])
    categorical_transformer = Pipeline(
        steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
    )
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numerical_transformer, NUMERICAL_FEATURES),
            ('cat', categorical_transformer, CATEGORICAL_FEATURES),
        ]
    )
    pipeline = Pipeline(
        steps=[
            ('preprocessor', preprocessor),
            ('classifier', KNeighborsClassifier()),
        ]
    )
    pipeline.fit(X_train, y_train)
    return pipeline


# Load and clean the data.
penguins = _load_penguins(DATA_PATH)

# Streamlit app
st.title('Penguin Species Prediction')

# Sidebar for user input
input_data = _read_user_input(penguins)

# Prepare the model.
# NOTE(review): the model is refit on every script rerun (i.e. on every widget
# change); consider Streamlit's caching if this becomes slow.
pipeline = _fit_pipeline(penguins)

# Make prediction
prediction = pipeline.predict(input_data)

# Display prediction
st.subheader('Prediction')
st.write(f"Predicted Penguin Species: {prediction[0]}")