"""Streamlit app: train a random-forest penguin species classifier.

The script reads ``src/penguins.csv``, shows the raw data and the prepared
feature/target columns, fits a ``RandomForestClassifier``, reports its
accuracy on the held-out rows, pickles the model and the species label
mapping into ``outputs/``, and offers both pickles as downloads.
"""
import os
import pickle

import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# One seed for both the split and the forest. Streamlit re-executes this
# script on widget interaction (e.g. a download click); with an unseeded
# split every rerun would train a different model, report a different score
# and overwrite the pickle on disk with something other than what was shown.
RANDOM_STATE = 15

penguin_df = pd.read_csv('src/penguins.csv')
st.write(penguin_df.head())

st.subheader('Penguin Species')
# Rows with any missing value are discarded before features are built.
penguin_df = penguin_df.dropna()
output = penguin_df['species']
features = penguin_df[['island',
                       'bill_length_mm',
                       'bill_depth_mm',
                       'flipper_length_mm',
                       'body_mass_g',
                       'sex']]
# One-hot encode the non-numeric columns so the forest gets numeric input.
features = pd.get_dummies(features)
st.write('Here are our output variables')
st.write(output.head())
st.write('Here are our feature variables')
st.write(features.head())

st.subheader('Model Training')
# Encode species names as integer codes; `uniques` maps each code back to
# its species name and is saved alongside the model below.
output, uniques = pd.factorize(output)
# NOTE(review): test_size=.8 holds out 80% of the rows, so the model is
# trained on only 20% of the data -- confirm this is intentional.
x_train, x_test, y_train, y_test = train_test_split(
    features, output, test_size=.8, random_state=RANDOM_STATE)
rfc = RandomForestClassifier(random_state=RANDOM_STATE)
# Fit and predict on the raw arrays (`.values`) rather than the DataFrames.
rfc.fit(x_train.values, y_train)
y_pred = rfc.predict(x_test.values)
# scikit-learn's documented argument order is (y_true, y_pred).
score = accuracy_score(y_test, y_pred)
st.write('Our accuracy score for this model is {}'.format(score))

st.subheader('Save the Model Output to Pickle')

# Create output directory if it doesn't exist
output_dir = "outputs"
os.makedirs(output_dir, exist_ok=True)

# Save the trained model
model_filename = os.path.join(output_dir, "random_forest_penguin.pickle")
with open(model_filename, "wb") as rf_pickle:
    pickle.dump(rfc, rf_pickle)

# Save the species label mapping produced by pd.factorize
uniques_filename = os.path.join(output_dir, "uniques_data.pickle")
with open(uniques_filename, "wb") as output_pickle:
    pickle.dump(uniques, output_pickle)

st.write("Model saved to {}".format(model_filename))
st.write("Click below to download the model.")

# Read the pickled model back as bytes for the download button
with open(model_filename, "rb") as f:
    model_bytes = f.read()

st.download_button(
    label="Download Trained Model (random_forest_penguin.pickle)",
    data=model_bytes,
    file_name="random_forest_penguin.pickle",
    mime="application/octet-stream",
)

# Read the pickled label mapping back as bytes for the download button
with open(uniques_filename, "rb") as f:
    uniques_bytes = f.read()

st.download_button(
    label="Download the data (uniques_data.pickle)",
    data=uniques_bytes,
    file_name="uniques_data.pickle",
    mime="application/octet-stream",
)