Spaces:
Sleeping
Sleeping
File size: 2,438 Bytes
d468d6e a8266d2 2b3c03c 0849d01 b81ca23 2b3c03c d468d6e a8266d2 d4240f9 fa94ea3 d4240f9 2b3c03c 0849d01 b81ca23 0849d01 b81ca23 e18db79 121e981 4400619 a9f6d6c 121e981 e18db79 567000e ee3b054 567000e ee3b054 567000e e18db79 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
import pickle
import os
import pickle
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
# Read the penguin dataset from disk and show the first raw rows in the app.
penguin_df = pd.read_csv('src/penguins.csv')
st.write(penguin_df.head())

st.subheader('Penguin Species')

# Keep only rows with no missing values so the target and the predictors
# below are drawn from the same complete records.
penguin_df = penguin_df.dropna()

# Predictors: pd.get_dummies expands non-numeric columns into indicator
# columns and leaves numeric columns unchanged.
feature_columns = [
    'island',
    'bill_length_mm',
    'bill_depth_mm',
    'flipper_length_mm',
    'body_mass_g',
    'sex',
]
features = pd.get_dummies(penguin_df[feature_columns])

# Target: the species column of the cleaned frame.
output = penguin_df['species']

# Preview both pieces in the page.
st.write('Here are our output variables')
st.write(output.head())
st.write('Here are our feature variables')
st.write(features.head())
st.subheader('Model Training')

# `output` (species column) and `features` (dummy-encoded predictors) were
# already built from the cleaned frame earlier in this script, so they are
# reused here instead of being recomputed.

# Replace the species labels with integer codes; `uniques` holds the original
# label for each code so predictions can be mapped back to species names.
output, uniques = pd.factorize(output)

# Seed the split as well as the forest: Streamlit reruns this script on every
# interaction, and an unseeded split would change the reported accuracy and
# the saved model on each rerun.
# NOTE(review): test_size=.8 holds out 80% of the rows for evaluation, so the
# forest is fitted on only 20% of the data - confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(
    features, output, test_size=.8, random_state=15)

rfc = RandomForestClassifier(random_state=15)
# Fit and predict on plain arrays (.values) rather than on the DataFrames.
rfc.fit(x_train.values, y_train)
y_pred = rfc.predict(x_test.values)

# accuracy_score takes (y_true, y_pred); pass the ground truth first.
score = accuracy_score(y_test, y_pred)
st.write('Our accuracy score for this model is {}'.format(score))
st.subheader('Save the Model Output to Pickle')

# Every artifact is written under a single folder, created on demand.
output_dir = "outputs"
os.makedirs(output_dir, exist_ok=True)

# Destination paths for the fitted classifier and for the label index
# returned by pd.factorize.
model_filename = os.path.join(output_dir, "random_forest_penguin.pickle")
uniques_filename = os.path.join(output_dir, "uniques_data.pickle")

# Pickle each object to its own file, classifier first and label index second.
for artifact_path, artifact in (
    (model_filename, rfc),
    (uniques_filename, uniques),
):
    with open(artifact_path, "wb") as sink:
        pickle.dump(artifact, sink)

st.write("Model saved to {}".format(model_filename))
st.write("Click below to download the model.")
# Read each pickle back as raw bytes and expose it through its own download
# button. The files are served as opaque binary (application/octet-stream).

# Trained classifier.
with open(model_filename, "rb") as f:
    model_bytes = f.read()
st.download_button(
    label="Download Trained Model (random_forest_penguin.pickle)",
    data=model_bytes,
    file_name="random_forest_penguin.pickle",
    mime="application/octet-stream",
)

# Label index for the integer class codes; kept in its own variable so the
# name matches the payload it holds.
with open(uniques_filename, "rb") as f:
    uniques_bytes = f.read()
st.download_button(
    label="Download the data (uniques_data.pickle)",
    data=uniques_bytes,
    file_name="uniques_data.pickle",
    mime="application/octet-stream",
)