File size: 2,438 Bytes
d468d6e
a8266d2
2b3c03c
0849d01
b81ca23
 
2b3c03c
 
 
d468d6e
a8266d2
d4240f9
 
 
fa94ea3
d4240f9
 
 
 
 
 
 
 
 
 
2b3c03c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0849d01
 
b81ca23
 
 
 
 
 
 
 
 
 
0849d01
b81ca23
 
 
 
e18db79
121e981
4400619
a9f6d6c
121e981
e18db79
 
 
 
 
 
 
 
567000e
 
 
ee3b054
567000e
 
 
 
ee3b054
567000e
e18db79
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
import pickle
import os
import pickle

from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Load the penguin dataset and show its first rows in the app.
penguin_df = pd.read_csv('src/penguins.csv')
st.write(penguin_df.head())

st.subheader('Penguin Species')

# Discard every row that contains at least one missing value.
penguin_df = penguin_df.dropna()

# Target: the species label column.
output = penguin_df['species']

# Predictors: the selected columns, with pd.get_dummies one-hot encoding
# any non-numeric ones (presumably 'island' and 'sex' -- confirm against
# the CSV).
feature_columns = [
    'island',
    'bill_length_mm',
    'bill_depth_mm',
    'flipper_length_mm',
    'body_mass_g',
    'sex',
]
features = pd.get_dummies(penguin_df[feature_columns])

# Preview both the target and the encoded predictors.
st.write('Here are our output variables')
st.write(output.head())
st.write('Here are our feature variables')
st.write(features.head())

st.subheader('Model Training')

# `output` (species labels) and `features` (one-hot encoded predictors) were
# already derived from the cleaned `penguin_df` earlier in this script, so
# they are reused here rather than being rebuilt a second time.

# Replace the species names with integer codes; `uniques` holds the distinct
# labels, indexed by their code.
output, uniques = pd.factorize(output)

# A fixed random_state keeps the split -- and therefore the reported accuracy
# and the saved model -- stable across Streamlit reruns (the script is
# re-executed on every widget interaction).
# NOTE(review): test_size=.8 holds out 80% of the rows for evaluation and
# trains on only 20% -- kept as-is, confirm this is intended.
x_train, x_test, y_train, y_test = train_test_split(
    features, output, test_size=.8, random_state=15)

# Fit on the raw NumPy values (no column names attached to the model).
rfc = RandomForestClassifier(random_state=15)
rfc.fit(x_train.values, y_train)

# Score on the held-out rows; accuracy_score expects (y_true, y_pred).
y_pred = rfc.predict(x_test.values)
score = accuracy_score(y_test, y_pred)
st.write(f'Our accuracy score for this model is {score}')

st.subheader('Save the Model Output to Pickle')

# Ensure the destination folder is present before anything is written to it.
output_dir = "outputs"
os.makedirs(output_dir, exist_ok=True)

# Destination paths for the two pickled artifacts.
model_filename = os.path.join(output_dir, "random_forest_penguin.pickle")
uniques_filename = os.path.join(output_dir, "uniques_data.pickle")

# Persist the fitted classifier.
with open(model_filename, "wb") as model_handle:
    pickle.dump(rfc, model_handle)

# Persist the label lookup produced by pd.factorize.
with open(uniques_filename, "wb") as uniques_handle:
    pickle.dump(uniques, uniques_handle)

st.write(f"Model saved to {model_filename}")
st.write("Click below to download the model.")

# One download button per pickle file: (path on disk, button label, name
# offered to the browser). The classifier comes first, then the label lookup.
downloads = (
    (
        model_filename,
        "Download Trained Model (random_forest_penguin.pickle)",
        "random_forest_penguin.pickle",
    ),
    (
        uniques_filename,
        "Download the data (uniques_data.pickle)",
        "uniques_data.pickle",
    ),
)

for pickle_path, button_label, download_name in downloads:
    # Read the file back as raw bytes so Streamlit can serve it.
    with open(pickle_path, "rb") as pickle_file:
        payload = pickle_file.read()
    st.download_button(
        label=button_label,
        data=payload,
        file_name=download_name,
        mime="application/octet-stream",
    )