penguins / app.py
nowave's picture
Upload 2 files
96a4277
import pickle
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# Page heading followed by a short description of what the app does.
st.title("Penguin Classifier")
app_summary = (
    "App uses 6 inputs to predict\n"
    "the species of penguin using a model\n"
    "built on the Palmer's Penguins dataset.\n"
)
st.write(app_summary)
# Password gate: the rest of the script only runs once the expected
# password has been entered.
# type="password" masks the typed characters instead of echoing them.
# NOTE(review): the expected password is hard-coded in source; consider
# moving it to Streamlit's secrets management.
password_guess = st.text_input("Input Password?", type="password")
if password_guess != "streamlit":
    st.stop()
# Optional CSV upload: without an upload the pickled model is reused,
# with an upload a new model is trained on the uploaded data.
penguin_file = st.file_uploader("Upload your own data")
if penguin_file is None:
    # Load the saved classifier and the species label mapping.
    # Context managers close the files even if unpickling fails.
    # NOTE: pickle.load can execute arbitrary code; these files must stay
    # trusted, app-owned artifacts.
    with open("models/rf_penguin.pickle", "rb") as rf_pickle:
        rfc = pickle.load(rf_pickle)
    with open("models/class_penguin.pickle", "rb") as map_pickle:
        unique_penguin_mapping = pickle.load(map_pickle)
    # The bundled dataset is still needed for the histograms further down.
    penguin_df = pd.read_csv("penguins.csv")
else:
    penguin_df = pd.read_csv(penguin_file)
    # Fill missing sex values with the most frequent one. Assign the result
    # back instead of calling fillna(inplace=True) on the column selection,
    # which does not reliably update the frame under pandas copy-on-write.
    penguin_df["sex"] = penguin_df["sex"].fillna(penguin_df["sex"].mode()[0])
    # Any row that still has a missing value is discarded.
    penguin_df = penguin_df.dropna()
    output = penguin_df["species"]
    features = penguin_df[
        [
            "island",
            "bill_length_mm",
            "bill_depth_mm",
            "flipper_length_mm",
            "body_mass_g",
            "sex",
        ]
    ]
    # One-hot encode the categorical columns (island, sex).
    features = pd.get_dummies(features)
    # Encode species as integer codes; the mapping turns codes back to names.
    output, unique_penguin_mapping = pd.factorize(output)
    # test_size=0.8 holds out 80% of the rows for scoring.
    x_train, x_test, y_train, y_test = train_test_split(
        features, output, test_size=0.8
    )
    rfc = RandomForestClassifier(random_state=15)
    # Fit on plain arrays (.values) so later predictions can pass bare lists.
    rfc.fit(x_train.values, y_train)
    y_pred = rfc.predict(x_test.values)
    score = round(accuracy_score(y_pred, y_test), 2)
    # NOTE(review): these are the same paths the no-upload branch reads, so a
    # model trained on an upload replaces the default one for later runs
    # without an upload -- confirm this is intended.
    with open("models/rf_penguin.pickle", "wb") as rf_pickle:
        pickle.dump(rfc, rf_pickle)
    with open("models/class_penguin.pickle", "wb") as output_pickle:
        pickle.dump(unique_penguin_mapping, output_pickle)
    st.write(
        "Trained a Random Forest model on these data,\n"
        f"it has a score of {score}! "
    )
# Build the input form: two select boxes, four numeric fields and a
# submit button.
# NOTE(review): the dataset usually spells the third island "Torgersen";
# the label is kept as-is because the one-hot encoding step compares
# against this exact string.
with st.form("user_inputs"):
    island = st.selectbox(
        label="Penguin Island",
        options=["Biscoe", "Dream", "Torgerson"],
    )
    sex = st.selectbox(label="Sex", options=["Female", "Male"])
    # The four measurements share the same widget settings; they are created
    # in display order and unpacked into their individual names.
    bill_length, bill_depth, flipper_length, body_mass = [
        st.number_input(field_label, min_value=0)
        for field_label in (
            "Bill Length (mm)",
            "Bill Depth (mm)",
            "Flipper Length (mm)",
            "Body Mass (g)",
        )
    ]
    st.form_submit_button()
# One-hot encode the selected island: the flag for the matching island is 1
# and the others are 0 (all 0 if the selection matches none of them).
island_biscoe = int(island == "Biscoe")
island_dream = int(island == "Dream")
island_torgerson = int(island == "Torgerson")
# Same encoding for the selected sex.
sex_female = int(sex == "Female")
sex_male = int(sex == "Male")
# Prediction: a single-row input made of the four numeric measurements,
# followed by the island flags and then the sex flags.
new_prediction = rfc.predict(
    [
        [
            bill_length,
            bill_depth,
            flipper_length,
            body_mass,
            island_biscoe,
            island_dream,
            island_torgerson,
            sex_female,
            sex_male,
        ]
    ]
)
st.subheader("Predicting Your Penguin's Species:")
# Translate the predicted integer code back into the species name.
prediction_species = unique_penguin_mapping[new_prediction][0]
# The bold marker is closed so the species renders in bold instead of
# leaving a stray "**" in the heading.
st.write(f"# Prediction Species: **{prediction_species}**")
# Explanatory text shown between the prediction and the histograms.
# NOTE(review): the first paragraph announces a feature-importance ranking
# "below", but nothing in the visible script renders one -- confirm
# whether a chart or image is missing.
model_blurb = (
    "Machine learning\n"
    "(Random Forest) model to predict the\n"
    "species, the features used in this\n"
    "prediction are ranked by relative\n"
    "importance below."
)
st.write(model_blurb)
histogram_blurb = (
    "Below are the histograms for each\n"
    "continuous variable separated by penguin species.\n"
    "The vertical line represents the inputted value."
)
st.write(histogram_blurb)
# One histogram per continuous measurement, coloured by species, with a
# vertical line at the value entered in the form.
# sns.displot is figure-level and creates its own figure, so no
# plt.subplots() call is made beforehand (it only produced an empty,
# orphaned figure on every rerun).
for column_name, entered_value, plot_title in (
    ("bill_length_mm", bill_length, "Bill Length by Species"),
    ("bill_depth_mm", bill_depth, "Bill Depth by Species"),
    ("flipper_length_mm", flipper_length, "Flipper Length by Species"),
):
    grid = sns.displot(x=penguin_df[column_name], hue=penguin_df["species"])
    # The grid's figure is the current pyplot figure at this point, so the
    # marker line and title land on the histogram's axes.
    plt.axvline(entered_value)
    plt.title(plot_title)
    # Hand Streamlit the underlying matplotlib Figure, then close it so
    # figures do not pile up across script reruns.
    st.pyplot(grid.figure)
    plt.close(grid.figure)