uservipin's picture
requirements.txt having only required files
63d4d1b
raw
history blame
2.03 kB
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import warnings
import streamlit as st
from io import StringIO
# Streamlit page: upload a CSV/TXT file, preview it, and show its column
# names and row count.
st.title("Penguin Species Classification")

# A list (rather than a set) keeps the accepted-extension order deterministic
# and matches the documented `type` argument of st.file_uploader.
spectra = st.file_uploader("upload file", type=["csv", "txt"])

if spectra is not None:
    try:
        spectra_df = pd.read_csv(spectra)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Report an unreadable upload on the page instead of a raw traceback.
        st.error(f"Could not read the uploaded file as CSV: {exc}")
    else:
        # Preview the first rows, then summarize the dataset.
        st.write(spectra_df.head(5))
        headers = spectra_df.columns.tolist()
        total_rows = spectra_df.shape[0]
        st.write("Headers", headers)
        st.write("Total Rows", total_rows)

# NOTE: Disabled clustering prototype, kept for reference only.
# It used to be wrapped in a bare triple-quoted string; because that string
# was not a docstring, Streamlit's "magic" (enabled by default) would render
# it on the page. Real comments keep it out of the UI.
# NOTE(review): the prototype reads `df`, which the visible script never
# defines (the uploaded data is `spectra_df`), and uses plt.show() rather
# than a Streamlit call -- confirm both before re-enabling.
#
# # fig = plt.figure(figsize=(15,6))
# df.boxplot()
# plt.show()
# df1 = df[~((df['flipper_length_mm']>4000) | (df['flipper_length_mm']<0)) ]
# df1
# df2 = pd.get_dummies(df1).drop("sex_.", axis=1)
# df2
# # perform preprocessing steps on the dataset - scaling
# scalar = StandardScaler()
# X = scalar.fit_transform(df2)
# df_preprocessed = pd.DataFrame(data=X, columns=df2.columns)
# df_preprocessed
# import numpy as np
# df_preprocessed = df_preprocessed.replace(np.nan, 0)
# # apply PCA
# pca = PCA(n_components= None)
# dfx_pca = pca.fit(df_preprocessed)
# dfx_pca.explained_variance_ratio_
# n_components = sum(dfx_pca.explained_variance_ratio_>0.1)
# print("Components :", n_components)
# pca = PCA(n_components= n_components)
# df_pca = pca.fit_transform(df_preprocessed)
# dfx_pca.explained_variance_ratio_
# inertia = []
# for k in range(1, 10):
#     kmeans = KMeans(n_clusters=k, random_state=42).fit(df_preprocessed)
#     inertia.append(kmeans.inertia_)
# plt.plot(range(1, 10), inertia, marker="o")
# plt.xlabel("Number of clusters")
# plt.ylabel("Inertia")
# plt.title("Elbow Method")
# plt.show()
# n_clusters = 4
# n_clusters = 4
# kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(df_preprocessed)
# plt.scatter(df_pca[:, 0], df_pca[:, 1], c=kmeans.labels_, cmap="viridis")
# plt.xlabel("First Principal Component")
# plt.ylabel("Second Principal Component")
# plt.title(f"K-means Clustering (K={n_clusters})")
# plt.show()