# Streamlit demo: K-nearest-neighbor (KNN) classification with an adjustable K.
"""Streamlit demo: effect of K on K-nearest-neighbor (KNN) classification.

Trains a KNN classifier on a small synthetic 2-D blob dataset and renders
its decision regions together with the training points. A slider controls
the number of neighbors K, so reruns of the script retrain and redraw.
"""
# All imports at the top of the file (they were previously scattered
# through the script body).
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score  # NOTE(review): unused here — consider removing
from sklearn.datasets import make_blobs

st.subheader("K nearest neighbor (KNN) classification")

# The single hyperparameter of the demo, driven by the UI.
K = st.slider('Number of nearest neighbors (K)',
              min_value=1, max_value=10, value=5, step=1)

# Synthetic 2-D dataset: 3 heavily overlapping Gaussian blobs.
# Fixed seed so every rerun of the script sees the same data.
X, y = make_blobs(n_samples=1000, centers=3, n_features=2,
                  cluster_std=6, random_state=42)

ntrain = 100  # first 100 points train the model; the rest only set the plot bounds
x_train, y_train = X[:ntrain], y[:ntrain]
x_test = X[ntrain:]  # y[ntrain:] is never used, so it is not kept

knn = KNeighborsClassifier(n_neighbors=K)
knn.fit(x_train, y_train)

# Evaluate the classifier on a 200x200 grid spanning the held-out points
# to draw the decision regions. Distinct names (grid_x/grid_y) avoid the
# original's shadowing of the label array `y`.
grid_x = np.linspace(x_test[:, 0].min(), x_test[:, 0].max(), 200)
grid_y = np.linspace(x_test[:, 1].min(), x_test[:, 1].max(), 200)
xx, yy = np.meshgrid(grid_x, grid_y)
grid_labels = knn.predict(np.c_[xx.ravel(), yy.ravel()])

fig, ax = plt.subplots()
ax.pcolormesh(xx, yy, grid_labels.reshape(xx.shape), cmap='jet', alpha=0.2)

# Overlay the training points, one marker/color per class.
for cls, marker, color in zip(np.unique(y_train), '*x+', 'bgr'):
    mask = y_train == cls
    ax.scatter(x_train[mask, 0], x_train[mask, 1], marker=marker, c=color)

# Pass the Figure explicitly; st.pyplot(plt) (the module) is deprecated.
# The original wrapped this in st.columns(1)[0] — a single full-width
# column, which is equivalent to rendering directly into the page.
st.pyplot(fig)

# Hide Streamlit chrome (main menu and footer).
# NOTE(review): the `subheader` rule below is inert CSS (not a valid
# selector/property pair) — kept byte-for-byte, but it has no effect.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
subheader {alignment: center;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

st.markdown("""
There are several points to note on the effect of K on the quality of model fit:
* Models with extremely small values of K learn the local patterns and do not generalize well thus they have a high variance or overfitting effect.
* Models with extremely high values of K suffer from averaging effect over the entire space and thus do not do well even on the train points. This is known as a high bias or underfitting effect.
""")