import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.datasets import make_blobs

st.subheader("K-nearest neighbors (KNN) classification")
st_col = st.columns(1)[0]

K = st.slider('Number of nearest neighbors (K)', min_value=1, max_value=10, value=5, step=1)

# Generate a 2-D, 3-class dataset and split it into train and test sets.
X, y = make_blobs(n_samples=1000, centers=3, n_features=2, cluster_std=6, random_state=42)
ntrain = 100
x_train, y_train = X[:ntrain], y[:ntrain]
x_test, y_test = X[ntrain:], y[ntrain:]

# Fit a KNN classifier with the user-selected K.
knn = KNN(n_neighbors=K)
knn.fit(x_train, y_train)

# Evaluate the classifier on a 200x200 grid to visualize the decision regions.
fig = plt.figure()
gx = np.linspace(np.min(x_test[:, 0]), np.max(x_test[:, 0]), 200)
gy = np.linspace(np.min(x_test[:, 1]), np.max(x_test[:, 1]), 200)
xx, yy = np.meshgrid(gx, gy)
xy = np.c_[xx.ravel(), yy.ravel()]
y_predicted = knn.predict(xy)
plt.pcolormesh(xx, yy, y_predicted.reshape(200, 200), cmap='jet', alpha=0.2)

# Overlay the training points, using one marker and color per class.
y_unique = np.unique(y_train)
markers = '*x+'
colors = 'bgr'
for i in range(len(y_unique)):
    plt.scatter(x_train[y_train == y_unique[i], 0],
                x_train[y_train == y_unique[i], 1],
                marker=markers[i], c=colors[i])

with st_col:
    st.pyplot(fig)

st.markdown("""
There are several points to note about the effect of K on the quality of the model fit:

* Models with extremely small values of K learn local patterns and do not generalize well; this is a high-variance (overfitting) effect.
* Models with extremely large values of K suffer from an averaging effect over the entire space and perform poorly even on the training points; this is a high-bias (underfitting) effect.
""")
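
# --- Choosing K ----------------------------------------------------------
# The bias-variance tradeoff described in the markdown above can be made
# concrete by scoring the classifier at each candidate K. This is a minimal
# sketch, not part of the original app: it reuses the x_train/y_train/
# x_test/y_test arrays defined above, and the 5-fold cross-validation is an
# assumed, arbitrary choice.
from sklearn.model_selection import cross_val_score

# Held-out accuracy of the model fitted with the slider-selected K.
test_acc = knn.score(x_test, y_test)
st.write(f"Test accuracy at K={K}: {test_acc:.3f}")

# Mean cross-validated accuracy for each K in the slider's range; very small
# K tends to score well on training folds but poorly on held-out folds,
# while very large K flattens out toward the majority-class rate.
cv_means = [cross_val_score(KNN(n_neighbors=k), x_train, y_train, cv=5).mean()
            for k in range(1, 11)]
best_k = int(np.argmax(cv_means)) + 1  # +1 because the sweep starts at K=1
st.write(f"Best K by 5-fold cross-validation: {best_k} "
         f"(mean accuracy {max(cv_means):.3f})")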