Spaces:
Runtime error
Runtime error
ankitajain
committed on
Commit
•
a3fdf79
1
Parent(s):
5fc12d0
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import numpy as np
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
|
5 |
+
# Page heading for the demo.
st.subheader("K nearest neighbor (KNN) classification")

# A single column; the plot is rendered inside it further down the script.
(st_col,) = st.columns(1)

# Slider controlling how many neighbours the classifier consults (1..10,
# starting at 5).
K = st.slider(
    'Number of nearest neighbors (K)',
    min_value=1,
    max_value=10,
    value=5,
    step=1,
)
|
10 |
+
|
11 |
+
from sklearn.neighbors import KNeighborsClassifier as KNN
|
12 |
+
from sklearn.model_selection import cross_val_score
|
13 |
+
|
14 |
+
from sklearn.datasets import make_blobs
|
15 |
+
# Synthetic data set: 1000 two-feature samples drawn around 3 centres with a
# wide spread (cluster_std=6). The seed is fixed so every rerun sees the same
# points.
X, y = make_blobs(
    n_samples=1000,
    centers=3,
    n_features=2,
    cluster_std=6,
    random_state=42,
)

# The first `ntrain` samples are used for fitting; the rest are held out.
ntrain = 100
x_train, x_test = X[:ntrain], X[ntrain:]
y_train, y_test = y[:ntrain], y[ntrain:]

# Fit a K-nearest-neighbour classifier using the K chosen on the slider.
knn = KNN(n_neighbors=K)
knn.fit(x_train, y_train)
|
25 |
+
# Draw on an explicit Figure/Axes pair so the object handed to Streamlit is a
# real Figure rather than the pyplot module and its implicit global state.
fig, ax = plt.subplots()

# Regular grid spanning the range of the held-out points. The grid axes get
# their own names so the label array `y` from make_blobs is not overwritten.
grid_size = 200
grid_x = np.linspace(np.min(x_test[:, 0]), np.max(x_test[:, 0]), grid_size)
grid_y = np.linspace(np.min(x_test[:, 1]), np.max(x_test[:, 1]), grid_size)
xx, yy = np.meshgrid(grid_x, grid_y)
xy = np.c_[xx.ravel(), yy.ravel()]

# Classify every grid point and paint the predicted class as a faint
# background colour.
y_predicted = knn.predict(xy)
ax.pcolormesh(xx, yy, y_predicted.reshape(xx.shape), cmap='jet', alpha=0.2)

# Overlay the training points, one marker/colour pair per class label.
# Three marker/colour pairs are defined, matching the 3 blob centres.
y_unique = np.unique(y_train)
markers = '*x+'
colors = 'bgr'
for label, marker, color in zip(y_unique, markers, colors):
    members = y_train == label
    ax.scatter(
        x_train[members, 0],
        x_train[members, 1],
        marker=marker,
        c=color,
    )

# Render the figure inside the column created near the top of the script.
with st_col:
    st.pyplot(fig)
|
47 |
+
|
48 |
+
# CSS injected into the page: hides the element with id `MainMenu` and the
# `footer` element.
# NOTE(review): `subheader` is not a standard HTML element and `alignment` is
# not a standard CSS property, so the third rule probably has no effect -
# confirm the intent before changing it.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
subheader {alignment: center;}
</style>
"""

# unsafe_allow_html=True is passed so the <style> block is emitted as HTML.
st.markdown(
    hide_streamlit_style,
    unsafe_allow_html=True,
)
|
56 |
+
|
57 |
+
# Explanatory text rendered as markdown: how the choice of K affects the
# quality of the fit (small K -> high variance, large K -> high bias).
_k_effect_notes = """
There are several points to note on the effect of K on the quality of model fit:
* Models with extremely small values of K learn the local patterns and do not generalize well thus they have a high variance or overfitting effect.
* Models with extremely high values of K suffer from averaging effect over the entire space and thus do not do well even on the train points. This is known as a high bias or underfitting effect.
"""
st.markdown(_k_effect_notes)
|