"""Streamlit demo: K-nearest-neighbors regression on a synthetic 1-D dataset.

The user picks K and a distance metric; a KNeighborsRegressor is fit on the
first 30 samples and true vs. predicted values are plotted for the test set,
along with the test-set mean squared error.
"""

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import streamlit as st
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor

st.subheader("K nearest neighbor (KNN) Regressor")
st_col = st.columns(1)[0]

# --- User controls -------------------------------------------------------
K = st.slider(
    "Number of nearest neighbors (K)", min_value=1, max_value=10, value=5, step=1
)
option = st.selectbox(
    "Select Distance Metric", ("L1(Manhattan)", "L2(Euclidean Distance)")
)
# selectbox already returns a str; map the display label to sklearn's metric name.
metric = "manhattan" if option == "L1(Manhattan)" else "euclidean"

# --- Data ----------------------------------------------------------------
# Fixed random_state keeps the dataset identical across Streamlit reruns,
# so moving the slider only changes the model, not the data.
X, y = make_regression(n_samples=100, n_features=1, noise=0.3, random_state=42)
ntrain = 30
x_train, y_train = X[:ntrain], y[:ntrain]
x_test, y_test = X[ntrain:], y[ntrain:]

# --- Model ---------------------------------------------------------------
knn = KNeighborsRegressor(n_neighbors=K, metric=metric)
knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)

# --- Plot ----------------------------------------------------------------
# Use an explicit Figure rather than pyplot's implicit global one:
# st.pyplot(plt) is deprecated, and global figures accumulate across
# Streamlit reruns if never closed.
fig, ax = plt.subplots()
ax.plot(y_test[:30], "C0s", label="True Points (Test)")
ax.plot(y_pred[:30], "C1*", label="Predictions (Test)")
# The x-axis is the test-sample index (values plotted against position),
# not the feature X — label it accordingly.
ax.set_xlabel("Test sample index")
ax.set_ylabel("y")
ax.legend(loc="upper left")
ax.set_ylim(-90, 90)
sns.despine(fig=fig, right=True, top=True)

with st_col:
    st.pyplot(fig)
    error = mean_squared_error(y_test, y_pred)
    st.write(f"The mean squared error is {error:.2f}")

# Close the figure so reruns don't leak matplotlib state.
plt.close(fig)

st.markdown(
    """
    The above plot shows the True values and Predictions for 30 points in the
    test set. It can be observed that the optimal value of K is 3 for the
    given dataset.
    """,
    unsafe_allow_html=True,
)