File size: 3,175 Bytes
976d902
 
48a67eb
976d902
8bf1b11
976d902
 
 
 
 
 
 
 
 
 
7795094
9783e49
 
 
976d902
 
 
 
 
fd36a54
976d902
380c4f4
 
 
 
cfd06f9
 
 
 
 
 
acc4c62
380c4f4
136a90e
 
380c4f4
 
 
 
976d902
 
 
 
 
 
 
 
8bf1b11
 
 
976d902
 
 
 
 
 
00b631c
8bf1b11
976d902
8bf1b11
 
976d902
 
136a90e
976d902
cfd06f9
976d902
 
 
 
 
b62e326
976d902
b62e326
976d902
 
9783e49
976d902
 
cfd06f9
976d902
 
 
 
cfd06f9
976d902
 
 
099f58f
976d902
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import numpy as np
import gradio as gr
import pandas as pd


def homework01_solution1(K, X1, X2):
    """Run the homework K-NN regression demo for a single query point.

    Fits a ``KNeighborsRegressor`` on the fixed eight-sample training set
    defined in this function, predicts the label of the query point
    ``(X1, X2)``, lists the K nearest training samples, and evaluates the
    fitted model on its own training data.

    Args:
        K: Number of neighbours. Converted with ``int()``; must be between
            1 and the number of training samples (8).
        X1: First feature of the query point.
        X2: Second feature of the query point.

    Returns:
        A 4-tuple ``(train_data, results, predicted_label, mse)``:

        * ``train_data`` -- DataFrame with the training features, the true
          label, the K-NN prediction and the squared error per sample
          (the last two rounded to 3 decimals).
        * ``results`` -- DataFrame with one row per neighbour of the query
          point, in the order returned by ``kneighbors`` (rank starts at 0),
          with the distance rounded to 3 decimals.
        * ``predicted_label`` -- K-NN prediction for ``(X1, X2)``.
        * ``mse`` -- mean squared error of the model on the training set.

    Raises:
        ValueError: If ``K`` is smaller than 1 or larger than the number of
            training samples.
    """
    # scikit-learn is imported locally because the file does not import it
    # at module level. ``mean_squared_error`` is imported explicitly instead
    # of relying on ``import sklearn`` exposing the ``metrics`` submodule.
    from sklearn.metrics import mean_squared_error
    from sklearn.neighbors import KNeighborsRegressor

    K = int(K)

    # Fixed training set used by the homework.
    X = np.array([[2.0, 5.0], [3.0, 4.0], [3.0, 2.0], [1.0, 3.0],
                  [5.0, 2.0], [7.0, 1.0], [6.0, 3.0], [7.0, 4.0]])
    y = np.array([2.9, 4.2, 5.8, 3.2, 8.9, 9.2, 7.4, 8.2])

    # Fail early with a readable message rather than deep inside scikit-learn.
    if not 1 <= K <= len(X):
        raise ValueError(
            f"K must be between 1 and {len(X)} (number of training samples), got {K}"
        )

    query = np.array([[X1, X2]])

    neigh = KNeighborsRegressor(n_neighbors=K)
    neigh.fit(X, y)
    predicted_label = neigh.predict(query)[0]

    # Evaluate on the training data once and reuse the predictions for both
    # the per-sample squared error and the overall MSE.
    train_pred = neigh.predict(X)
    squared_error = (train_pred - y) ** 2
    mse = mean_squared_error(y, train_pred)

    train_data = pd.DataFrame(X, columns=['X1', 'X2'])
    train_data['Label (Y)'] = y
    train_data[f'Predicted Label ({K}-NN)'] = train_pred.round(3)
    train_data['Squared Error'] = squared_error.round(3)

    # Neighbours of the query point, nearest first.
    nb_dist, nb_index = neigh.kneighbors(query, K)

    result_columns = ['Rank of closest neighbor', 'Features (X_1,X_2)',
                      'Label (Y)', 'Distance to query data']
    # Collect all rows first and build the DataFrame in one go; growing a
    # DataFrame with repeated ``pd.concat`` onto an empty frame is slow and
    # can leave the columns with ``object`` dtype.
    rows = [
        {
            'Rank of closest neighbor': rank,
            'Features (X_1,X_2)': '({})'.format(', '.join(map(str, X[idx].tolist()))),
            'Label (Y)': y[idx],
            'Distance to query data': dist,
        }
        for rank, (idx, dist) in enumerate(zip(nb_index[0], nb_dist[0]))
    ]
    results = pd.DataFrame(rows, columns=result_columns)
    results['Distance to query data'] = results['Distance to query data'].round(3)

    return train_data, results, predicted_label, mse




### configure inputs
# Default values shown in the three numeric input boxes (K, X1, X2).
set_K = gr.Number(value=7)
set_X1 = gr.Number(value=1)
set_X2 = gr.Number(value=2)

### configure outputs
# One component per value returned by homework01_solution1, in the same order.
set_output_traindata = gr.Dataframe(type='pandas', label ='Train Dataset')
set_output_q1a = gr.Dataframe(type='pandas', label ='Question 1: KNN-Regressor Search')
set_output_q1b = gr.Textbox(label ='Question 1: KNN-Regressor Prediction')
set_output_q3 = gr.Textbox(label ='Question 3: KNN-Regressor MSE (Training data)')

### configure Gradio
interface = gr.Interface(
    fn=homework01_solution1,
    inputs=[set_K, set_X1, set_X2],
    outputs=[set_output_traindata, set_output_q1a, set_output_q1b, set_output_q3],
    title="CSCI4750/5750(hw01-PartI): Mathematics for KNN (Question 1: KNN-Regressor Search)",
    description="Click examples below for a quick demo",
    # The description tells users to click an example, so provide some:
    # each inner list is one (K, X1, X2) input triple.
    examples=[[3, 3, 3], [7, 1, 2]],
    # Only pre-fill the inputs on click; do not run the function at start-up.
    cache_examples=False,
    # NOTE(review): 'huggingface' looks like a legacy theme name -- confirm it
    # is still accepted by the installed Gradio version.
    theme='huggingface',
)


# Launch only when executed as a script, so importing this module (for
# example from a test) does not start a web server.
if __name__ == "__main__":
    interface.launch(debug=True)