File size: 5,445 Bytes
3ef4c7d
b6cf9b4
3ef4c7d
 
 
 
 
 
 
62cc7ff
 
6e017dd
3ef4c7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6480e50
3ef4c7d
 
6480e50
3ef4c7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c9d52c2
ce34bb9
 
 
 
eb88a4b
3ef4c7d
 
 
 
 
ce34bb9
3ef4c7d
 
 
 
aff1e76
 
3ef4c7d
 
 
 
 
 
 
 
 
 
aff1e76
ce34bb9
7d0c189
ce34bb9
3ef4c7d
 
 
 
 
 
ce34bb9
3ef4c7d
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
## CSCI4750/5750: Demo 1

## load the dataset 
def demo1_derive_MNIST_train_test_data():
    """Fetch MNIST from OpenML and split it into train and test parts.

    The first 60000 samples become the training split and the remaining
    samples the test split. Both label vectors are cast to ``np.uint8``.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    import numpy as np
    from sklearn.datasets import fetch_openml

    split_at = 60000  # boundary between the training and the test samples

    dataset = fetch_openml('mnist_784', version=1, as_frame=False)
    features = dataset["data"]
    labels = dataset["target"]

    train_x = features[:split_at]
    test_x = features[split_at:]
    # Cast each label slice separately so every split owns its own array.
    train_y = labels[:split_at].astype(np.uint8)
    test_y = labels[split_at:].astype(np.uint8)
    return train_x, test_x, train_y, test_y

# Load MNIST once at import time; the rest of the script shares these arrays.
X_train, X_test, y_train, y_test = demo1_derive_MNIST_train_test_data()

# Module-level defaults: the full training split, the first test image as a
# sample query, and a default neighbour count.
train_features, train_labels = X_train, y_train
test_feature = X_test[0]
K = 3

# Print the shape of every array next to its caption.
for _caption, _array in (
    ("X_train.shape: ", X_train),
    ("X_test.shape: ", X_test),
    ("y_train.shape: ", y_train),
    ("y_test.shape: ", y_test),
    ("train_features: ", train_features),
    ("train_labels: ", train_labels),
    ("test_feature: ", test_feature),
):
    print(_caption, _array.shape)
del _caption, _array  # keep the loop temporaries out of the module namespace

# Practice 5: deploy our KNN classifier as a web application with multiple outputs

import scipy
import gradio as gr
import numpy as np
import cv2
import os

def get_sample_images(num_images):
    """Export the first ``num_images`` test digits as PNG example files.

    Each of the leading rows of the module-level ``X_test`` is reshaped to
    28x28, cast to ``np.uint8`` and written to
    ``images_folder/local_<index>.png``. Every image is paired with a
    neighbour count drawn at random from a fixed list of candidates.

    Args:
        num_images: Number of leading ``X_test`` rows to export.

    Returns:
        A list of ``[image_path, K]`` pairs, one per exported image.

    Raises:
        OSError: If OpenCV reports that an image could not be written.
    """
    outdir = "images_folder"
    # Create the folder once up front. ``exist_ok`` replaces the
    # check-then-create pattern, which was re-evaluated on every iteration
    # and could fail if the folder appeared between the check and the mkdir.
    os.makedirs(outdir, exist_ok=True)

    k_choices = [7, 9, 11, 13, 15, 24]
    sample_images = []
    for i in range(num_images):
        # Make it a 28x28 array of unsigned integers for the PNG encoder.
        pixels = X_test[i].reshape(28, 28).astype(np.uint8)

        img_path = os.path.join(outdir, 'local_%05d.png' % (i,))
        # cv2.imwrite signals failure through its return value, so check it
        # instead of handing out a path to a file that was never written.
        if not cv2.imwrite(img_path, pixels):
            raise OSError("could not write sample image to %s" % (img_path,))

        sample_images.append([img_path, int(np.random.choice(k_choices))])   # ["image path", "K"]
    return sample_images

# EXTRA: adapted from https://github.com/ageron/handson-ml2/blob/master/03_classification.ipynb
def plot_digits(instances, images_per_row=3):
    """Render digit images on a grid and save the figure to ``results.png``.

    Every entry of ``instances`` is reshaped to 28x28 and drawn in its own
    subplot titled ``Neighbor <n>`` (1-based). The grid has at most
    ``images_per_row`` columns and as many rows as needed.

    Args:
        instances: Sequence of arrays, each reshapeable to 28x28.
        images_per_row: Maximum number of images per grid row.

    Returns:
        The path of the saved figure, always ``'results.png'``.

    Raises:
        ValueError: If ``instances`` is empty.
    """
    n_instances = len(instances)
    if n_instances == 0:
        # Without this guard the row computation below divides by zero.
        raise ValueError("plot_digits needs at least one image to draw")

    import matplotlib.pyplot as plt
    import matplotlib as mpl

    size = 28
    images_per_row = min(n_instances, images_per_row)
    # This is equivalent to n_rows = ceil(len(instances) / images_per_row):
    n_rows = (n_instances - 1) // images_per_row + 1

    fig = plt.figure(figsize=(15, 8))
    try:
        for i, instance in enumerate(instances):
            # Draw through the Axes object rather than pyplot's implicit
            # "current axes" so the target of each call is explicit.
            ax = fig.add_subplot(n_rows, images_per_row, i + 1)
            ax.imshow(instance.reshape(size, size), cmap=mpl.cm.binary)
            ax.axis("off")
            ax.set_title("Neighbor " + str(i + 1), size=20)
        fig.tight_layout()

        # NOTE: the output path is fixed, so concurrent calls overwrite
        # each other's file.
        fig.savefig('results.png', dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate figures in memory.
        plt.close(fig)
    return 'results.png'

    
## machine learning classifier
def KNN_predict(train_features, train_labels, test_feature, K):
    """Classify one sample by majority vote among its K nearest neighbours.

    Euclidean distances from the flattened ``test_feature`` to every row of
    ``train_features`` are computed in one vectorised call. The K closest
    rows vote for the prediction, and up to 24 of them are rendered with
    ``plot_digits`` for visualisation.

    Args:
        train_features: 2-D array-like with one training sample per row.
        train_labels: Labels aligned with the rows of ``train_features``.
        test_feature: The query sample; it is flattened before use and must
            have as many values as one training row.
        K: Number of neighbours that vote. Values larger than the number of
            training samples are clamped to that number.

    Returns:
        Tuple ``(final_prediction, class_freq, image_path)`` where
        ``final_prediction`` is the object returned by ``scipy.stats.mode``
        for the neighbour labels, ``class_freq`` maps ``'Digit 0'`` ..
        ``'Digit 9'`` to the fraction of neighbours carrying that label, and
        ``image_path`` is the file written by ``plot_digits``.

    Raises:
        ValueError: If there are no training samples or ``K`` is below 1.
    """
    # Import the submodules explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.spatial`` and ``scipy.stats`` are loaded.
    from scipy import stats
    from scipy.spatial import distance

    train_features = np.asarray(train_features)
    n_train = len(train_features)
    if n_train == 0:
        raise ValueError("train_features must contain at least one sample")
    K = int(K)
    if K < 1:
        raise ValueError("K must be at least 1, got %d" % (K,))
    K = min(K, n_train)  # cannot ask for more neighbours than samples

    ### (1) calculate distance between test feature and each training point
    # One cdist call replaces a Python-level loop over every training row.
    query = np.asarray(test_feature).reshape(1, -1)
    distances = distance.cdist(query, train_features, metric="euclidean")[0]

    ### (2) pick the K closest points
    # A stable sort keeps equally distant points in training-set order.
    nearest = np.argsort(distances, kind="stable")[:K]

    # collect the labels of the top K neighbours
    major_class = [train_labels[i] for i in nearest]

    ### get final prediction
    final_prediction = stats.mode(major_class)

    ### get frequency of classes
    class_freq = {
        'Digit ' + str(digit): float(major_class.count(digit)) / len(major_class)
        for digit in range(10)
    }

    ### get neighbor images and save to local
    # at most 24 neighbors for visualization
    neighbor_imgs = train_features[nearest[:24]]
    image_path = plot_digits(neighbor_imgs, images_per_row=6)

    return final_prediction, class_freq, image_path

### main function for gradio to call to classify image
def call_our_KNN(test_image, K=7):
    """Classify one image with the KNN classifier; used as the Gradio callback.

    The image is reshaped to a single row of 28*28 values and classified
    against the module-level ``train_features`` / ``train_labels``.

    Args:
        test_image: Array-like holding the 28*28 pixel values of one image.
        K: Number of neighbours that vote; converted with ``int``.

    Returns:
        Tuple ``(predicted_digit, class_frequencies, image_path)`` where
        ``predicted_digit`` is a plain ``int``, and the other two values are
        passed through unchanged from ``KNN_predict``.

    Raises:
        ValueError: If ``test_image`` is ``None``.
    """
    if test_image is None:
        # Fail with a readable message instead of an AttributeError on reshape.
        raise ValueError("No image was provided; draw or upload a digit first.")

    test_image_flatten = np.asarray(test_image).reshape((-1, 28*28))
    y_pred_each, y_prob_each, image_path = KNN_predict(train_features, train_labels, test_image_flatten, int(K))

    # KNN_predict hands back the scipy.stats.mode result object. Showing that
    # object in a text output prints its repr rather than the digit, so pull
    # out the mode value. A plain number is accepted as well.
    mode_value = getattr(y_pred_each, "mode", y_pred_each)
    predicted_digit = int(np.asarray(mode_value).ravel()[0])
    return predicted_digit, y_prob_each, image_path


### generate several example cases
# Writes 10 test digits to disk; each example is an [image path, K] pair.
sample_images = get_sample_images(10)

### configure inputs/outputs
# NOTE(review): gr.inputs / gr.outputs look like the legacy Gradio component
# namespaces — confirm the pinned Gradio version still provides them.
# 28x28 input image with image_mode 'L' (presumably single-channel grayscale —
# confirm against the Gradio docs).
set_image = gr.inputs.Image(shape=(28, 28), image_mode='L')
# Neighbour count K: slider from 1 to 24 in steps of 1, default 7. The upper
# bound equals the 24-neighbour visualization cap in KNN_predict.
set_K = gr.inputs.Slider(1, 24, step=1, default=7)

# first output: the predicted digit as text
set_label = gr.outputs.Textbox(label="Predicted Digit")

# second output: the per-class frequencies among the K neighbours (all 10 digits)
set_probability = gr.outputs.Label(num_top_classes=10, label="Predicted Probability Per Class")

# third output: the image of the closest neighbours saved by plot_digits
set_out_images = gr.outputs.Image(label="Closest Neighbors")


### configure gradio, details can be found at https://www.gradio.app/docs/#i_slider
# The three outputs are listed in the same order as the values returned by
# call_our_KNN.
interface = gr.Interface(fn=call_our_KNN, 
                         inputs=[set_image, set_K], 
                         outputs=[set_label,set_probability,set_out_images],
                         examples_per_page = 2,
                         examples = sample_images, 
                         title="CSCI4750/5750 Demo 1: Digit classification using KNN algorithm", 
                         description= "Click examples below for a quick demo",
                         theme = 'huggingface',
                         layout = 'vertical'
                         )
# Start the web app; this runs at import time as the last statement of the script.
interface.launch(debug=True)