File size: 5,445 Bytes
3ef4c7d b6cf9b4 3ef4c7d 62cc7ff 6e017dd 3ef4c7d 6480e50 3ef4c7d 6480e50 3ef4c7d c9d52c2 ce34bb9 eb88a4b 3ef4c7d ce34bb9 3ef4c7d aff1e76 3ef4c7d aff1e76 ce34bb9 7d0c189 ce34bb9 3ef4c7d ce34bb9 3ef4c7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
## CSCI4750/5750: Demo 1
## load the dataset
def demo1_derive_MNIST_train_test_data():
    """Fetch MNIST from OpenML and split it into training and test parts.

    The first 60000 rows become the training set and the remaining rows the
    test set. Labels are cast to ``np.uint8``; features are returned as
    delivered by ``fetch_openml`` (``as_frame=False``, i.e. arrays rather
    than DataFrames).

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    import numpy as np
    from sklearn.datasets import fetch_openml

    split = 60000  # number of leading rows used for training
    dataset = fetch_openml('mnist_784', version=1, as_frame=False)
    features, targets = dataset["data"], dataset["target"]

    train_part = slice(None, split)
    test_part = slice(split, None)

    return (
        features[train_part],
        features[test_part],
        targets[train_part].astype(np.uint8),  # labels as small unsigned ints
        targets[test_part].astype(np.uint8),
    )
# Load the MNIST split once at import time; the functions below read these
# module-level arrays directly.
X_train, X_test, y_train, y_test = demo1_derive_MNIST_train_test_data()
# Report the shapes of the four arrays as a quick sanity check.
print("X_train.shape: ", X_train.shape)
print("X_test.shape: ", X_test.shape)
print("y_train.shape: ", y_train.shape)
print("y_test.shape: ", y_test.shape)
# Aliases for the training data; call_our_KNN passes these to KNN_predict.
train_features = X_train
train_labels = y_train
# A single test sample and a default neighbor count.
# NOTE(review): these two globals are not read by the functions visible in
# this file (each uses its own local/parameter of the same name) -- confirm
# before removing.
test_feature = X_test[0]
K = 3
print("train_features: ",train_features.shape)
print("train_labels: ",train_labels.shape)
print("test_feature: ",test_feature.shape)
# Practice 5: deploy our KNN classifier to web application, with multiple outputs
import scipy
import gradio as gr
import numpy as np
import cv2
import os
def get_sample_images(num_images):
    """Export the first ``num_images`` test digits as PNG example files.

    Each of ``X_test[0] .. X_test[num_images - 1]`` (module-level test set) is
    reshaped to 28x28, cast to ``uint8`` and written to
    ``images_folder/local_<index>.png``.

    Args:
        num_images: How many leading rows of ``X_test`` to export.

    Returns:
        A list with one ``[image_path, K]`` entry per exported image, where
        ``K`` is an ``int`` drawn at random from a fixed set of neighbor
        counts. The entries match the ``[image, K]`` inputs of the interface.
    """
    outdir = "images_folder"
    k_choices = [7, 9, 11, 13, 15, 24]

    # Create the output folder once, up front. ``exist_ok=True`` replaces the
    # per-iteration exists()/mkdir() pair, which was loop-invariant work and
    # could fail if the folder appeared between the check and the mkdir.
    os.makedirs(outdir, exist_ok=True)

    sample_images = []
    for i in range(num_images):
        # Image writers expect 2-D unsigned 8-bit pixel data.
        data = X_test[i].reshape(28, 28).astype(np.uint8)
        img_path = os.path.join(outdir, 'local_%05d.png' % (i,))
        cv2.imwrite(img_path, data)
        sample_images.append([img_path, int(np.random.choice(k_choices))])  # ["image path", "K"]
    return sample_images
# EXTRA: adapted from https://github.com/ageron/handson-ml2/blob/master/03_classification.ipynb
def plot_digits(instances, images_per_row=3):
    """Draw digit images in a grid and save the figure as ``results.png``.

    Args:
        instances: Sequence of images; each element must support
            ``.reshape(28, 28)``.
        images_per_row: Maximum number of images per grid row. The grid never
            uses more columns than there are images.

    Returns:
        The path of the saved figure, ``'results.png'``.
    """
    import matplotlib.pyplot as plt

    size = 28
    n_images = len(instances)
    # Clamp to at least one column so an empty input cannot divide by zero.
    images_per_row = max(1, min(n_images, images_per_row))
    # This is equivalent to n_rows = ceil(n_images / images_per_row):
    n_rows = (n_images - 1) // images_per_row + 1

    fig = plt.figure(figsize=(15, 8))
    try:
        for i, instance in enumerate(instances):
            # Draw on the explicit axes object instead of pyplot's implicit
            # "current axes", so the output does not depend on global state.
            ax = fig.add_subplot(n_rows, images_per_row, i + 1)
            ax.imshow(instance.reshape(size, size), cmap="binary")
            ax.axis("off")
            ax.set_title("Neighbor " + str(i + 1), size=20)
        fig.tight_layout()
        fig.savefig('results.png', dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # each call would leave one more figure in memory.
        plt.close(fig)
    return 'results.png'
## machine learning classifier
def KNN_predict(train_features, train_labels, test_feature, K):
    """Classify one sample by majority vote among its K nearest neighbors.

    Distances are Euclidean, computed between the flattened ``test_feature``
    and every row of ``train_features``. Ties in distance are resolved in
    training-set order.

    Args:
        train_features: 2-D array-like with one flattened training sample per
            row.
        train_labels: Labels aligned with the rows of ``train_features``.
        test_feature: The query sample; it is flattened before comparison, so
            its total size must equal the row length of ``train_features``.
        K: Number of neighbors that vote. Values larger than the training set
            are clamped to the training-set size.

    Returns:
        A tuple ``(final_prediction, class_freq, image_path)``:
        ``final_prediction`` is the object returned by ``scipy.stats.mode``
        for the neighbor labels (mode and count, not a bare integer);
        ``class_freq`` maps ``'Digit 0'`` .. ``'Digit 9'`` to the fraction of
        the K neighbors carrying that label; ``image_path`` is the file
        returned by ``plot_digits`` for up to 24 of the nearest neighbors.

    Raises:
        ValueError: If ``K`` is smaller than 1 or there is no training data.
    """
    # Import the submodules explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` / ``scipy.spatial`` are loaded.
    from scipy import stats
    from scipy.spatial.distance import cdist

    max_plotted = 24  # at most 24 neighbors for visualization

    train = np.asarray(train_features)
    if len(train) == 0:
        raise ValueError("KNN_predict needs at least one training sample")
    if K < 1:
        raise ValueError("K must be at least 1, got %r" % (K,))
    K = min(K, len(train))

    ### (1) calculate distance between test feature and every training point
    # One vectorised call replaces a Python-level loop over all rows.
    query = np.asarray(test_feature, dtype=float).reshape(1, -1)
    distances = cdist(query, train, metric="euclidean")[0]

    ### (2) pick the K closest points
    # A stable sort keeps equally distant points in training-set order.
    nearest = np.argsort(distances, kind="stable")[:K]
    major_class = [train_labels[j] for j in nearest]
    neighbor_imgs = train[nearest[:max_plotted]]

    ### get final prediction
    final_prediction = stats.mode(major_class)

    ### get frequency of classes
    class_freq = {
        'Digit ' + str(i): float(major_class.count(i)) / len(major_class)
        for i in range(0, 10)
    }

    ### get neighbor images and save to local
    image_path = plot_digits(neighbor_imgs, images_per_row=6)
    return final_prediction, class_freq, image_path
### main function for gradio to call to classify image
def call_our_KNN(test_image, K=7):
    """Classify an image with the KNN classifier (the interface callback).

    Args:
        test_image: Image array supporting ``.reshape``; its pixels are
            reshaped to rows of ``28*28`` values.
        K: Number of neighbors; converted with ``int`` before use.

    Returns:
        The three values produced by ``KNN_predict``, in order: prediction,
        per-class frequencies, and the path of the neighbor plot.
    """
    flat_pixels = test_image.reshape((-1, 28 * 28))
    outcome = KNN_predict(train_features, train_labels, flat_pixels, int(K))
    prediction, class_frequencies, neighbors_plot = outcome
    return prediction, class_frequencies, neighbors_plot
### generate several example cases
sample_images = get_sample_images(10)

### configure inputs/outputs
# NOTE(review): gr.inputs / gr.outputs, `theme` and `layout` look like the
# older Gradio API -- confirm the pinned Gradio version before upgrading.
set_image = gr.inputs.Image(shape=(28, 28), image_mode='L')
set_K = gr.inputs.Slider(1, 24, step=1, default=7)

# first output: the predicted class as text
set_label = gr.outputs.Textbox(label="Predicted Digit")
# second output: share of neighbors per class
set_probability = gr.outputs.Label(num_top_classes=10, label="Predicted Probability Per Class")
# third output: picture of the closest training samples
set_out_images = gr.outputs.Image(label="Closest Neighbors")

### configure gradio, details can be found at https://www.gradio.app/docs/#i_slider
# Input order must match call_our_KNN(test_image, K); output order must match
# the three values it returns.
interface = gr.Interface(
    fn=call_our_KNN,
    inputs=[set_image, set_K],
    outputs=[set_label, set_probability, set_out_images],
    examples_per_page=2,
    examples=sample_images,
    title="CSCI4750/5750 Demo 1: Digit classification using KNN algorithm",
    description="Click examples below for a quick demo",
    theme='huggingface',
    layout='vertical',
)
interface.launch(debug=True)