# jiehou's picture
# Update app.py
# c9d52c2
## CSCI4750/5750: Demo 1
## load the dataset
def demo1_derive_MNIST_train_test_data():
    """Fetch the 'mnist_784' dataset and split it into train and test parts.

    The first 60000 samples form the training set and the remainder the
    test set. Both label arrays are cast to ``np.uint8``.

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)``.
    """
    import numpy as np
    from sklearn.datasets import fetch_openml

    dataset = fetch_openml('mnist_784', version=1, as_frame=False)
    features = dataset["data"]
    targets = dataset["target"]

    # Split features and labels at the same position.
    X_train = features[:60000]
    X_test = features[60000:]
    # Labels are cast to small unsigned integers after slicing.
    y_train = targets[:60000].astype(np.uint8)
    y_test = targets[60000:].astype(np.uint8)

    return X_train, X_test, y_train, y_test
# Load the MNIST split once at import time. The resulting arrays are
# module-level globals: get_sample_images reads X_test, and call_our_KNN
# reads train_features / train_labels defined below.
X_train, X_test, y_train, y_test = demo1_derive_MNIST_train_test_data()
# Print the shapes of the four arrays as a quick sanity check of the split.
print("X_train.shape: ", X_train.shape)
print("X_test.shape: ", X_test.shape)
print("y_train.shape: ", y_train.shape)
print("y_test.shape: ", y_test.shape)
# Aliases for the training data that call_our_KNN passes to KNN_predict.
train_features = X_train
train_labels = y_train
# A single test sample and a default K. In the visible code, test_feature is
# only printed below and this module-level K is not read anywhere else
# (the functions take their own K argument).
test_feature = X_test[0]
K = 3
print("train_features: ",train_features.shape)
print("train_labels: ",train_labels.shape)
print("test_feature: ",test_feature.shape)
# Practice 5: deploy our KNN classifier as a web application with multiple outputs
import scipy
import gradio as gr
import numpy as np
import cv2
import os
def get_sample_images(num_images):
    """Save the first ``num_images`` test digits as PNG files for the demo.

    Each row ``X_test[i]`` (module-level test set) is reshaped to 28x28,
    cast to ``np.uint8`` and written to ``images_folder/local_<i>.png``.
    Every image is paired with a K value drawn at random from a fixed list.

    Args:
        num_images: number of leading test samples to export.

    Returns:
        A list of ``[image_path, K]`` pairs, one per exported image.

    Raises:
        OSError: if an image file could not be written.
    """
    outdir = "images_folder"
    # Create the folder once, before the loop. exist_ok=True replaces the
    # per-iteration "exists? then mkdir" check, which was loop-invariant
    # and could race with another process creating the same folder.
    os.makedirs(outdir, exist_ok=True)

    k_choices = [7, 9, 11, 13, 15, 24]
    sample_images = []
    for i in range(num_images):
        # Make the pixels unsigned 8-bit integers in a 28x28 layout.
        pixels = X_test[i].reshape(28, 28).astype(np.uint8)
        img_path = os.path.join(outdir, 'local_%05d.png' % (i,))
        # cv2.imwrite reports failure through its return value; surface it
        # instead of handing out a path to a file that does not exist.
        if not cv2.imwrite(img_path, pixels):
            raise OSError("could not write sample image: " + img_path)
        # Each entry is ["image path", K].
        sample_images.append([img_path, int(np.random.choice(k_choices))])
    return sample_images
# EXTRA: adapted from https://github.com/ageron/handson-ml2/blob/master/03_classification.ipynb
def plot_digits(instances, images_per_row=3):
    """Draw digit images in a grid, save it to ``results.png``, return the path.

    Args:
        instances: sequence of images; each one is reshaped to 28x28 before
            being drawn and is titled "Neighbor <position>".
        images_per_row: maximum number of images per grid row. Fewer columns
            are used when there are fewer images than this.

    Returns:
        The string ``'results.png'``. The file is overwritten on each call.
    """
    import matplotlib.pyplot as plt
    import matplotlib as mpl

    size = 28
    n_images = len(instances)
    # Never use more columns than images, but keep at least one column so
    # an empty input does not divide by zero below.
    images_per_row = max(1, min(n_images, images_per_row))
    # This is equivalent to n_rows = ceil(n_images / images_per_row):
    n_rows = (n_images - 1) // images_per_row + 1

    fig = plt.figure(figsize=(15, 8))
    try:
        for i, instance in enumerate(instances):
            # Work on explicit figure/axes objects rather than pyplot's
            # global "current axes", so overlapping calls cannot draw into
            # each other's figure.
            ax = fig.add_subplot(n_rows, images_per_row, i + 1)
            ax.imshow(instance.reshape(size, size), cmap=mpl.cm.binary)
            ax.axis("off")
            ax.set_title("Neighbor " + str(i + 1), size=20)
        fig.tight_layout()
        fig.savefig('results.png', dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call leaks one figure.
        plt.close(fig)
    return 'results.png'
## machine learning classifier
def KNN_predict(train_features, train_labels, test_feature, K):
    """Classify one sample by majority vote among its K nearest neighbours.

    Distances are Euclidean, measured between the flattened ``test_feature``
    and every row of ``train_features``. Equal distances keep training-set
    order.

    Args:
        train_features: training samples, one flattened sample per row.
        train_labels: label of each training row, in the same order.
        test_feature: the sample to classify; it is flattened before use.
        K: number of neighbours that vote. Values larger than the training
            set use every training sample.

    Returns:
        A 3-tuple ``(final_prediction, class_freq, image_path)``:
        ``final_prediction`` is the most frequent label among the K
        neighbours as a plain Python scalar (ties resolve to the smallest
        label); ``class_freq`` maps ``'Digit 0'`` .. ``'Digit 9'`` to the
        fraction of the K neighbours carrying that label; ``image_path`` is
        the file returned by ``plot_digits`` for up to the 24 nearest
        neighbours.

    Raises:
        ValueError: if ``K`` is smaller than 1 or there is no training data.
    """
    K = int(K)
    if K < 1:
        raise ValueError("K must be a positive integer")

    train_matrix = np.asarray(train_features)
    labels = np.asarray(train_labels)
    if len(train_matrix) == 0:
        raise ValueError("train_features is empty")

    # Flatten the query once (it does not change per training row) and make
    # it floating point so unsigned-integer pixels cannot wrap on subtraction.
    query = np.asarray(test_feature, dtype=np.float64).ravel()

    ### (1) distance between the test feature and every training point.
    # One vectorised call replaces the per-row Python loop; it allocates one
    # temporary array the size of the training matrix.
    distances = np.linalg.norm(train_matrix - query, axis=1)

    ### (2) indices of the K closest points. A stable sort keeps
    # equal-distance rows in their original order.
    nearest = np.argsort(distances, kind="stable")[:K]
    neighbor_labels = labels[nearest]

    ### (3) final prediction: majority label among the neighbours.
    # np.unique sorts the labels, so argmax picks the smallest label on a
    # tie. Return the bare label, not a mode-result wrapper, so callers can
    # display the digit directly.
    values, counts = np.unique(neighbor_labels, return_counts=True)
    final_prediction = values[np.argmax(counts)].item()

    ### (4) frequency of each digit class among the neighbours.
    n_votes = neighbor_labels.size
    class_freq = {
        'Digit ' + str(digit):
            float(np.count_nonzero(neighbor_labels == digit)) / n_votes
        for digit in range(0, 10)
    }

    ### (5) plot the neighbours and save the plot locally.
    # At most 24 neighbours are visualised.
    neighbor_imgs = train_matrix[nearest[:24]]
    image_path = plot_digits(neighbor_imgs, images_per_row=6)
    return final_prediction, class_freq, image_path
### main function for gradio to call to classify image
def call_our_KNN(test_image, K=7):
    """Gradio entry point: classify ``test_image`` with the KNN classifier.

    The image is reshaped to rows of 28*28 values and passed, with the
    module-level training features and labels, to ``KNN_predict``.

    Args:
        test_image: image array holding 28*28 pixel values.
        K: number of neighbours; converted with ``int`` before use.

    Returns:
        The three values produced by ``KNN_predict``, in the same order.
    """
    n_pixels = 28 * 28
    flat_pixels = test_image.reshape((-1, n_pixels))
    neighbor_count = int(K)
    prediction, class_shares, neighbors_png = KNN_predict(
        train_features, train_labels, flat_pixels, neighbor_count
    )
    return prediction, class_shares, neighbors_png
### generate several example cases
# Export the first 10 test digits to disk; each entry is [image path, K] and
# is used as a clickable example (matching the [image, K] input order below).
sample_images = get_sample_images(10)
### configure inputs/outputs
# NOTE(review): gr.inputs / gr.outputs look like the legacy Gradio component
# namespaces -- confirm they exist in the installed Gradio version.
# Image input; presumably delivered as a 28x28 single-channel ('L') array,
# which is what call_our_KNN's reshape to 28*28 expects -- TODO confirm.
set_image = gr.inputs.Image(shape=(28, 28), image_mode='L')
# Slider for K (arguments 1, 24, step 1, default 7); the upper value matches
# the 24-neighbour cap on plotted images in KNN_predict.
set_K = gr.inputs.Slider(1, 24, step=1, default=7)
# First output: text box for the first value returned by call_our_KNN.
set_label = gr.outputs.Textbox(label="Predicted Digit")
# Second output: per-class label widget fed by the 'Digit 0'..'Digit 9' dict.
set_probability = gr.outputs.Label(num_top_classes=10, label="Predicted Probability Per Class")
# Third output: image widget fed by the path of the saved neighbour plot.
set_out_images = gr.outputs.Image(label="Closest Neighbors")
### configure gradio, details can be found at https://www.gradio.app/docs/#i_slider
# Inputs and outputs are listed in the same order as call_our_KNN's
# parameters and return values.
interface = gr.Interface(fn=call_our_KNN,
inputs=[set_image, set_K],
outputs=[set_label,set_probability,set_out_images],
examples_per_page = 2,
examples = sample_images,
title="CSCI4750/5750 Demo 1: Digit classification using KNN algorithm",
description= "Click examples below for a quick demo",
theme = 'huggingface',
layout = 'vertical'
)
# Start the web app; this runs at import time because the file is a flat script.
interface.launch(debug=True)