In [None]:
# Numerical, dataframe and plotting libraries.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
# Switch TensorFlow to its TF1-compatible (non-v2) behavior for this kernel.
tf.compat.v1.disable_v2_behavior()
# Only emit TensorFlow log records at ERROR level.
tf.get_logger().setLevel('ERROR')

# NOTE(review): unpinned install that runs on every execution of this cell;
# consider pinning a umap-learn version in a dedicated setup cell.
!pip install umap-learn
import umap

# Download data

MNIST privacy rankings from [Distribution Density, Tails, and Outliers in Machine Learning: Metrics and Applications](https://arxiv.org/abs/1910.13427).


In [None]:
%%capture
# Download the order.tgz asset attached to the tensorflow/privacy 0.2.3 release
# (following redirects) and unpack it in the working directory.
# %%capture hides the curl/tar output, so a failed download is silent here.
# NOTE(review): the next cell reads from data/, so the archive presumably
# extracts into that directory — confirm.
!curl -L https://github.com/tensorflow/privacy/releases/download/0.2.3/order.tgz -o order.tgz
!tar zxvf order.tgz

In [None]:
# Read the train and test ordering arrays unpacked by the download cell.
mnist_priv_train, mnist_priv_test = (
  np.load('data/order_mnist_priv_train.npy'),
  np.load('data/order_mnist_priv_test.npy'),
)

In [None]:
# Display the shape of the training ordering array as a quick sanity check.
mnist_priv_train.shape

In [None]:
# Load the MNIST train/test images and labels through Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
# Plain alias, not a copy: x_train_orig refers to the same array object as
# x_train, so in-place changes to one are visible through the other.
x_train_orig = x_train

In [None]:
# Display the shape of the raw training image array.
x_train.shape

# The top and bottom "3" digits

In [None]:
# One record per training example i:
#   'priv_order' - entry i of np.argsort(mnist_priv_train)
#   'y'          - label of example i
#   'i'          - the example's position in the training arrays
# NOTE(review): this reads as "rank of example i" only if mnist_priv_train is
# a permutation of example indices (argsort then yields its inverse) — confirm
# against the data release.
trainList = [
  {'priv_order': d, 'y': y_train[i], 'i': i}
  for i, d in enumerate(np.argsort(mnist_priv_train))
]

df = pd.DataFrame(trainList)

In [None]:
# The ten "3" examples with the smallest priv_order values, drawn left to
# right in increasing priv_order.
top3df = df.loc[df['y'] == 3].sort_values('priv_order').iloc[:10]

f, axarr = plt.subplots(nrows=1, ncols=10)
for ax, example_idx in zip(axarr, top3df['i'].to_list()):
  ax.imshow(x_train[example_idx])

In [None]:
# The ten "3" examples with the largest priv_order values, drawn left to
# right in decreasing priv_order.
bot3df = df.loc[df['y'] == 3].sort_values('priv_order', ascending=False).iloc[:10]

f, axarr = plt.subplots(nrows=1, ncols=10)
for ax, example_idx in zip(axarr, bot3df['i'].to_list()):
  ax.imshow(x_train[example_idx])

# UMAP

Embeddings of each MNIST training digit, projected to 2D with UMAP.


In [None]:
# Reload MNIST and convert it into the form the classifier below is fed.
train, test = tf.keras.datasets.mnist.load_data()
train_data, train_labels = train
test_data, test_labels = test

# Images: float32 divided by 255, then a trailing single-channel axis so each
# example has shape (28, 28, 1).
train_data = train_data.astype(np.float32) / 255
test_data = test_data.astype(np.float32) / 255
train_data = train_data.reshape(len(train_data), 28, 28, 1)
test_data = test_data.reshape(len(test_data), 28, 28, 1)

# Labels: int32 class ids expanded into 10-column one-hot rows.
train_labels = tf.keras.utils.to_categorical(
  train_labels.astype(np.int32), num_classes=10)
test_labels = tf.keras.utils.to_categorical(
  test_labels.astype(np.int32), num_classes=10)

In [None]:
# Convolutional MNIST classifier assembled layer by layer. The two named dense
# layers ('embedding_784', 'embedding_32') can be looked up by name to read out
# intermediate activations; the final layer emits 10 raw logits (no softmax).
model_784 = tf.keras.Sequential()
model_784.add(tf.keras.layers.Conv2D(
  filters=16, kernel_size=8, strides=2, padding='same', activation='relu',
  input_shape=(28, 28, 1)))
model_784.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=1))
model_784.add(tf.keras.layers.Conv2D(
  filters=32, kernel_size=4, strides=2, padding='valid', activation='relu'))
model_784.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=1))
model_784.add(tf.keras.layers.Flatten())
model_784.add(tf.keras.layers.Dense(
  units=28*28, activation='relu', name='embedding_784'))
model_784.add(tf.keras.layers.Dense(
  units=32, activation='relu', name='embedding_32'))
model_784.add(tf.keras.layers.Dense(units=10, name='logit'))

model_784.summary()

In [None]:
# Cross-entropy on raw logits (the model has no softmax layer), configured
# with Reduction.NONE.
loss = tf.keras.losses.CategoricalCrossentropy(
  reduction=tf.losses.Reduction.NONE, from_logits=True)

model_784.compile(optimizer="adam", loss=loss, metrics=["accuracy"])

# Ten epochs with batches of 250; the test split is passed as validation data
# and verbose=2 is requested for logging.
model_784.fit(
  x=train_data,
  y=train_labels,
  batch_size=250,
  epochs=10,
  verbose=2,
  validation_data=(test_data, test_labels),
)

In [None]:
# Sub-model mapping an input image to the activations of the 784-unit
# 'embedding_784' dense layer. It is built from model_784's own input and
# layer output, so it shares model_784's weights.
embedding_layer_model_784 = tf.keras.Model(
  inputs=model_784.input,
  outputs=model_784.get_layer('embedding_784').output)

# Evaluate through Keras' predict() so the forward pass runs in the same
# session that holds the weights fitted above. Opening a fresh
# tf.compat.v1.Session and running global_variables_initializer() would
# re-initialise every variable in that session, producing embeddings from an
# untrained network. predict() also feeds the data in batches rather than
# embedding the whole training set in the graph as one constant.
# Result: one row of 784 activations per training image.
train_embeddings_784 = embedding_layer_model_784.predict(
  train_data, batch_size=250)

In [None]:
def umapDigit(digit=0, embeddings=train_embeddings_784, digit_type='', slug='784_'):
  """Project one digit class's embeddings with UMAP, plot them and save them.

  Args:
    digit: Class label to select; rows of the notebook-level `df` with
      `y == digit` are used.
    embeddings: Array whose axis 0 is indexed by the `i` column of `df`.
      The default is bound to `train_embeddings_784` when this `def` runs,
      so re-run this cell after recomputing the embeddings.
    digit_type: String inserted into the output file name before the digit.
    slug: String inserted into the output file name before `digit_type`.

  Returns:
    None. Shows a scatter plot colored by `priv_order` and writes the UMAP
    coordinates to 'umap-digits/umap_train_<slug><digit_type><digit>.npy'.
  """
  import os  # local import keeps this cell self-contained

  dfN = df[df['y'] == digit]
  embeddingsN = embeddings.take(dfN['i'].to_list(), axis=0)

  # Fixed random_state so repeated runs use the same UMAP seed.
  reducer = umap.UMAP(random_state=42, min_dist=.05, n_neighbors=8)
  umap_xy = reducer.fit_transform(embeddingsN)

  # Draw on the explicit Axes instead of mixing in pyplot's implicit state.
  fig, ax = plt.subplots(figsize=(6, 6))
  ax.scatter(
    umap_xy[:, 0], umap_xy[:, 1],
    c=dfN['priv_order'].to_numpy(), cmap="Spectral", s=3)
  ax.set(xticks=[], yticks=[])
  ax.set_title("MNIST " + str(digit) + " - UMAP", fontsize=18)

  plt.show()
  # Release the figure; this function is called in a loop.
  plt.close(fig)

  rootdir = 'umap-digits/'
  # The directory may not exist on a fresh kernel.
  os.makedirs(rootdir, exist_ok=True)
  outpath = rootdir + 'umap_train_' + slug + digit_type + str(digit) + '.npy'
  # Pass the path directly: np.save writes bytes, so a handle opened in text
  # mode ('w') raises TypeError. outpath already ends in '.npy', so np.save
  # does not append another suffix.
  np.save(outpath, umap_xy)


In [None]:
# Plot and save the UMAP projection for every digit class, 0 through 9.
for digit in range(10):
  umapDigit(digit)