jeduardogruiz
committed on
Commit
•
516a027
1
Parent(s):
92ee7ff
Upload 22 files
Browse files- README.md +2 -3
- __init__.py +14 -0
- botWallet.js +49 -0
- clipping.py +157 -0
- clipping_test.py +170 -0
- cluster_preserve_integration_test.py +709 -0
- cluster_preserve_quantize_registry.py +539 -0
- cluster_preserve_quantize_registry_test.py +150 -0
- collaborative_optimization.png +0 -0
- collaborative_optimization_dist.png +0 -0
- cripto.jpg +0 -0
- deep_crypto.py +18 -0
- default_n_bit_transforms.py +825 -0
- main.py +29 -0
- misc.py +173 -0
- misc_test.py +192 -0
- mnist_cnn.py +190 -0
- mnist_e2e_sparsity2x4.py +153 -0
- periodical_update_and_scheduling_test.py +222 -0
- prune_preserve_quantize_registry.py +339 -0
- readme.txt +204 -0
- same_training_and_inference_test.py +210 -0
README.md
CHANGED
@@ -1,3 +1,2 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
---
|
|
|
1 |
+
This directory is modified based on default_8bit; it allows you to manually change the number of bits of weight and activation in QAT.
|
|
__init__.py
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
botWallet.js
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// botWallet.js — Telegram bot that triggers an Ethereum smart-contract payment.
//
// NOTE(review): the original file mixed Python statements (`import os`,
// `if __name__ == "__main__":`) into this JavaScript source, declared the bot
// with the invalid syntax `const bot(0x...) = ...`, hard-coded a live Telegram
// bot token, and used `sessionStorage`, which does not exist in Node. All
// secrets and deployment-specific values are now read from the environment.

const TelegramBot = require('node-telegram-bot-api');
const Web3 = require('web3');

// Infura endpoint for Ethereum mainnet; the project id comes from the env.
const web3 = new Web3(new Web3.providers.HttpProvider(
    `https://mainnet.infura.io/v3/${process.env.INFURA_PROJECT_ID}`));

// Address and ABI of the payment smart contract.
const contractAddress = process.env.CONTRACT_ADDRESS;
const contractABI = []; // TODO: paste the smart-contract ABI here.

// SECURITY: never commit bot tokens to source control — read from the env.
const bot = new TelegramBot(process.env.TELEGRAM_BOT_TOKEN, {polling: true});

// Per-chat session store (replaces the browser-only `sessionStorage`).
const sessions = new Map();

bot.on('message', async (msg) => {
  const chatId = msg.chat.id;
  const text = msg.text;

  if (text === '/start') {
    await bot.sendMessage(chatId, '¡Bienvenido al bot de pago de Ethereum!');
  } else if (text === '/pay') {
    const session = sessions.get(chatId) || {};
    const userWalletAddress = session.userWalletAddress;
    const amountToPay = session.amountToPay;

    if (!userWalletAddress || !amountToPay) {
      await bot.sendMessage(chatId, 'Por favor, configure la billetera y el monto a pagar antes de realizar el pago.');
      return;
    }

    const contract = new web3.eth.Contract(contractABI, contractAddress);
    try {
      // PAYER_ADDRESS: dirección de la billetera que realiza el pago.
      await contract.methods.pay(userWalletAddress, amountToPay)
          .send({from: process.env.PAYER_ADDRESS, gas: 1000000});
      await bot.sendMessage(chatId, '¡Transacción realizada exitosamente!');
    } catch (error) {
      await bot.sendMessage(chatId, `Error al realizar la transacción: ${error.message}`);
    }
  } else {
    await bot.sendMessage(chatId, 'Comando no válido. Por favor, utiliza /start o /pay.');
  }
});
|
clipping.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2019, The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Encoding stages implementing various clipping strategies.
|
15 |
+
|
16 |
+
The base classes, `ClipByNormEncodingStage` and `ClipByValueEncodingStage`, are
|
17 |
+
expected to be subclassed as implementations of
|
18 |
+
`AdaptiveEncodingStageInterface`, to realize a variety of clipping strategies
|
19 |
+
that are adaptive to the data being processed in an iterative execution.
|
20 |
+
"""
|
21 |
+
|
22 |
+
from __future__ import absolute_import
|
23 |
+
from __future__ import division
|
24 |
+
from __future__ import print_function
|
25 |
+
|
26 |
+
import collections
|
27 |
+
import tensorflow as tf
|
28 |
+
|
29 |
+
from tensorflow_model_optimization.python.core.internal.tensor_encoding.core import encoding_stage
|
30 |
+
|
31 |
+
|
32 |
+
@encoding_stage.tf_style_encoding_stage
class ClipByNormEncodingStage(encoding_stage.EncodingStageInterface):
  """Encoding stage that projects the input onto an L-2 ball.

  The encode step applies `tf.clip_by_norm`; the decode step is the identity.
  See `tf.clip_by_norm` for more information.
  """

  ENCODED_VALUES_KEY = 'clipped_values'
  NORM_PARAMS_KEY = 'norm_param'

  def __init__(self, clip_norm):
    """Initializer for the `ClipByNormEncodingStage`.

    Args:
      clip_norm: A scalar, norm of the ball onto which to project.
    """
    self._clip_norm = clip_norm

  @property
  def name(self):
    """See base class."""
    return 'clip_by_norm'

  @property
  def compressible_tensors_keys(self):
    """See base class."""
    return [self.ENCODED_VALUES_KEY]

  @property
  def commutes_with_sum(self):
    """See base class."""
    return True

  @property
  def decode_needs_input_shape(self):
    """See base class."""
    return False

  def get_params(self):
    """See base class."""
    encode_params = collections.OrderedDict()
    encode_params[self.NORM_PARAMS_KEY] = self._clip_norm
    return encode_params, collections.OrderedDict()

  def encode(self, x, encode_params):
    """See base class."""
    # Cast the norm parameter so mixed float dtypes are accepted.
    norm = tf.cast(encode_params[self.NORM_PARAMS_KEY], x.dtype)
    return collections.OrderedDict(
        [(self.ENCODED_VALUES_KEY, tf.clip_by_norm(x, norm))])

  def decode(self,
             encoded_tensors,
             decode_params,
             num_summands=None,
             shape=None):
    """See base class."""
    del decode_params, num_summands, shape  # Unused.
    return tf.identity(encoded_tensors[self.ENCODED_VALUES_KEY])
|
91 |
+
|
92 |
+
|
93 |
+
@encoding_stage.tf_style_encoding_stage
class ClipByValueEncodingStage(encoding_stage.EncodingStageInterface):
  """Encoding stage that projects the input onto an L-infinity ball.

  The encode step applies `tf.clip_by_value`; the decode step is the identity.
  See `tf.clip_by_value` for more information.
  """

  ENCODED_VALUES_KEY = 'clipped_values'
  MIN_PARAMS_KEY = 'min_param'
  MAX_PARAMS_KEY = 'max_param'

  def __init__(self, clip_value_min, clip_value_max):
    """Initializer for the `ClipByValueEncodingStage`.

    Args:
      clip_value_min: A scalar, the minimum value to which to clip.
      clip_value_max: A scalar, the maximum value to which to clip.
    """
    self._clip_value_min = clip_value_min
    self._clip_value_max = clip_value_max

  @property
  def name(self):
    """See base class."""
    return 'clip_by_value'

  @property
  def compressible_tensors_keys(self):
    """See base class."""
    return [self.ENCODED_VALUES_KEY]

  @property
  def commutes_with_sum(self):
    """See base class."""
    return True

  @property
  def decode_needs_input_shape(self):
    """See base class."""
    return False

  def get_params(self):
    """See base class."""
    encode_params = collections.OrderedDict()
    encode_params[self.MIN_PARAMS_KEY] = self._clip_value_min
    encode_params[self.MAX_PARAMS_KEY] = self._clip_value_max
    return encode_params, collections.OrderedDict()

  def encode(self, x, encode_params):
    """See base class."""
    # Cast both bounds so mixed float dtypes are accepted.
    lo = tf.cast(encode_params[self.MIN_PARAMS_KEY], x.dtype)
    hi = tf.cast(encode_params[self.MAX_PARAMS_KEY], x.dtype)
    return collections.OrderedDict(
        [(self.ENCODED_VALUES_KEY, tf.clip_by_value(x, lo, hi))])

  def decode(self,
             encoded_tensors,
             decode_params,
             num_summands=None,
             shape=None):
    """See base class."""
    del decode_params, num_summands, shape  # Unused.
    return tf.identity(encoded_tensors[self.ENCODED_VALUES_KEY])
|
clipping_test.py
ADDED
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2019, The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
from __future__ import absolute_import
|
16 |
+
from __future__ import division
|
17 |
+
from __future__ import print_function
|
18 |
+
|
19 |
+
import itertools
|
20 |
+
|
21 |
+
from absl.testing import parameterized
|
22 |
+
import numpy as np
|
23 |
+
import tensorflow as tf
|
24 |
+
|
25 |
+
from tensorflow_model_optimization.python.core.internal.tensor_encoding.stages.research import clipping
|
26 |
+
from tensorflow_model_optimization.python.core.internal.tensor_encoding.testing import test_utils
|
27 |
+
|
28 |
+
|
29 |
+
# These tests rely on graph-mode execution; switch eager mode off when the
# installed TF defaults to it.
if tf.executing_eagerly():
  tf.compat.v1.disable_eager_execution()
|
31 |
+
|
32 |
+
|
33 |
+
class ClipByNormEncodingStageTest(test_utils.BaseEncodingStageTest):
  """Tests for `clipping.ClipByNormEncodingStage`."""

  def default_encoding_stage(self):
    """See base class."""
    return clipping.ClipByNormEncodingStage(1.0)

  def default_input(self):
    """See base class."""
    return tf.random.normal([20])

  @property
  def is_lossless(self):
    """See base class."""
    return False

  def common_asserts_for_test_data(self, data):
    """See base class."""
    values_key = clipping.ClipByNormEncodingStage.ENCODED_VALUES_KEY
    encoded_x = data.encoded_x[values_key]
    # Encoding must not change the shape, and decoding must be the identity.
    self.assertAllEqual(data.x.shape, encoded_x.shape)
    self.assertAllEqual(encoded_x, data.decoded_x)

  def test_clipping_effective(self):
    stage = clipping.ClipByNormEncodingStage(1.0)
    result = self.run_one_to_many_encode_decode(
        stage, lambda: tf.constant([1.0, 1.0, 1.0, 1.0]))
    self.common_asserts_for_test_data(result)
    self.assertAllEqual([1.0, 1.0, 1.0, 1.0], result.x)
    # After projection onto the unit L-2 ball the decoded values have norm 1.
    self.assertAllClose([0.5, 0.5, 0.5, 0.5], result.decoded_x)

  def test_clipping_large_norm_identity(self):
    stage = clipping.ClipByNormEncodingStage(1000.0)
    result = self.run_one_to_many_encode_decode(
        stage, lambda: tf.constant([1.0, 1.0, 1.0, 1.0]))
    self.common_asserts_for_test_data(result)
    # If the input already has a smaller norm, the encoding is an identity.
    self.assertAllEqual(result.x, result.decoded_x)

  @parameterized.parameters(([2,],), ([2, 3],), ([2, 3, 4],))
  def test_different_shapes(self, shape):
    stage = clipping.ClipByNormEncodingStage(1.0)
    result = self.run_one_to_many_encode_decode(
        stage, lambda: tf.random.uniform(shape) + 1.0)
    self.common_asserts_for_test_data(result)
    self.assertAllClose(1.0, np.linalg.norm(result.decoded_x))

  @parameterized.parameters(
      itertools.product([tf.float32, tf.float64], [tf.float32, tf.float64]))
  def test_input_types(self, x_dtype, clip_norm_dtype):
    # Tests combinations of input dtypes.
    stage = clipping.ClipByNormEncodingStage(
        tf.constant(1.0, clip_norm_dtype))
    x = tf.constant([1.0, 1.0, 1.0, 1.0], dtype=x_dtype)
    encode_params, decode_params = stage.get_params()
    encoded_x, decoded_x = self.encode_decode_x(stage, x, encode_params,
                                                decode_params)
    result = self.evaluate_test_data(
        test_utils.TestData(x, encoded_x, decoded_x))

    self.assertAllEqual([1.0, 1.0, 1.0, 1.0], result.x)
    # The decoded values should have norm 1.
    self.assertAllClose([0.5, 0.5, 0.5, 0.5], result.decoded_x)
|
98 |
+
|
99 |
+
|
100 |
+
class ClipByValueEncodingStageTest(test_utils.BaseEncodingStageTest):
  """Tests for `clipping.ClipByValueEncodingStage`."""

  def default_encoding_stage(self):
    """See base class."""
    return clipping.ClipByValueEncodingStage(-1.0, 1.0)

  def default_input(self):
    """See base class."""
    return tf.random.normal([20])

  @property
  def is_lossless(self):
    """See base class."""
    return False

  def common_asserts_for_test_data(self, data):
    """See base class."""
    values_key = clipping.ClipByValueEncodingStage.ENCODED_VALUES_KEY
    encoded_x = data.encoded_x[values_key]
    # Encoding must not change the shape, and decoding must be the identity.
    self.assertAllEqual(data.x.shape, encoded_x.shape)
    self.assertAllEqual(encoded_x, data.decoded_x)

  def test_clipping_effective(self):
    stage = clipping.ClipByValueEncodingStage(-1.0, 1.0)
    result = self.run_one_to_many_encode_decode(
        stage, lambda: tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0]))
    self.common_asserts_for_test_data(result)
    self.assertAllEqual([-2.0, -1.0, 0.0, 1.0, 2.0], result.x)
    self.assertAllClose([-1.0, -1.0, 0.0, 1.0, 1.0], result.decoded_x)

  def test_clipping_large_min_max_identity(self):
    stage = clipping.ClipByValueEncodingStage(-1000.0, 1000.0)
    result = self.run_one_to_many_encode_decode(stage, self.default_input)
    self.common_asserts_for_test_data(result)
    # If the input already fits within the bounds, encoding is an identity.
    self.assertAllEqual(result.x, result.decoded_x)

  @parameterized.parameters(([2,],), ([2, 3],), ([2, 3, 4],))
  def test_different_shapes(self, shape):
    stage = clipping.ClipByValueEncodingStage(-1.0, 1.0)
    result = self.run_one_to_many_encode_decode(
        stage, lambda: tf.random.normal(shape))
    self.common_asserts_for_test_data(result)
    self.assertGreaterEqual(1.0, np.amax(result.decoded_x))
    self.assertLessEqual(-1.0, np.amin(result.decoded_x))

  @parameterized.parameters(
      itertools.product([tf.float32, tf.float64], [tf.float32, tf.float64],
                        [tf.float32, tf.float64]))
  def test_input_types(self, x_dtype, clip_value_min_dtype,
                       clip_value_max_dtype):
    # Tests combinations of input dtypes.
    stage = clipping.ClipByValueEncodingStage(
        tf.constant(-1.0, clip_value_min_dtype),
        tf.constant(1.0, clip_value_max_dtype))
    x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=x_dtype)
    encode_params, decode_params = stage.get_params()
    encoded_x, decoded_x = self.encode_decode_x(stage, x, encode_params,
                                                decode_params)
    result = self.evaluate_test_data(
        test_utils.TestData(x, encoded_x, decoded_x))

    self.common_asserts_for_test_data(result)
    self.assertAllEqual([-2.0, -1.0, 0.0, 1.0, 2.0], result.x)
    self.assertAllClose([-1.0, -1.0, 0.0, 1.0, 1.0], result.decoded_x)
|
167 |
+
|
168 |
+
|
169 |
+
# Standard TF test entry point.
if __name__ == '__main__':
  tf.test.main()
|
cluster_preserve_integration_test.py
ADDED
@@ -0,0 +1,709 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Integration tests for CQAT, PCQAT cases."""
|
16 |
+
from absl.testing import parameterized
|
17 |
+
import numpy as np
|
18 |
+
import tensorflow as tf
|
19 |
+
|
20 |
+
from tensorflow_model_optimization.python.core.clustering.keras import cluster
|
21 |
+
from tensorflow_model_optimization.python.core.clustering.keras import cluster_config
|
22 |
+
from tensorflow_model_optimization.python.core.clustering.keras.experimental import cluster as experimental_cluster
|
23 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
24 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quantize
|
25 |
+
from tensorflow_model_optimization.python.core.quantization.keras.collab_opts.cluster_preserve import (
|
26 |
+
default_8bit_cluster_preserve_quantize_scheme,)
|
27 |
+
from tensorflow_model_optimization.python.core.quantization.keras.collab_opts.cluster_preserve.cluster_utils import (
|
28 |
+
strip_clustering_cqat,)
|
29 |
+
|
30 |
+
|
31 |
+
layers = keras.layers
|
32 |
+
|
33 |
+
|
34 |
+
class ClusterPreserveIntegrationTest(tf.test.TestCase, parameterized.TestCase):
|
35 |
+
|
36 |
+
def setUp(self):
|
37 |
+
super(ClusterPreserveIntegrationTest, self).setUp()
|
38 |
+
self.cluster_params = {
|
39 |
+
'number_of_clusters': 4,
|
40 |
+
'cluster_centroids_init': cluster_config.CentroidInitialization.LINEAR
|
41 |
+
}
|
42 |
+
|
43 |
+
def compile_and_fit(self, model):
|
44 |
+
"""Here we compile and fit the model."""
|
45 |
+
model.compile(
|
46 |
+
loss=keras.losses.categorical_crossentropy,
|
47 |
+
optimizer='adam',
|
48 |
+
metrics=['accuracy'],
|
49 |
+
)
|
50 |
+
model.fit(
|
51 |
+
np.random.rand(20, 10),
|
52 |
+
keras.utils.to_categorical(np.random.randint(5, size=(20, 1)), 5),
|
53 |
+
batch_size=20,
|
54 |
+
)
|
55 |
+
|
56 |
+
def _get_number_of_unique_weights(self, stripped_model, layer_nr,
|
57 |
+
weight_name):
|
58 |
+
layer = stripped_model.layers[layer_nr]
|
59 |
+
if isinstance(layer, quantize.quantize_wrapper.QuantizeWrapper):
|
60 |
+
for weight_item in layer.trainable_weights:
|
61 |
+
if weight_name in weight_item.name:
|
62 |
+
weight = weight_item
|
63 |
+
else:
|
64 |
+
weight = getattr(layer, weight_name)
|
65 |
+
weights_as_list = weight.numpy().flatten()
|
66 |
+
nr_of_unique_weights = len(set(weights_as_list))
|
67 |
+
return nr_of_unique_weights
|
68 |
+
|
69 |
+
def _get_sparsity(self, model):
|
70 |
+
sparsity_list = []
|
71 |
+
for layer in model.layers:
|
72 |
+
for weights in layer.trainable_weights:
|
73 |
+
if 'kernel' in weights.name:
|
74 |
+
np_weights = keras.backend.get_value(weights)
|
75 |
+
sparsity = 1.0 - np.count_nonzero(np_weights) / float(
|
76 |
+
np_weights.size)
|
77 |
+
sparsity_list.append(sparsity)
|
78 |
+
|
79 |
+
return sparsity_list
|
80 |
+
|
81 |
+
def _get_clustered_model(self, preserve_sparsity):
|
82 |
+
"""Cluster the (sparse) model and return clustered_model."""
|
83 |
+
tf.random.set_seed(1)
|
84 |
+
original_model = keras.Sequential([
|
85 |
+
layers.Dense(5, activation='softmax', input_shape=(10,)),
|
86 |
+
layers.Flatten(),
|
87 |
+
])
|
88 |
+
|
89 |
+
# Manually set sparsity in the Dense layer if preserve_sparsity is on
|
90 |
+
if preserve_sparsity:
|
91 |
+
first_layer_weights = original_model.layers[0].get_weights()
|
92 |
+
first_layer_weights[0][:][0:2] = 0.0
|
93 |
+
original_model.layers[0].set_weights(first_layer_weights)
|
94 |
+
|
95 |
+
# Start the sparsity-aware clustering
|
96 |
+
clustering_params = {
|
97 |
+
'number_of_clusters': 4,
|
98 |
+
'cluster_centroids_init': cluster_config.CentroidInitialization.LINEAR,
|
99 |
+
'preserve_sparsity': True
|
100 |
+
}
|
101 |
+
|
102 |
+
clustered_model = experimental_cluster.cluster_weights(
|
103 |
+
original_model, **clustering_params)
|
104 |
+
|
105 |
+
return clustered_model
|
106 |
+
|
107 |
+
def _get_conv_model(self,
|
108 |
+
nr_of_channels,
|
109 |
+
data_format=None,
|
110 |
+
kernel_size=(3, 3)):
|
111 |
+
"""Returns functional model with Conv2D layer."""
|
112 |
+
inp = keras.layers.Input(shape=(32, 32), batch_size=100)
|
113 |
+
shape = (1, 32, 32) if data_format == 'channels_first' else (32, 32, 1)
|
114 |
+
x = keras.layers.Reshape(shape)(inp)
|
115 |
+
x = keras.layers.Conv2D(
|
116 |
+
filters=nr_of_channels,
|
117 |
+
kernel_size=kernel_size,
|
118 |
+
data_format=data_format,
|
119 |
+
activation='relu',
|
120 |
+
)(x)
|
121 |
+
x = keras.layers.MaxPool2D(2, 2)(x)
|
122 |
+
out = keras.layers.Flatten()(x)
|
123 |
+
model = keras.Model(inputs=inp, outputs=out)
|
124 |
+
return model
|
125 |
+
|
126 |
+
def _compile_and_fit_conv_model(self, model, nr_epochs=1):
|
127 |
+
"""Compile and fit conv model from _get_conv_model."""
|
128 |
+
x_train = np.random.uniform(size=(500, 32, 32))
|
129 |
+
y_train = np.random.randint(low=0, high=1024, size=(500,))
|
130 |
+
model.compile(
|
131 |
+
optimizer=keras.optimizers.Adam(learning_rate=1e-4),
|
132 |
+
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
|
133 |
+
metrics=[keras.metrics.SparseCategoricalAccuracy(name='accuracy')],
|
134 |
+
)
|
135 |
+
|
136 |
+
model.fit(x_train, y_train, epochs=nr_epochs, batch_size=100, verbose=1)
|
137 |
+
|
138 |
+
return model
|
139 |
+
|
140 |
+
def _get_conv_clustered_model(self,
|
141 |
+
nr_of_channels,
|
142 |
+
nr_of_clusters,
|
143 |
+
data_format,
|
144 |
+
preserve_sparsity,
|
145 |
+
kernel_size=(3, 3)):
|
146 |
+
"""Returns clustered per channel model with Conv2D layer."""
|
147 |
+
tf.random.set_seed(42)
|
148 |
+
model = self._get_conv_model(nr_of_channels, data_format, kernel_size)
|
149 |
+
|
150 |
+
if preserve_sparsity:
|
151 |
+
# Make the convolutional layer sparse by nullifying half of weights
|
152 |
+
assert model.layers[2].name == 'conv2d'
|
153 |
+
|
154 |
+
conv_layer_weights = model.layers[2].get_weights()
|
155 |
+
shape = conv_layer_weights[0].shape
|
156 |
+
conv_layer_weights_flatten = conv_layer_weights[0].flatten()
|
157 |
+
|
158 |
+
nr_elems = len(conv_layer_weights_flatten)
|
159 |
+
conv_layer_weights_flatten[0:1 + nr_elems // 2] = 0.0
|
160 |
+
pruned_conv_layer_weights = tf.reshape(conv_layer_weights_flatten, shape)
|
161 |
+
conv_layer_weights[0] = pruned_conv_layer_weights
|
162 |
+
model.layers[2].set_weights(conv_layer_weights)
|
163 |
+
|
164 |
+
clustering_params = {
|
165 |
+
'number_of_clusters':
|
166 |
+
nr_of_clusters,
|
167 |
+
'cluster_centroids_init':
|
168 |
+
cluster_config.CentroidInitialization.KMEANS_PLUS_PLUS,
|
169 |
+
'cluster_per_channel':
|
170 |
+
True,
|
171 |
+
'preserve_sparsity':
|
172 |
+
preserve_sparsity
|
173 |
+
}
|
174 |
+
|
175 |
+
clustered_model = experimental_cluster.cluster_weights(model,
|
176 |
+
**clustering_params)
|
177 |
+
clustered_model = self._compile_and_fit_conv_model(clustered_model)
|
178 |
+
|
179 |
+
# Returns un-stripped model
|
180 |
+
return clustered_model
|
181 |
+
|
182 |
+
def _pcqat_training(self, preserve_sparsity, quant_aware_annotate_model):
|
183 |
+
"""PCQAT training on the input model."""
|
184 |
+
quant_aware_model = quantize.quantize_apply(
|
185 |
+
quant_aware_annotate_model,
|
186 |
+
scheme=default_8bit_cluster_preserve_quantize_scheme
|
187 |
+
.Default8BitClusterPreserveQuantizeScheme(preserve_sparsity))
|
188 |
+
|
189 |
+
self.compile_and_fit(quant_aware_model)
|
190 |
+
|
191 |
+
stripped_pcqat_model = strip_clustering_cqat(quant_aware_model)
|
192 |
+
|
193 |
+
# Check the unique weights of clustered_model and pcqat_model
|
194 |
+
# layer 0 is the quantize_layer
|
195 |
+
num_of_unique_weights_pcqat = self._get_number_of_unique_weights(
|
196 |
+
stripped_pcqat_model, 1, 'kernel')
|
197 |
+
|
198 |
+
sparsity_pcqat = self._get_sparsity(stripped_pcqat_model)
|
199 |
+
|
200 |
+
return sparsity_pcqat, num_of_unique_weights_pcqat
|
201 |
+
|
202 |
+
def testEndToEndClusterPreserve(self):
|
203 |
+
"""Runs CQAT end to end and whole model is quantized."""
|
204 |
+
original_model = keras.Sequential(
|
205 |
+
[layers.Dense(5, activation='softmax', input_shape=(10,))]
|
206 |
+
)
|
207 |
+
clustered_model = cluster.cluster_weights(
|
208 |
+
original_model,
|
209 |
+
**self.cluster_params)
|
210 |
+
self.compile_and_fit(clustered_model)
|
211 |
+
clustered_model = cluster.strip_clustering(clustered_model)
|
212 |
+
num_of_unique_weights_clustering = self._get_number_of_unique_weights(
|
213 |
+
clustered_model, 0, 'kernel')
|
214 |
+
|
215 |
+
quant_aware_annotate_model = (
|
216 |
+
quantize.quantize_annotate_model(clustered_model))
|
217 |
+
|
218 |
+
quant_aware_model = quantize.quantize_apply(
|
219 |
+
quant_aware_annotate_model,
|
220 |
+
scheme=default_8bit_cluster_preserve_quantize_scheme
|
221 |
+
.Default8BitClusterPreserveQuantizeScheme())
|
222 |
+
|
223 |
+
self.compile_and_fit(quant_aware_model)
|
224 |
+
stripped_cqat_model = strip_clustering_cqat(quant_aware_model)
|
225 |
+
|
226 |
+
# Check the unique weights of a certain layer of
|
227 |
+
# clustered_model and pcqat_model
|
228 |
+
num_of_unique_weights_cqat = self._get_number_of_unique_weights(
|
229 |
+
stripped_cqat_model, 1, 'kernel')
|
230 |
+
self.assertAllEqual(num_of_unique_weights_clustering,
|
231 |
+
num_of_unique_weights_cqat)
|
232 |
+
|
233 |
+
def testEndToEndClusterPreservePerLayer(self):
  """Runs CQAT end to end and model is quantized per layers."""
  base_model = keras.Sequential([
      layers.Dense(5, activation='relu', input_shape=(10,)),
      layers.Dense(5, activation='softmax', input_shape=(10,)),
  ])
  clustered = cluster.cluster_weights(base_model, **self.cluster_params)
  self.compile_and_fit(clustered)
  clustered = cluster.strip_clustering(clustered)
  unique_weights_before = self._get_number_of_unique_weights(
      clustered, 1, 'kernel')

  def annotate_dense(layer):
    # Mark every Dense layer for quantization; pass other layers through.
    if isinstance(layer, keras.layers.Dense):
      return quantize.quantize_annotate_layer(layer)
    return layer

  annotated = keras.models.clone_model(
      clustered,
      clone_function=annotate_dense,
  )

  qat_model = quantize.quantize_apply(
      annotated,
      scheme=default_8bit_cluster_preserve_quantize_scheme
      .Default8BitClusterPreserveQuantizeScheme())

  self.compile_and_fit(qat_model)
  cqat_model = strip_clustering_cqat(qat_model)

  # The second Dense layer (index shifted by the inserted quantize_layer)
  # must keep the clustering-time number of unique weights.
  unique_weights_after = self._get_number_of_unique_weights(
      cqat_model, 2, 'kernel')
  self.assertAllEqual(unique_weights_before, unique_weights_after)
|
272 |
+
|
273 |
+
def testEndToEndClusterPreserveOneLayer(self):
  """Runs CQAT end to end and model is quantized only for a single layer."""
  original_model = keras.Sequential([
      layers.Dense(5, activation='relu', input_shape=(10,)),
      # Only the layer named 'qat' is annotated for quantization below.
      layers.Dense(5, activation='softmax', input_shape=(10,), name='qat'),
  ])
  clustered_model = cluster.cluster_weights(
      original_model,
      **self.cluster_params)
  self.compile_and_fit(clustered_model)
  clustered_model = cluster.strip_clustering(clustered_model)
  # Unique-weight count of the second Dense layer after plain clustering.
  num_of_unique_weights_clustering = self._get_number_of_unique_weights(
      clustered_model, 1, 'kernel')

  def apply_quantization_to_dense(layer):
    # Annotate only the Dense layer whose name is 'qat'.
    if isinstance(layer, keras.layers.Dense):
      if layer.name == 'qat':
        return quantize.quantize_annotate_layer(layer)
    return layer

  quant_aware_annotate_model = keras.models.clone_model(
      clustered_model,
      clone_function=apply_quantization_to_dense,
  )

  quant_aware_model = quantize.quantize_apply(
      quant_aware_annotate_model,
      scheme=default_8bit_cluster_preserve_quantize_scheme
      .Default8BitClusterPreserveQuantizeScheme())

  self.compile_and_fit(quant_aware_model)

  stripped_cqat_model = strip_clustering_cqat(
      quant_aware_model)

  # Check the unique weights of a certain layer of
  # clustered_model and pcqat_model
  num_of_unique_weights_cqat = self._get_number_of_unique_weights(
      stripped_cqat_model, 1, 'kernel')
  self.assertAllEqual(num_of_unique_weights_clustering,
                      num_of_unique_weights_cqat)
|
314 |
+
|
315 |
+
def testEndToEndPruneClusterPreserveQAT(self):
  """Runs PCQAT end to end when we quantize the whole model."""
  preserve_sparsity = True
  clustered_model = self._get_clustered_model(preserve_sparsity)
  # Save the kernel weights
  # NOTE(review): weights[1] is assumed to be the kernel variable of the
  # cluster-wrapped layer — confirm against _get_clustered_model.
  first_layer_weights = clustered_model.layers[0].weights[1]
  stripped_model_before_tuning = cluster.strip_clustering(
      clustered_model)
  nr_of_unique_weights_before = self._get_number_of_unique_weights(
      stripped_model_before_tuning, 0, 'kernel')

  self.compile_and_fit(clustered_model)

  stripped_model_clustered = cluster.strip_clustering(clustered_model)
  weights_after_tuning = stripped_model_clustered.layers[0].kernel
  nr_of_unique_weights_after = self._get_number_of_unique_weights(
      stripped_model_clustered, 0, 'kernel')

  # Check after sparsity-aware clustering, despite zero centroid can drift,
  # the final number of unique weights remains the same
  self.assertEqual(nr_of_unique_weights_before, nr_of_unique_weights_after)

  # Check that the zero weights stayed the same before and after tuning.
  # There might be new weights that become zeros but sparsity-aware
  # clustering preserves the original zero weights in the original positions
  # of the weight array
  self.assertTrue(
      np.array_equal(first_layer_weights[:][0:2],
                     weights_after_tuning[:][0:2]))

  # Check sparsity before the input of PCQAT
  sparsity_pruning = self._get_sparsity(stripped_model_clustered)

  # PCQAT: when the preserve_sparsity flag is True, the PCQAT should work
  quant_aware_annotate_model = (
      quantize.quantize_annotate_model(stripped_model_clustered)
  )

  # When preserve_sparsity is True in PCQAT, the final sparsity of
  # the layer stays the same or larger than that of the input layer
  preserve_sparsity = True
  sparsity_pcqat, unique_weights_pcqat = self._pcqat_training(
      preserve_sparsity, quant_aware_annotate_model)
  self.assertAllGreaterEqual(np.array(sparsity_pcqat),
                             sparsity_pruning[0])
  self.assertAllEqual(nr_of_unique_weights_after, unique_weights_pcqat)
|
361 |
+
|
362 |
+
def testEndToEndClusterPreserveQATClusteredPerChannel(
    self, data_format='channels_last'):
  """Runs CQAT end to end for the model that is clustered per channel.

  Args:
    data_format: format of the Conv2D input, 'channels_first' or
      'channels_last'.
  """

  nr_of_channels = 12
  nr_of_clusters = 4

  clustered_model = self._get_conv_clustered_model(
      nr_of_channels, nr_of_clusters, data_format, preserve_sparsity=False)
  stripped_model = cluster.strip_clustering(clustered_model)

  # Save the kernel weights
  conv2d_layer = stripped_model.layers[2]
  self.assertEqual(conv2d_layer.name, 'conv2d')

  # should be nr_of_channels * nr_of_clusters
  nr_unique_weights = -1

  for weight in conv2d_layer.weights:
    if 'kernel' in weight.name:
      nr_unique_weights = len(np.unique(weight.numpy()))
      self.assertLessEqual(nr_unique_weights, nr_of_clusters * nr_of_channels)

  quant_aware_annotate_model = (
      quantize.quantize_annotate_model(stripped_model)
  )

  quant_aware_model = quantize.quantize_apply(
      quant_aware_annotate_model,
      scheme=default_8bit_cluster_preserve_quantize_scheme
      .Default8BitClusterPreserveQuantizeScheme())

  # Lets train for more epochs to have a chance to scatter clusters
  model = self._compile_and_fit_conv_model(quant_aware_model, 3)

  stripped_cqat_model = strip_clustering_cqat(model)

  # Check the unique weights of a certain layer of
  # clustered_model and pcqat_model
  layer_nr = 3
  num_of_unique_weights_cqat = self._get_number_of_unique_weights(
      stripped_cqat_model, layer_nr, 'kernel')
  self.assertLessEqual(num_of_unique_weights_cqat, nr_unique_weights)

  # We need to do tighter check: we check that the number of unique
  # weights per channel is less than the given nr_of_channels
  layer = stripped_cqat_model.layers[layer_nr]
  weight_to_check = None
  if isinstance(layer, quantize.quantize_wrapper.QuantizeWrapper):
    for weight_item in layer.trainable_weights:
      if 'kernel' in weight_item.name:
        weight_to_check = weight_item

  # Use unittest-style assertions instead of bare `assert`: bare asserts
  # are stripped under `python -O` and give no diagnostic on failure.
  self.assertIsNotNone(weight_to_check)

  for i in range(nr_of_channels):
    nr_unique_weights_per_channel = len(
        np.unique(weight_to_check[:, :, :, i]))
    self.assertEqual(nr_unique_weights_per_channel, nr_of_clusters)
|
421 |
+
|
422 |
+
def testEndToEndPCQATClusteredPerChannel(self, data_format='channels_last'):
  """Runs PCQAT end to end for the model that is clustered per channel.

  Args:
    data_format: format of the Conv2D input, 'channels_first' or
      'channels_last'.
  """

  nr_of_channels = 12
  nr_of_clusters = 4

  clustered_model = self._get_conv_clustered_model(
      nr_of_channels, nr_of_clusters, data_format, preserve_sparsity=True)
  stripped_model = cluster.strip_clustering(clustered_model)

  # Save the kernel weights
  conv2d_layer = stripped_model.layers[2]
  self.assertEqual(conv2d_layer.name, 'conv2d')

  # should be nr_of_channels * nr_of_clusters
  nr_unique_weights = -1

  for weight in conv2d_layer.weights:
    if 'kernel' in weight.name:
      nr_unique_weights = len(np.unique(weight.numpy()))
      self.assertLessEqual(nr_unique_weights, nr_of_clusters * nr_of_channels)

  # get sparsity before PCQAT training
  # we expect that only one value will be returned
  control_sparsity = self._get_sparsity(stripped_model)
  self.assertGreater(control_sparsity[0], 0.5)

  quant_aware_annotate_model = (
      quantize.quantize_annotate_model(stripped_model)
  )

  quant_aware_model = quantize.quantize_apply(
      quant_aware_annotate_model,
      scheme=default_8bit_cluster_preserve_quantize_scheme
      .Default8BitClusterPreserveQuantizeScheme())

  # Lets train for more epochs to have a chance to scatter clusters
  model = self._compile_and_fit_conv_model(quant_aware_model, 3)

  stripped_cqat_model = strip_clustering_cqat(model)

  # Check the unique weights of a certain layer of
  # clustered_model and cqat_model
  layer_nr = 3
  num_of_unique_weights_cqat = self._get_number_of_unique_weights(
      stripped_cqat_model, layer_nr, 'kernel')
  self.assertLessEqual(num_of_unique_weights_cqat, nr_unique_weights)

  # We need to do tighter check: we check that the number of unique
  # weights per channel is less than the given nr_of_channels
  layer = stripped_cqat_model.layers[layer_nr]
  weight_to_check = None
  if isinstance(layer, quantize.quantize_wrapper.QuantizeWrapper):
    for weight_item in layer.trainable_weights:
      if 'kernel' in weight_item.name:
        weight_to_check = weight_item

  # Use unittest-style assertions instead of bare `assert`: bare asserts
  # are stripped under `python -O` and give no diagnostic on failure.
  self.assertIsNotNone(weight_to_check)

  for i in range(nr_of_channels):
    nr_unique_weights_per_channel = len(
        np.unique(weight_to_check[:, :, :, i]))
    self.assertEqual(nr_unique_weights_per_channel, nr_of_clusters)

  cqat_sparsity = self._get_sparsity(stripped_cqat_model)
  self.assertLessEqual(cqat_sparsity[0], control_sparsity[0])
|
488 |
+
|
489 |
+
def testEndToEndPCQATClusteredPerChannelConv2d1x1(self,
                                                  data_format='channels_last'
                                                  ):
  """Runs PCQAT for model containing a 1x1 Conv2D.

  (with insufficient number of weights per channel).

  Args:
    data_format: Format of input data.
  """
  nr_of_channels = 12
  nr_of_clusters = 4

  # Ensure a warning is given to the user that
  # clustering is not implemented for this layer
  with self.assertWarnsRegex(Warning,
                             r'Layer conv2d does not have enough weights'):
    clustered_model = self._get_conv_clustered_model(
        nr_of_channels,
        nr_of_clusters,
        data_format,
        preserve_sparsity=True,
        kernel_size=(1, 1))
  stripped_model = cluster.strip_clustering(clustered_model)

  # Save the kernel weights
  conv2d_layer = stripped_model.layers[2]
  self.assertEqual(conv2d_layer.name, 'conv2d')

  for weight in conv2d_layer.weights:
    if 'kernel' in weight.name:
      # Original number of unique weights
      nr_original_weights = len(np.unique(weight.numpy()))
      self.assertLess(nr_original_weights, nr_of_channels * nr_of_clusters)

      # Demonstrate unmodified test layer has less weights
      # than requested clusters
      for channel in range(nr_of_channels):
        channel_weights = (
            weight[:, channel, :, :]
            if data_format == 'channels_first' else weight[:, :, :, channel])
        nr_channel_weights = len(channel_weights)
        self.assertGreater(nr_channel_weights, 0)
        self.assertLessEqual(nr_channel_weights, nr_of_clusters)

  # get sparsity before PCQAT training
  # we expect that only one value will be returned
  control_sparsity = self._get_sparsity(stripped_model)
  self.assertGreater(control_sparsity[0], 0.5)

  quant_aware_annotate_model = (
      quantize.quantize_annotate_model(stripped_model))

  # The un-clusterable 1x1 conv should also be skipped during PCQAT.
  with self.assertWarnsRegex(
      Warning, r'No clustering performed on layer quant_conv2d'):
    quant_aware_model = quantize.quantize_apply(
        quant_aware_annotate_model,
        scheme=default_8bit_cluster_preserve_quantize_scheme
        .Default8BitClusterPreserveQuantizeScheme(preserve_sparsity=True))

  # Lets train for more epochs to have a chance to scatter clusters
  model = self._compile_and_fit_conv_model(quant_aware_model, 3)

  stripped_cqat_model = strip_clustering_cqat(model)

  # Check the unique weights of a certain layer of
  # clustered_model and cqat_model, ensuring unchanged
  layer_nr = 3
  num_of_unique_weights_cqat = self._get_number_of_unique_weights(
      stripped_cqat_model, layer_nr, 'kernel')
  self.assertEqual(num_of_unique_weights_cqat, nr_original_weights)

  cqat_sparsity = self._get_sparsity(stripped_cqat_model)
  self.assertLessEqual(cqat_sparsity[0], control_sparsity[0])
|
563 |
+
|
564 |
+
def testPassingNonPrunedModelToPCQAT(self):
  """Runs PCQAT as CQAT if the input model is not pruned."""
  # Cluster without sparsity preservation, so no zero weights are enforced.
  clustered = self._get_clustered_model(False)

  clustered = cluster.strip_clustering(clustered)
  unique_weights_before = self._get_number_of_unique_weights(
      clustered, 0, 'kernel')

  # With no zero weights present, PCQAT is expected to fall back to CQAT
  # even though preserve_sparsity is requested in the scheme below.
  annotated = quantize.quantize_annotate_model(clustered)

  qat_model = quantize.quantize_apply(
      annotated,
      scheme=default_8bit_cluster_preserve_quantize_scheme
      .Default8BitClusterPreserveQuantizeScheme(True))

  self.compile_and_fit(qat_model)
  pcqat_model = strip_clustering_cqat(qat_model)

  # The unique-weight count must survive the (fallback) CQAT pass; layer
  # index is shifted by 1 because of the inserted quantize_layer.
  unique_weights_after = self._get_number_of_unique_weights(
      pcqat_model, 1, 'kernel')
  self.assertAllEqual(unique_weights_before, unique_weights_after)
|
593 |
+
|
594 |
+
@parameterized.parameters((0.), (2.))
def testPassingModelWithUniformWeightsToPCQAT(self, uniform_weights):
  """If pruned_clustered_model has uniform weights, it won't break PCQAT.

  Args:
    uniform_weights: the constant value every kernel weight is set to.
  """
  preserve_sparsity = True
  original_model = keras.Sequential([
      layers.Dense(5, activation='softmax', input_shape=(10,)),
      layers.Flatten(),
  ])

  # Manually set all weights to the same value in the Dense layer
  first_layer_weights = original_model.layers[0].get_weights()
  first_layer_weights[0][:] = uniform_weights
  original_model.layers[0].set_weights(first_layer_weights)

  # Start the sparsity-aware clustering.
  # Consistency fix: reuse the preserve_sparsity flag instead of a duplicate
  # hard-coded True, so the clustering and PCQAT settings cannot diverge.
  clustering_params = {
      'number_of_clusters': 4,
      'cluster_centroids_init': cluster_config.CentroidInitialization.LINEAR,
      'preserve_sparsity': preserve_sparsity
  }

  clustered_model = experimental_cluster.cluster_weights(
      original_model, **clustering_params)
  clustered_model = cluster.strip_clustering(clustered_model)

  nr_of_unique_weights_after = self._get_number_of_unique_weights(
      clustered_model, 0, 'kernel')
  sparsity_pruning = self._get_sparsity(clustered_model)

  quant_aware_annotate_model = (
      quantize.quantize_annotate_model(clustered_model)
  )

  # PCQAT must neither lose sparsity nor change the unique-weight count.
  sparsity_pcqat, unique_weights_pcqat = self._pcqat_training(
      preserve_sparsity, quant_aware_annotate_model)
  self.assertAllGreaterEqual(np.array(sparsity_pcqat),
                             sparsity_pruning[0])
  self.assertAllEqual(nr_of_unique_weights_after, unique_weights_pcqat)
|
632 |
+
|
633 |
+
def testTrainableWeightsBehaveCorrectlyDuringPCQAT(self):
  """PCQAT zero centroid masks stay the same and trainable variables are updating between epochs."""
  preserve_sparsity = True
  clustered_model = self._get_clustered_model(preserve_sparsity)
  clustered_model = cluster.strip_clustering(clustered_model)

  # Apply PCQAT
  quant_aware_annotate_model = (
      quantize.quantize_annotate_model(clustered_model)
  )

  quant_aware_model = quantize.quantize_apply(
      quant_aware_annotate_model,
      scheme=default_8bit_cluster_preserve_quantize_scheme
      .Default8BitClusterPreserveQuantizeScheme(True))

  quant_aware_model.compile(
      loss=keras.losses.categorical_crossentropy,
      optimizer='adam',
      metrics=['accuracy'],
  )

  class CheckCentroidsAndTrainableVarsCallback(keras.callbacks.Callback):
    """Check the updates of trainable variables and centroid masks."""

    def on_epoch_begin(self, batch, logs=None):
      # Check cluster centroids have the zero in the right position.
      # NOTE(review): reaches into the quantize wrapper's private
      # _weight_vars structure; layout is (weight, quantizer, vars_dict) —
      # confirm against the quantize wrapper implementation.
      vars_dictionary = self.model.layers[1]._weight_vars[0][2]
      self.centroid_mask = vars_dictionary['centroids_mask']
      self.zero_centroid_index_begin = np.where(
          self.centroid_mask == 0)[0]

      # Check trainable weights before training
      self.layer_kernel = (
          self.model.layers[1].weights[3].numpy()
      )
      self.original_weight = vars_dictionary['ori_weights_vars_tf'].numpy()
      self.centroids = vars_dictionary['cluster_centroids_tf'].numpy()

    def on_epoch_end(self, batch, logs=None):
      # Check the index of the zero centroids are not changed after training
      vars_dictionary = self.model.layers[1]._weight_vars[0][2]
      self.zero_centroid_index_end = np.where(
          vars_dictionary['centroids_mask'] == 0)[0]
      assert np.array_equal(
          self.zero_centroid_index_begin,
          self.zero_centroid_index_end
      )

      # Check trainable variables after training are updated
      assert not np.array_equal(
          self.layer_kernel,
          self.model.layers[1].weights[3].numpy()
      )
      assert not np.array_equal(
          self.original_weight,
          vars_dictionary['ori_weights_vars_tf'].numpy()
      )
      assert not np.array_equal(
          self.centroids,
          vars_dictionary['cluster_centroids_tf'].numpy()
      )

  # Use many epochs to verify layer's kernel weights are updating because
  # they can stay the same after being trained using only the first batch
  # of data for instance
  quant_aware_model.fit(
      np.random.rand(20, 10),
      keras.utils.to_categorical(np.random.randint(5, size=(20, 1)), 5),
      steps_per_epoch=5,
      epochs=3,
      callbacks=[CheckCentroidsAndTrainableVarsCallback()],
  )
|
706 |
+
|
707 |
+
|
708 |
+
if __name__ == '__main__':
  # Delegate to the TensorFlow test runner when executed as a script.
  tf.test.main()
|
cluster_preserve_quantize_registry.py
ADDED
@@ -0,0 +1,539 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Registry responsible for built-in keras classes."""
|
16 |
+
|
17 |
+
import logging
|
18 |
+
import warnings
|
19 |
+
|
20 |
+
import tensorflow as tf
|
21 |
+
|
22 |
+
from tensorflow_model_optimization.python.core.clustering.keras import cluster_config
|
23 |
+
from tensorflow_model_optimization.python.core.clustering.keras import clustering_registry
|
24 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
25 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quant_ops
|
26 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quantizers
|
27 |
+
from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import default_8bit_quantize_registry
|
28 |
+
from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import default_8bit_quantizers
|
29 |
+
|
30 |
+
|
31 |
+
# Short aliases for the Keras sub-modules used throughout this file.
layers = keras.layers
K = keras.backend

# Keys used to look up clustering state in a layer's weight-variables
# dictionary (e.g. vars_dictionary['centroids_mask'] in the CQAT tests).
CLUSTER_CENTROIDS = 'cluster_centroids_tf'
PULLING_INDICES = 'pulling_indices_tf'
ORIGINAL_WEIGHTS = 'ori_weights_vars_tf'
WEIGHT_NAME = 'weight_name'
CLUSTERING_IMPL = 'clst_impl'
CENTROIDS_MASK = 'centroids_mask'
SPARSITY_MASK = 'sparsity_mask'
|
42 |
+
|
43 |
+
def get_unique(t):
  """Get unique values and lookup index from N-D tensor.

  Args:
    t: tensor
  Returns:
    centroids (unique values), lookup index (same shape as input tensor)
  Example:
    t:
    ([[1.0, 2.0],
      [2.0, 3.0],
      [3.0, 3.0],
      [1.0, 2.0]]
    )
    centroids(unique values):
    ([1.0, 2.0, 3.0])
    output final index:
    ([[0, 1],
      [1, 2],
      [2, 2],
      [0, 1]]
    )
  """
  # tf.unique only operates on vectors, so flatten first and restore the
  # original shape on the index tensor afterwards.
  flattened = tf.reshape(t, shape=(-1,))
  centroids, flat_index = tf.unique(flattened)
  lookup = tf.reshape(flat_index, shape=tf.shape(t))
  return centroids, lookup
|
69 |
+
|
70 |
+
|
71 |
+
def get_centroids(layer, weight, data_format):
  """Gets centroid infos from the weights of a layer.

  Args:
    layer: The Keras layer from which the weight belong.
    weight: The weight tensor to get the centroids info from.
    data_format: string to indicate format: "channels_first" or "channels_last".
  Returns:
    A 4-tuple of centroids (unique values), number of centroids, lookup index,
    whether to cluster per channel (boolean).
  """
  # Per-channel clustering only applies to Conv2D-wrapped layers.
  cluster_per_channel = layer.layer and isinstance(
      layer.layer, keras.layers.Conv2D
  )

  if not cluster_per_channel:
    centroids, index = get_unique(weight)
    return centroids, tf.size(centroids), index, False

  # In case of cluster_per_channel we need to extract
  # unique values (centroids) for each channel.
  num_channels = weight.shape[1 if data_format == 'channels_first' else -1]
  channel_centroids = []
  channel_indices = []
  num_centroids = []

  for channel in range(num_channels):
    # NOTE(review): this slice assumes the channel axis is last, while
    # num_channels above honors data_format — confirm whether the kernel
    # layout here can ever be channels_first.
    channel_weights = weight[:, :, :, channel]
    centroids, indices = get_unique(channel_weights)

    channel_centroids.append(centroids)
    channel_indices.append(indices)
    num_centroids.append(tf.size(centroids))

  max_centroid = max(num_centroids)
  max_diff = max_centroid - min(num_centroids)

  # If channels disagree on the centroid count by more than one, fall back
  # to whole-tensor (non-per-channel) clustering.
  if max_diff > 1:
    centroids, index = get_unique(weight)
    return centroids, tf.size(centroids), index, False

  # Pad the short channels with ones so all rows have max_centroid entries
  # and can be stacked into a single rectangular tensor.
  for i, centroid in enumerate(channel_centroids):
    if num_centroids[i] != max_centroid:
      one_padding = tf.ones([max_centroid - num_centroids[i]])
      channel_centroids[i] = tf.concat([centroid, one_padding], 0)

  centroids = tf.convert_to_tensor(channel_centroids)
  lookup = tf.convert_to_tensor(channel_indices)

  # Stacking placed the channel axis first; move it back to the position
  # implied by data_format.
  lookup = tf.transpose(
      lookup,
      perm=(1, 0, 2, 3) if data_format == 'channels_first' else (1, 2, 3, 0))

  return centroids, max_centroid, lookup, True
|
125 |
+
|
126 |
+
|
127 |
+
class _ClusterPreserveInfo(object):
|
128 |
+
"""ClusterPreserveInfo."""
|
129 |
+
|
130 |
+
def __init__(self, weight_attrs, quantize_config_attrs):
|
131 |
+
"""ClusterPreserveInfo.
|
132 |
+
|
133 |
+
Args:
|
134 |
+
weight_attrs: list of cluster preservable weight attributes of layer.
|
135 |
+
quantize_config_attrs: list of quantization configuration class name.
|
136 |
+
"""
|
137 |
+
self.weight_attrs = weight_attrs
|
138 |
+
self.quantize_config_attrs = quantize_config_attrs
|
139 |
+
|
140 |
+
|
141 |
+
class ClusterPreserveQuantizeRegistry(object):
  """ClusterPreserveQuantizeRegistry is for built-in keras layers."""
  # The keys represent built-in keras layers; the first values represent
  # the variables within the layers which hold the kernel weights, second
  # values represent the class name of quantization configuration for layers.
  # This decides whether the weights of layers with quantization
  # configurations are cluster preservable.
  _LAYERS_CONFIG_MAP = {
      layers.Conv2D:
          _ClusterPreserveInfo(['kernel'], ['Default8BitConvQuantizeConfig']),
      layers.Dense:
          _ClusterPreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),

      # DepthwiseConv2D is supported with 8bit qat, but not with
      # clustering, thus for DepthwiseConv2D CQAT,
      # preserving clustered weights is disabled.
      layers.DepthwiseConv2D:
          _ClusterPreserveInfo(['depthwise_kernel'],
                               ['Default8BitQuantizeConfig']),

      # layers that are supported with clustering, but not yet with qat
      # layers.Conv1D:
      # _ClusterPreserveInfo(['kernel'], []),
      # layers.Conv2DTranspose:
      # _ClusterPreserveInfo(['kernel'], []),
      # layers.Conv3D:
      # _ClusterPreserveInfo(['kernel'], []),
      # layers.Conv3DTranspose:
      # _ClusterPreserveInfo(['kernel'], []),
      # layers.LocallyConnected1D:
      # _ClusterPreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),
      # layers.LocallyConnected2D:
      # _ClusterPreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),

      # SeparableConv need verify from 8bit qat
      # layers.SeparableConv1D:
      # _ClusterPreserveInfo(['pointwise_kernel'],
      #                      ['Default8BitConvQuantizeConfig']),
      # layers.SeparableConv2D:
      # _ClusterPreserveInfo(['pointwise_kernel'],
      #                      ['Default8BitConvQuantizeConfig']),

      # Embedding need verify from 8bit qat
      # layers.Embedding: _ClusterPreserveInfo(['embeddings'], []),
  }

  # Layer types that are quantizable but for which cluster preservation
  # is explicitly turned off (see comment on DepthwiseConv2D above).
  _DISABLE_CLUSTER_PRESERVE = frozenset({
      layers.DepthwiseConv2D,
  })
|
190 |
+
|
191 |
+
def __init__(self, preserve_sparsity):
  """Builds the mapping from quantize-config names to CQAT weight quantizers.

  Args:
    preserve_sparsity: whether the quantizers should also preserve sparsity
      on top of the clustered weights.
  """
  dense_quantizer = ClusterPreserveDefault8BitWeightsQuantizer(
      preserve_sparsity)
  conv_quantizer = ClusterPreserveDefault8BitConvWeightsQuantizer(
      preserve_sparsity)
  self._config_quantizer_map = {
      'Default8BitQuantizeConfig': dense_quantizer,
      'Default8BitConvQuantizeConfig': conv_quantizer,
  }
|
198 |
+
|
199 |
+
@classmethod
def _no_trainable_weights(cls, layer):
  """Returns whether this layer has trainable weights.

  Args:
    layer: The layer to check for trainable weights.
  Returns:
    True/False whether the layer has trainable weights.
  """
  has_trainable = bool(layer.trainable_weights)
  return not has_trainable
|
209 |
+
|
210 |
+
@classmethod
def _disable_cluster_preserve(cls, layer):
  """Returns whether to disable this layer for preserving clusters.

  Args:
    layer: The layer to check for disabling.
  Returns:
    True/False whether disabling this layer for preserving clusters.
  """
  # Membership test against the explicit deny-list of layer types.
  return type(layer) in cls._DISABLE_CLUSTER_PRESERVE
|
220 |
+
|
221 |
+
@classmethod
|
222 |
+
def supports(cls, layer):
|
223 |
+
"""Returns whether the registry supports this layer type.
|
224 |
+
|
225 |
+
Args:
|
226 |
+
layer: The layer to check for support.
|
227 |
+
Returns:
|
228 |
+
True/False whether the layer type is supported.
|
229 |
+
"""
|
230 |
+
# layers without trainable weights are consider supported,
|
231 |
+
# e.g., ReLU, Softmax, and AveragePooling2D.
|
232 |
+
if cls._no_trainable_weights(layer):
|
233 |
+
return True
|
234 |
+
|
235 |
+
if layer.__class__ in cls._LAYERS_CONFIG_MAP:
|
236 |
+
return True
|
237 |
+
|
238 |
+
return False
|
239 |
+
|
240 |
+
  @classmethod
  def _weight_names(cls, layer):
    """Returns the attribute names of the preservable weights for `layer`.

    Args:
      layer: The layer to look up.
    Returns:
      An empty list for layers with no trainable weights; otherwise the
      `weight_attrs` registered for the layer's type in `_LAYERS_CONFIG_MAP`
      (unregistered layer types raise KeyError).
    """

    if cls._no_trainable_weights(layer):
      return []

    return cls._LAYERS_CONFIG_MAP[layer.__class__].weight_attrs
|
247 |
+
|
248 |
+
  def apply_cluster_preserve_quantize_config(self, layer, quantize_config):
    """Applies cluster-preserve weight quantizer.

    Args:
      layer: The layer to check for support.
      quantize_config: quantization config for supporting cluster preservation
        on clustered weights
    Returns:
      The quantize_config with addon cluster preserve weight_quantizer.
    Raises:
      ValueError: If the layer is unsupported, or if the quantize_config's
        class is not registered for this layer type.
    """
    if not self.supports(layer):
      raise ValueError('Layer ' + str(layer.__class__) + ' is not supported.')

    # Example: ReLU, Softmax, and AveragePooling2D (without trainable weights)
    # DepthwiseConv2D (cluster_preserve is disabled)
    # These pass through with their quantize_config untouched.
    if self._no_trainable_weights(layer) or self._disable_cluster_preserve(
        layer):
      return quantize_config

    # Example: Conv2D, Dense layers
    # Swap in the cluster-preserving weight quantizer keyed by the
    # quantize_config's class name; the config object is mutated in place.
    if quantize_config.__class__.__name__ in self._LAYERS_CONFIG_MAP[
        layer.__class__].quantize_config_attrs:
      quantize_config.weight_quantizer = self._config_quantizer_map[
          quantize_config.__class__.__name__]
    else:
      raise ValueError('Configuration ' +
                       str(quantize_config.__class__.__name__) +
                       ' is not supported for Layer ' + str(layer.__class__) +
                       '.')

    return quantize_config
|
279 |
+
|
280 |
+
|
281 |
+
class Default8bitClusterPreserveQuantizeRegistry(
    ClusterPreserveQuantizeRegistry):
  """Default 8 bit ClusterPreserveQuantizeRegistry."""

  def get_quantize_config(self, layer):
    """Returns the quantization config with weight_quantizer for a given layer.

    Args:
      layer: input layer to return quantize config for.
    Returns:
      Returns the quantization config for cluster preserve weight_quantizer.
    """
    # Start from the stock 8-bit QAT config for this layer type ...
    base_registry = (
        default_8bit_quantize_registry.Default8BitQuantizeRegistry())
    base_config = base_registry.get_quantize_config(layer)
    # ... then swap its weight quantizer for the cluster-preserving one.
    return super(
        Default8bitClusterPreserveQuantizeRegistry,
        self).apply_cluster_preserve_quantize_config(layer, base_config)
|
301 |
+
|
302 |
+
|
303 |
+
class ClusterPreserveDefaultWeightsQuantizer(quantizers.LastValueQuantizer):
  """Quantize weights while preserving clusters."""

  def __init__(
      self, num_bits, per_axis, symmetric, narrow_range, preserve_sparsity):
    """ClusterPreserveDefaultWeightsQuantizer.

    Args:
      num_bits: Number of bits for quantization
      per_axis: Whether to apply per_axis quantization. The last dimension is
        used as the axis.
      symmetric: If true, use symmetric quantization limits instead of training
        the minimum and maximum of each quantization range separately.
      narrow_range: In case of 8 bits, narrow_range nudges the quantized range
        to be [-127, 127] instead of [-128, 127]. This ensures symmetric
        range has 0 as the centre.
      preserve_sparsity: Whether to apply prune-cluster-preserving quantization
        aware training.
    """
    super(ClusterPreserveDefaultWeightsQuantizer, self).__init__(
        num_bits=num_bits,
        per_axis=per_axis,
        symmetric=symmetric,
        narrow_range=narrow_range,
    )
    self.preserve_sparsity = preserve_sparsity

  def _build_clusters(self, name, layer):
    """Extracts the cluster centroids and cluster indices.

    Extracts cluster centroids and cluster indices from the pretrained
    clustered model when the input layer is clustered.

    Args:
      name: Name of weights in layer.
      layer: Quantization wrapped keras layer.
    Returns:
      A dictionary of the initial values of the
      cluster centroids, cluster indices, original weights,
      the pretrained flag for marking the first training
      epoch, and weight name.
    """
    result = {}
    weights = getattr(layer.layer, name)
    # NOTE(review): downgrades PCQAT to CQAT by mutating instance state when
    # the incoming weights contain no zeros — this flips the mode for every
    # subsequent call on this quantizer instance as well.
    if self.preserve_sparsity and not tf.reduce_any(weights == 0):
      self.preserve_sparsity = False
      logging.warning(
          'Input layer does not contain zero weights, so apply CQAT instead.')
    centroids_mask = None

    # Detects whether layer is convolutional and is clustered per channel
    data_format = getattr(layer.layer, 'data_format', None)
    centroids, num_centroids, lookup, cluster_per_channel = get_centroids(
        layer, weights, data_format)

    if self.preserve_sparsity:
      # divide_no_nan(w, w) yields 1.0 for nonzero weights and 0.0 for zeros,
      # i.e. a mask of the surviving (unpruned) weights.
      sparsity_mask = tf.math.divide_no_nan(weights, weights)
      # Mask out the centroid closest to zero so it stays pinned at zero.
      zero_idx = tf.argmin(tf.abs(centroids), axis=-1)
      centroids_mask = 1.0 - tf.one_hot(zero_idx, num_centroids)
      result = {SPARSITY_MASK: sparsity_mask}

    # Prepare clustering variables for the Keras graph when clusters
    # exist, assuming we do not use number_of_clusters larger than 1024
    if num_centroids > 1024:
      warnings.warn(f'No clustering performed on layer {layer.name}.\n'
                    f'Too many centroids to cluster.')
      return result
    # If not enough clusters, we do not preserve clustering
    elif num_centroids <= 1:
      warnings.warn(f'No clustering performed on layer {layer.name}.\n'
                    f'Perhaps too many clusters requested for this layer?')
      return result
    else:
      # Trainable copy of the centroids, initialized from the pretrained
      # clustered model's values.
      clst_centroids_tf = layer.add_weight(
          CLUSTER_CENTROIDS,
          shape=centroids.shape,
          initializer=keras.initializers.Constant(
              value=K.batch_get_value([centroids])[0]
          ),
          dtype=centroids.dtype,
          trainable=True,
      )

      # Trainable copy of the original (latent) weights.
      ori_weights_tf = layer.add_weight(
          ORIGINAL_WEIGHTS,
          shape=weights.shape,
          initializer=keras.initializers.Constant(
              value=K.batch_get_value([weights])[0]
          ),
          dtype=weights.dtype,
          trainable=True,
      )

      # Get clustering implementation according to layer type
      clustering_impl_cls = clustering_registry.ClusteringLookupRegistry(
      ).get_clustering_impl(
          layer.layer, name, cluster_per_channel=cluster_per_channel)
      clustering_impl = clustering_impl_cls(
          clst_centroids_tf, cluster_config.GradientAggregation.SUM,
          data_format)

      # Per-weight indices into the centroid table, cast to the lookup dtype.
      pulling_indices = tf.dtypes.cast(
          clustering_impl.get_pulling_indices(ori_weights_tf),
          lookup.dtype
      )

      # Non-trainable: indices are recomputed each training step in __call__.
      pulling_indices_tf = layer.add_weight(
          PULLING_INDICES,
          shape=lookup.shape,
          initializer=keras.initializers.Constant(
              value=K.batch_get_value([pulling_indices])[0]
          ),
          dtype=lookup.dtype,
          trainable=False,
      )

      result_clst = {
          CLUSTER_CENTROIDS: clst_centroids_tf,
          PULLING_INDICES: pulling_indices_tf,
          ORIGINAL_WEIGHTS: ori_weights_tf,
          WEIGHT_NAME: name,
          CLUSTERING_IMPL: clustering_impl,
          CENTROIDS_MASK: centroids_mask,
      }
      result.update(result_clst)
      return result

  def build(self, tensor_shape, name, layer):
    """Build (P)CQAT wrapper.

    When preserve_sparsity is true and the input is clustered.

    Args:
      tensor_shape: Shape of weights which needs to be quantized.
      name: Name of weights in layer.
      layer: Quantization wrapped keras layer.
    Returns:
      Dictionary of centroids, indices and
      quantization params, the dictionary will be passed
      to __call__ function.
    """
    # To get all the initial values from pretrained clustered model
    result = self._build_clusters(name, layer)
    # Result can have clustering nodes, then this is CQAT
    # Result can have both clustering nodes and sparsity mask, then
    # this will be PCQAT
    result.update(
        super(ClusterPreserveDefaultWeightsQuantizer,
              self).build(tensor_shape, name, layer))

    return result

  def __call__(self, inputs, training, weights, **kwargs):
    """Apply cluster preserved quantization to the input tensor.

    Args:
      inputs: Input tensor (layer's weights) to be quantized.
      training: Whether the graph is currently training.
      weights: Dictionary of weights (params) the quantizer can use to
        quantize the tensor (layer's weights). This contains the weights
        created in the `build` function.
      **kwargs: Additional variables which may be passed to the quantizer.
    Returns:
      quantized tensor.
    """
    if training:
      if CLUSTER_CENTROIDS in weights:
        if self.preserve_sparsity:
          # Re-apply the sparsity mask to the latent weights and zero out the
          # masked (zero) centroid before re-clustering.
          weights[ORIGINAL_WEIGHTS].assign(
              tf.multiply(weights[ORIGINAL_WEIGHTS],
                          weights[SPARSITY_MASK]))
          weights[CLUSTERING_IMPL].cluster_centroids.assign(
              weights[CLUSTERING_IMPL].
              cluster_centroids * weights[CENTROIDS_MASK]
          )
          weights[CLUSTER_CENTROIDS].assign(
              weights[CLUSTERING_IMPL].cluster_centroids
          )
        # Insert clustering variables
        weights[PULLING_INDICES].assign(tf.dtypes.cast(
            weights[CLUSTERING_IMPL].get_pulling_indices(
                weights[ORIGINAL_WEIGHTS]),
            weights[PULLING_INDICES].dtype
        ))

        # Materialize the clustered weights and write them back into the
        # layer's weight tensor before quantizing.
        output = weights[CLUSTERING_IMPL].get_clustered_weight(
            weights[PULLING_INDICES], weights[ORIGINAL_WEIGHTS])
        inputs.assign(output)
      else:
        # No clustering variables were built (layer not clustered); only
        # optionally enforce sparsity.
        if self.preserve_sparsity:
          inputs = tf.multiply(inputs, weights[SPARSITY_MASK])
        output = inputs
    else:
      output = inputs

    # Standard last-value fake-quantization on the (possibly re-clustered)
    # weights, using the min/max variables created by the parent's build.
    return quant_ops.LastValueQuantize(
        output,
        weights['min_var'],
        weights['max_var'],
        is_training=training,
        num_bits=self.num_bits,
        per_channel=self.per_axis,
        symmetric=self.symmetric,
        narrow_range=self.narrow_range
    )
|
508 |
+
|
509 |
+
|
510 |
+
class ClusterPreserveDefault8BitWeightsQuantizer(
    ClusterPreserveDefaultWeightsQuantizer):
  """ClusterPreserveWeightsQuantizer for default 8bit weights."""

  def __init__(self, preserve_sparsity):
    """Configures the base quantizer with the default 8-bit settings.

    Args:
      preserve_sparsity: Whether to also preserve zero weights (PCQAT).
    """
    super(ClusterPreserveDefault8BitWeightsQuantizer, self).__init__(
        num_bits=8,
        per_axis=False,
        symmetric=True,
        narrow_range=True,
        preserve_sparsity=preserve_sparsity)
    # NOTE(review): redundant — the base __init__ already stores this flag.
    self.preserve_sparsity = preserve_sparsity
|
522 |
+
|
523 |
+
|
524 |
+
class ClusterPreserveDefault8BitConvWeightsQuantizer(
    ClusterPreserveDefaultWeightsQuantizer,
    default_8bit_quantizers.Default8BitConvWeightsQuantizer):
  """ClusterPreserveWeightsQuantizer for default 8bit Conv2D weights."""

  def __init__(self, preserve_sparsity):  # pylint: disable=super-init-not-called
    # Deliberately initializes only the conv-weights base class; the
    # clustering behavior is reused by calling
    # ClusterPreserveDefaultWeightsQuantizer methods explicitly in `build`.
    default_8bit_quantizers.Default8BitConvWeightsQuantizer.__init__(self)
    self.preserve_sparsity = preserve_sparsity

  def build(self, tensor_shape, name, layer):
    # Combine the clustering variables from `_build_clusters` with whatever
    # the conv quantizer's own `build` creates (e.g. its quantization params).
    result = ClusterPreserveDefaultWeightsQuantizer._build_clusters(
        self, name, layer)
    result.update(
        default_8bit_quantizers.Default8BitConvWeightsQuantizer.build(
            self, tensor_shape, name, layer))
    return result
|
cluster_preserve_quantize_registry_test.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the 'License');
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an 'AS IS' BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Tests for ClusterPreserveQuantizeRegistry."""
|
16 |
+
|
17 |
+
import tensorflow as tf
|
18 |
+
|
19 |
+
from tensorflow_model_optimization.python.core.clustering.keras import clustering_registry
|
20 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
21 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quantize_config
|
22 |
+
from tensorflow_model_optimization.python.core.quantization.keras.collab_opts.cluster_preserve import cluster_preserve_quantize_registry
|
23 |
+
from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import default_8bit_quantize_registry
|
24 |
+
|
25 |
+
|
26 |
+
QuantizeConfig = quantize_config.QuantizeConfig
|
27 |
+
layers = keras.layers
|
28 |
+
|
29 |
+
|
30 |
+
class ClusterPreserveQuantizeRegistryTest(tf.test.TestCase):
  """Tests `supports` / `apply_cluster_preserve_quantize_config` behavior."""

  def setUp(self):
    super(ClusterPreserveQuantizeRegistryTest, self).setUp()
    # Test CQAT by default (preserve_sparsity=False)
    self.cluster_preserve_quantize_registry = (
        cluster_preserve_quantize_registry.ClusterPreserveQuantizeRegistry(
            False)
    )
    # Layers which are supported:
    # instantiate and build a Conv2D layer
    self.layer_conv2d = layers.Conv2D(10, (2, 2))
    self.layer_conv2d.build((2, 2))
    # instantiate and build a Dense layer
    self.layer_dense = layers.Dense(10)
    self.layer_dense.build((2, 2))
    # instantiate and build a ReLU layer (no trainable weights)
    self.layer_relu = layers.ReLU()
    self.layer_relu.build((2, 2))

    # A layer which is not supported:
    # instantiate and build a custom layer with trainable weights
    self.layer_custom = self.CustomLayer()
    self.layer_custom.build()

  class CustomLayer(layers.Layer):
    """A simple custom layer with training weights."""

    def build(self, input_shape=(2, 2)):
      self.add_weight(shape=input_shape,
                      initializer='random_normal',
                      trainable=True)

  class CustomQuantizeConfig(QuantizeConfig):
    """A dummy concrete class for testing unregistered configs."""

    def get_weights_and_quantizers(self, layer):
      return []

    def get_activations_and_quantizers(self, layer):
      return []

    def set_quantize_weights(self, layer, quantize_weights):
      pass

    def set_quantize_activations(self, layer, quantize_activations):
      pass

    def get_output_quantizers(self, layer):
      return []

    def get_config(self):
      return {}

  def testSupportsKerasLayer(self):
    # test registered layer
    self.assertTrue(
        self.cluster_preserve_quantize_registry.supports(self.layer_dense))
    self.assertTrue(
        self.cluster_preserve_quantize_registry.supports(self.layer_conv2d))
    # test layer without training weights
    self.assertTrue(
        self.cluster_preserve_quantize_registry.supports(self.layer_relu))

  def testDoesNotSupportCustomLayer(self):
    self.assertFalse(
        self.cluster_preserve_quantize_registry.supports(self.layer_custom))

  def testApplyClusterPreserveWithQuantizeConfig(self):
    # Registered config + registered layer should succeed without raising.
    (self.cluster_preserve_quantize_registry
     .apply_cluster_preserve_quantize_config(
         self.layer_conv2d,
         default_8bit_quantize_registry.Default8BitConvQuantizeConfig(
             ['kernel'], ['activation'], False)))

  def testRaisesErrorUnsupportedQuantizeConfigWithLayer(self):
    with self.assertRaises(
        ValueError, msg='Unregistered QuantizeConfigs should raise error.'):
      (self.cluster_preserve_quantize_registry.
       apply_cluster_preserve_quantize_config(
           self.layer_conv2d, self.CustomQuantizeConfig))

    with self.assertRaises(ValueError,
                           msg='Unregistered layers should raise error.'):
      (self.cluster_preserve_quantize_registry.
       apply_cluster_preserve_quantize_config(
           self.layer_custom, self.CustomQuantizeConfig))
|
117 |
+
|
118 |
+
|
119 |
+
class ClusterPreserveDefault8bitQuantizeRegistryTest(tf.test.TestCase):
  """Checks CQAT-supported layers are also supported by clustering and QAT."""

  def setUp(self):
    super(ClusterPreserveDefault8bitQuantizeRegistryTest, self).setUp()
    self.default_8bit_quantize_registry = (
        default_8bit_quantize_registry.Default8BitQuantizeRegistry())
    self.cluster_registry = clustering_registry.ClusteringRegistry()
    # Test CQAT by default (preserve_sparsity=False)
    self.cluster_preserve_quantize_registry = (
        cluster_preserve_quantize_registry.ClusterPreserveQuantizeRegistry(
            False))

  def testSupportsClusterDefault8bitQuantizeKerasLayers(self):
    # A ClusterPreserveQuantize-supported layer must be supported
    # by both clustering and quantization.
    cqat_layers_config_map = (
        self.cluster_preserve_quantize_registry._LAYERS_CONFIG_MAP)
    for cqat_support_layer in cqat_layers_config_map:
      if cqat_layers_config_map[cqat_support_layer].weight_attrs and (
          cqat_layers_config_map[cqat_support_layer].quantize_config_attrs):
        # Fixed typo in diagnostic message: 'Clusteirng' -> 'Clustering'.
        self.assertIn(
            cqat_support_layer, self.cluster_registry._LAYERS_WEIGHTS_MAP,
            msg='Clustering doesn\'t support {}'.format(cqat_support_layer))
        self.assertIn(
            cqat_support_layer,
            self.default_8bit_quantize_registry._layer_quantize_map,
            msg='Default 8bit QAT doesn\'t support {}'.format(
                cqat_support_layer))
|
147 |
+
|
148 |
+
|
149 |
+
# Run all test cases in this module when executed as a script.
if __name__ == '__main__':
  tf.test.main()
|
collaborative_optimization.png
ADDED
collaborative_optimization_dist.png
ADDED
cripto.jpg
ADDED
deep_crypto.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
from predictors.btc_ltsm import BtcLtsm

if __name__ == "__main__":
    # CLI entry point: each flag enables one pipeline stage; flags may be
    # combined and always run in update -> train -> test order.
    arg_parser = argparse.ArgumentParser(description='BTC Price Prediction')
    for flag, help_text in (('--update', 'Update the dataset'),
                            ('--train', 'Train the model'),
                            ('--test', 'Test the model')):
        arg_parser.add_argument(flag, action='store_true', help=help_text)
    options = arg_parser.parse_args()

    predictor = BtcLtsm()
    if options.update:
        predictor.update_dataset()
    if options.train:
        predictor.train()
    if options.test:
        # Load persisted model state before evaluating.
        predictor.load()
        predictor.test_model()
|
default_n_bit_transforms.py
ADDED
@@ -0,0 +1,825 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Default 8-bit transforms."""
|
16 |
+
|
17 |
+
import collections
|
18 |
+
import inspect
|
19 |
+
|
20 |
+
import numpy as np
|
21 |
+
import tensorflow as tf
|
22 |
+
|
23 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
24 |
+
from tensorflow_model_optimization.python.core.keras.compat import unique_object_name
|
25 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quantize_aware_activation
|
26 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quantize_layer
|
27 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quantizers
|
28 |
+
from tensorflow_model_optimization.python.core.quantization.keras import utils as quantize_utils
|
29 |
+
from tensorflow_model_optimization.python.core.quantization.keras.experimental.default_n_bit import default_n_bit_quantize_configs as configs
|
30 |
+
from tensorflow_model_optimization.python.core.quantization.keras.experimental.default_n_bit import default_n_bit_quantize_registry
|
31 |
+
from tensorflow_model_optimization.python.core.quantization.keras.graph_transformations import transforms
|
32 |
+
|
33 |
+
|
34 |
+
LayerNode = transforms.LayerNode
|
35 |
+
LayerPattern = transforms.LayerPattern
|
36 |
+
|
37 |
+
|
38 |
+
def _get_conv_bn_layers(bn_layer_node):
|
39 |
+
bn_layer = bn_layer_node.layer
|
40 |
+
conv_layer = bn_layer_node.input_layers[0].layer
|
41 |
+
return conv_layer, bn_layer
|
42 |
+
|
43 |
+
|
44 |
+
def _get_weights(bn_layer_node):
|
45 |
+
"""Returns weight values for fused layer, including copying original values in unfused version."""
|
46 |
+
|
47 |
+
return collections.OrderedDict(
|
48 |
+
list(bn_layer_node.input_layers[0].weights.items())
|
49 |
+
+ list(bn_layer_node.weights.items()))
|
50 |
+
|
51 |
+
|
52 |
+
def _get_params(conv_layer, bn_layer, relu_layer=None):
|
53 |
+
"""Retrieve conv_bn params within wrapped layers."""
|
54 |
+
if 'use_bias' in conv_layer['config']:
|
55 |
+
if conv_layer['config']['use_bias']:
|
56 |
+
raise ValueError(
|
57 |
+
'use_bias should not be set to True in a Conv layer when followed '
|
58 |
+
'by BatchNormalization. The bias in the Conv would be redundant '
|
59 |
+
'with the one in the BatchNormalization.')
|
60 |
+
|
61 |
+
del conv_layer['config']['use_bias']
|
62 |
+
|
63 |
+
if 'name' in bn_layer['config']:
|
64 |
+
del bn_layer['config']['name']
|
65 |
+
|
66 |
+
# TODO(pulkitb): remove key conflicts
|
67 |
+
params = dict(
|
68 |
+
list(conv_layer['config'].items()) + list(bn_layer['config'].items()))
|
69 |
+
|
70 |
+
if relu_layer is not None:
|
71 |
+
params['post_activation'] = quantize_utils.deserialize_layer(
|
72 |
+
relu_layer, use_legacy_format=True
|
73 |
+
)
|
74 |
+
|
75 |
+
return params
|
76 |
+
|
77 |
+
|
78 |
+
def _get_layer_node(fused_layer, weights):
  """Wraps a fused layer's serialized config and weights into a LayerNode."""
  serialized = quantize_utils.serialize_layer(
      fused_layer, use_legacy_format=True
  )
  serialized['name'] = serialized['config']['name']
  # Metadata tracks which layers get quantized and whether they carry a
  # custom QuantizeConfig; a freshly fused layer has none assigned yet.
  metadata = {'quantize_config': None}
  return LayerNode(serialized, weights, metadata=metadata)
|
88 |
+
|
89 |
+
|
90 |
+
def _get_quantize_config(layer_node):
|
91 |
+
return layer_node.metadata.get('quantize_config')
|
92 |
+
|
93 |
+
|
94 |
+
def _has_custom_quantize_config(*layer_nodes):
|
95 |
+
for layer_node in layer_nodes:
|
96 |
+
if _get_quantize_config(layer_node) is not None:
|
97 |
+
return True
|
98 |
+
return False
|
99 |
+
|
100 |
+
|
101 |
+
def _normalize_tuple(value):
|
102 |
+
if isinstance(value, int):
|
103 |
+
return (value,)
|
104 |
+
else:
|
105 |
+
return tuple(value)
|
106 |
+
|
107 |
+
|
108 |
+
class Conv2DBatchNormQuantize(transforms.Transform):
  """Ensure FQ does not get placed between Conv and BatchNorm."""

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    # Bit widths forwarded to the output quantize config attached to the BN.
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self):
    # Matches a (Sync)BatchNormalization fed directly by a linear-activation
    # Conv2D/DepthwiseConv2D.
    return LayerPattern(
        'BatchNormalization|SyncBatchNormalization',
        inputs=[LayerPattern(
            'Conv2D|DepthwiseConv2D', config={'activation': 'linear'})])

  def _replace(self, bn_layer_node, conv_layer_node):
    # Leave the match untouched if the user already supplied a custom
    # QuantizeConfig for either layer.
    if _has_custom_quantize_config(bn_layer_node, conv_layer_node):
      return bn_layer_node

    # Replace the conv's activation with a no-op so no fake-quant is emitted
    # between the conv output and the BN input.
    conv_layer_node.layer['config']['activation'] = (
        quantize_utils.serialize_activation(
            quantize_aware_activation.NoOpActivation(), use_legacy_format=True
        )
    )
    # Quantize only the BN's output instead.
    bn_layer_node.metadata['quantize_config'] = (
        configs.DefaultNBitOutputQuantizeConfig(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))

    return bn_layer_node

  def replacement(self, match_layer):
    bn_layer_node = match_layer
    conv_layer_node = match_layer.input_layers[0]

    return self._replace(bn_layer_node, conv_layer_node)

  def custom_objects(self):
    # Objects needed to deserialize models containing this transform's output.
    return {
        'NoOpQuantizeConfig':
            configs.NoOpQuantizeConfig,
        'NoOpActivation':
            quantize_aware_activation.NoOpActivation
    }
|
150 |
+
|
151 |
+
|
152 |
+
class Conv2DReshapeBatchNormQuantize(Conv2DBatchNormQuantize):
  """Ensure FQ does not get placed between Conv, Reshape and BatchNorm."""

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    # The base __init__ already stores both bit widths; the previous
    # re-assignment of _num_bits_weight/_num_bits_activation was redundant
    # and has been removed.
    super(Conv2DReshapeBatchNormQuantize, self).__init__(
        num_bits_weight=num_bits_weight,
        num_bits_activation=num_bits_activation)

  def pattern(self):
    # Matches BN <- Lambda (the sepconv1d squeeze reshape) <- linear conv.
    return LayerPattern(
        'BatchNormalization|SyncBatchNormalization',
        inputs=[LayerPattern(
            'Lambda', config={'name': 'sepconv1d_squeeze.*'},
            inputs=[LayerPattern(
                'Conv2D|DepthwiseConv2D',
                config={'activation': 'linear'})])])

  def replacement(self, match_layer):
    bn_layer_node = match_layer
    reshape_layer_node = bn_layer_node.input_layers[0]
    conv_layer_node = reshape_layer_node.input_layers[0]

    # The intermediate reshape is left as-is; only conv and BN are rewritten.
    return self._replace(bn_layer_node, conv_layer_node)
|
177 |
+
|
178 |
+
|
179 |
+
class Conv2DBatchNormReLUQuantize(Conv2DBatchNormQuantize):
  """Ensure FQ does not get placed between Conv, BatchNorm and ReLU."""

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    super(Conv2DBatchNormReLUQuantize, self).__init__(
        num_bits_weight=num_bits_weight,
        num_bits_activation=num_bits_activation)
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self):
    # TODO(pulkitb): Enhance match to only occur for relu, relu1 and relu6
    parent_pattern = super(Conv2DBatchNormReLUQuantize, self).pattern()
    return LayerPattern('ReLU', inputs=[parent_pattern])

  def _replace(self, relu_layer_node, bn_layer_node, conv_layer_node):
    """Annotates the Conv/BN/ReLU chain so FQ is only placed after the ReLU.

    Args:
      relu_layer_node: LayerNode for the ReLU layer.
      bn_layer_node: LayerNode for the BatchNormalization layer.
      conv_layer_node: LayerNode for the Conv2D/DepthwiseConv2D layer.

    Returns:
      The (possibly annotated) ReLU LayerNode.
    """
    if _has_custom_quantize_config(
        relu_layer_node, bn_layer_node, conv_layer_node):
      return relu_layer_node

    conv_layer_node.layer['config']['activation'] = (
        quantize_utils.serialize_activation(
            quantize_aware_activation.NoOpActivation(),
            use_legacy_format=True))
    # No FQ after BatchNorm; the following ReLU output gets quantized instead.
    bn_layer_node.metadata['quantize_config'] = configs.NoOpQuantizeConfig()

    return relu_layer_node

  def replacement(self, match_layer):
    bn_layer_node = match_layer.input_layers[0]
    conv_layer_node = bn_layer_node.input_layers[0]
    return self._replace(match_layer, bn_layer_node, conv_layer_node)
|
216 |
+
|
217 |
+
|
218 |
+
class Conv2DBatchNormActivationQuantize(Conv2DBatchNormReLUQuantize):
  """Ensure FQ does not get placed between Conv, BatchNorm and Activation.

  Same as Conv2DBatchNormReLUQuantize, but matches a keras `Activation`
  layer configured with 'relu' rather than a `ReLU` layer.
  """

  # Fix: the original defined an __init__ that only forwarded both arguments
  # to the parent and then re-assigned attributes the parent constructor
  # already sets (Conv2DBatchNormReLUQuantize.__init__ stores
  # _num_bits_weight/_num_bits_activation). The inherited constructor has an
  # identical signature and behavior, so the redundant override is removed.

  def pattern(self):
    return LayerPattern(
        'Activation',
        config={'activation': 'relu'},
        # Deliberately reuse the grandparent Conv+BN pattern (the parent's
        # pattern would wrap it in a ReLU layer match).
        inputs=[Conv2DBatchNormQuantize.pattern(self)])
|
233 |
+
|
234 |
+
|
235 |
+
class Conv2DReshapeBatchNormReLUQuantize(Conv2DBatchNormReLUQuantize):
  """Ensure FQ does not get placed between Conv, Reshape, BatchNorm and ReLU."""

  # Fix: redundant __init__ removed — it only forwarded both arguments to the
  # parent (which already stores _num_bits_weight/_num_bits_activation) and
  # re-assigned the same attributes. The inherited constructor is identical.

  def pattern(self):
    return LayerPattern(
        'ReLU', inputs=[Conv2DReshapeBatchNormQuantize.pattern(self)])

  def replacement(self, match_layer):
    """See base class. Skips over the squeeze Lambda between BN and Conv."""
    relu_layer_node = match_layer
    bn_layer_node = relu_layer_node.input_layers[0]
    squeeze_layer_node = bn_layer_node.input_layers[0]
    conv_layer_node = squeeze_layer_node.input_layers[0]

    return self._replace(relu_layer_node, bn_layer_node, conv_layer_node)
|
257 |
+
|
258 |
+
|
259 |
+
class Conv2DReshapeBatchNormActivationQuantize(
    Conv2DReshapeBatchNormReLUQuantize):
  """Ensure FQ does not get placed between Conv, Reshape, BN and Activation.

  Same as Conv2DReshapeBatchNormReLUQuantize, but matches a keras
  `Activation` layer configured with 'relu' rather than a `ReLU` layer.
  """

  # Fix: redundant __init__ removed — it only forwarded both arguments to the
  # parent constructor (which already stores the attributes) and re-assigned
  # them. The inherited constructor has an identical signature and behavior.

  def pattern(self):
    return LayerPattern(
        'Activation',
        config={'activation': 'relu'},
        inputs=[Conv2DReshapeBatchNormQuantize.pattern(self)])
|
275 |
+
|
276 |
+
|
277 |
+
class DenseBatchNormQuantize(transforms.Transform):
  """Transform to be applied to "Dense" + "BatchNorm" Graph.

  This transform disables quantization between Dense and BatchNorm so that
  no FakeQuant op gets placed between them.
  """

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self):
    dense_pattern = LayerPattern('Dense', config={'activation': 'linear'})
    return LayerPattern(
        'BatchNormalization|SyncBatchNormalization', inputs=[dense_pattern])

  def _replace(self, bn_layer_node, dense_layer_node):
    """Annotates the Dense + BatchNorm pair so FQ lands only after the BN.

    Args:
      bn_layer_node: LayerNode for the BatchNormalization layer.
      dense_layer_node: LayerNode for the Dense layer.

    Returns:
      The (possibly annotated) BatchNorm LayerNode.
    """
    # Respect user-provided quantize configs.
    if _has_custom_quantize_config(bn_layer_node, dense_layer_node):
      return bn_layer_node

    noop_activation = quantize_utils.serialize_activation(
        quantize_aware_activation.NoOpActivation(), use_legacy_format=True)
    dense_layer_node.layer['config']['activation'] = noop_activation

    bn_layer_node.metadata['quantize_config'] = (
        configs.DefaultNBitOutputQuantizeConfig(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))
    return bn_layer_node

  def replacement(self, match_layer):
    dense_layer_node = match_layer.input_layers[0]
    return self._replace(match_layer, dense_layer_node)

  def custom_objects(self):
    """Objects this transform introduces, needed for deserialization."""
    return {
        'DefaultNBitOutputQuantizeConfig':
            configs.DefaultNBitOutputQuantizeConfig,
        'NoOpQuantizeConfig':
            configs.NoOpQuantizeConfig,
        'NoOpActivation': quantize_aware_activation.NoOpActivation
    }
|
322 |
+
|
323 |
+
|
324 |
+
class DenseBatchNormReLUQuantize(DenseBatchNormQuantize):
  """Transform to be applied to "Dense" + "BatchNorm" + "ReLU" Graph.

  This transform disables quantization between Dense, BatchNorm and ReLU
  so that no FakeQuant op gets placed between them.
  """

  def pattern(self):
    parent_pattern = super(DenseBatchNormReLUQuantize, self).pattern()
    return LayerPattern('ReLU', inputs=[parent_pattern])

  def _replace(self, relu_layer_node, bn_layer_node, dense_layer_node):
    """Annotates the Dense/BN/ReLU chain so FQ is placed only after the ReLU."""
    if _has_custom_quantize_config(relu_layer_node, bn_layer_node,
                                   dense_layer_node):
      return relu_layer_node

    dense_layer_node.layer['config']['activation'] = (
        quantize_utils.serialize_activation(
            quantize_aware_activation.NoOpActivation(),
            use_legacy_format=True))
    # No FQ after the BatchNorm; the ReLU output gets quantized instead.
    bn_layer_node.metadata['quantize_config'] = configs.NoOpQuantizeConfig()

    return relu_layer_node

  def replacement(self, match_layer):
    bn_layer_node = match_layer.input_layers[0]
    dense_layer_node = bn_layer_node.input_layers[0]
    return self._replace(match_layer, bn_layer_node, dense_layer_node)
|
356 |
+
|
357 |
+
|
358 |
+
class DenseBatchNormActivationQuantize(DenseBatchNormReLUQuantize):
  """Transform to be applied to "Dense" + "BatchNorm" + Activation('relu').

  Same as DenseBatchNormReLUQuantize, but matches a keras `Activation`
  layer configured with 'relu' rather than a `ReLU` layer.
  """

  def pattern(self):
    # Reuse the grandparent Dense+BN pattern (the parent's would add a ReLU).
    return LayerPattern(
        'Activation',
        config={'activation': 'relu'},
        inputs=[DenseBatchNormQuantize.pattern(self)])
|
370 |
+
|
371 |
+
|
372 |
+
class SeparableConv1DQuantize(transforms.Transform):
  """Add QAT support for Keras SeparableConv1D layer.

  Transforms SeparableConv1D into a SeparableConv2D invocation. The Keras
  SeparableConv1D layer internally uses the same code as a SeparableConv2D
  layer. It simply expands and squeezes the tensor dimensions before and
  after the convolutions. Applying this transform ensures the QAT handling
  for SeparableConv2D kicks in and handles the FQ placement properly.

  Maps:
    Input -> SeparableConv1D -> Output
  to
    Input -> Lambda(ExpandDims) -> SeparableConv2D -> Lambda(Squeeze) -> Output

  Unlike SeparableConv2DQuantize, this does not break the layer into
  DepthwiseConv and Conv separately, since no DepthwiseConv1D exists.
  """

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self):
    return LayerPattern('SeparableConv1D')

  def _get_name(self, prefix):
    # TODO(pulkitb): Move away from `unique_object_name` since it isn't
    # exposed as externally usable.
    return unique_object_name(prefix)

  def replacement(self, match_layer):
    """Replaces a SeparableConv1D with ExpandDims -> SepConv2D -> Squeeze."""
    if _has_custom_quantize_config(match_layer):
      return match_layer

    sepconv1d_layer = match_layer.layer
    cfg = sepconv1d_layer['config']
    sepconv1d_weights = list(match_layer.weights.values())

    padding = cfg['padding']
    # SepConv2D does not accept causal padding, and SepConv1D has some
    # special handling for it.
    # TODO(pulkitb): Add support for causal padding.
    if padding == 'causal':
      raise ValueError('SeparableConv1D with causal padding is not supported.')

    # TODO(pulkitb): Handle other base_layer args such as dtype, input_dim etc.

    sepconv2d_layer = keras.layers.SeparableConv2D(
        filters=cfg['filters'],
        kernel_size=(1,) + _normalize_tuple(cfg['kernel_size']),
        strides=_normalize_tuple(cfg['strides']) * 2,
        padding=padding,
        data_format=cfg['data_format'],
        dilation_rate=(1,) + _normalize_tuple(cfg['dilation_rate']),
        depth_multiplier=cfg['depth_multiplier'],
        activation=cfg['activation'],
        use_bias=cfg['use_bias'],
        depthwise_initializer=cfg['depthwise_initializer'],
        pointwise_initializer=cfg['pointwise_initializer'],
        bias_initializer=cfg['bias_initializer'],
        depthwise_regularizer=cfg['depthwise_regularizer'],
        pointwise_regularizer=cfg['pointwise_regularizer'],
        bias_regularizer=cfg['bias_regularizer'],
        activity_regularizer=cfg['activity_regularizer'],
        depthwise_constraint=cfg['depthwise_constraint'],
        pointwise_constraint=cfg['pointwise_constraint'],
        bias_constraint=cfg['bias_constraint'],
        # TODO(pulkitb): Rethink what to do for name. Using the same name leads
        # to confusion, since it's typically separable_conv1d
        name=cfg['name'] + '_QAT_SepConv2D',
        trainable=cfg['trainable'],
    )

    # 1D kernels gain a leading unit spatial dimension in 2D.
    sepconv2d_weights = collections.OrderedDict()
    sepconv2d_weights['depthwise_kernel:0'] = np.expand_dims(
        sepconv1d_weights[0], 0)
    sepconv2d_weights['pointwise_kernel:0'] = np.expand_dims(
        sepconv1d_weights[1], 0)
    if cfg['use_bias']:
      sepconv2d_weights['bias:0'] = sepconv1d_weights[2]

    # Axis to expand/squeeze depends on where the channel dimension sits.
    spatial_dim = 1 if cfg['data_format'] == 'channels_last' else 2

    sepconv2d_layer_config = quantize_utils.serialize_layer(
        sepconv2d_layer, use_legacy_format=True)
    sepconv2d_layer_config['name'] = sepconv2d_layer.name

    # Needed to ensure these new layers are considered for quantization.
    sepconv2d_metadata = {'quantize_config': None}

    # TODO(pulkitb): Consider moving from Lambda to custom ExpandDims/Squeeze.

    # Layer before SeparableConv2D which expands input tensors to match 2D.
    expand_layer = keras.layers.Lambda(
        lambda x: tf.expand_dims(x, spatial_dim),
        name=self._get_name('sepconv1d_expand'),
    )
    expand_layer_config = quantize_utils.serialize_layer(
        expand_layer, use_legacy_format=True)
    expand_layer_config['name'] = expand_layer.name
    expand_layer_metadata = {'quantize_config': configs.NoOpQuantizeConfig()}

    squeeze_layer = keras.layers.Lambda(
        lambda x: tf.squeeze(x, [spatial_dim]),
        name=self._get_name('sepconv1d_squeeze'),
    )
    squeeze_layer_config = quantize_utils.serialize_layer(
        squeeze_layer, use_legacy_format=True)
    squeeze_layer_config['name'] = squeeze_layer.name
    squeeze_layer_metadata = {'quantize_config': configs.NoOpQuantizeConfig()}

    expand_node = LayerNode(
        expand_layer_config, metadata=expand_layer_metadata)
    sepconv2d_node = LayerNode(
        sepconv2d_layer_config,
        weights=sepconv2d_weights,
        metadata=sepconv2d_metadata,
        input_layers=[expand_node])
    return LayerNode(
        squeeze_layer_config,
        metadata=squeeze_layer_metadata,
        input_layers=[sepconv2d_node])
|
504 |
+
|
505 |
+
|
506 |
+
class SeparableConvQuantize(transforms.Transform):
  """Break SeparableConv into a DepthwiseConv and Conv layer.

  SeparableConv is a composition of a DepthwiseConv and a Conv layer. For the
  purpose of quantization, a FQ operation needs to be placed between the
  output of DepthwiseConv and the following Conv.

  This is needed since there is a dynamic tensor in between the two layers,
  and its range information needs to be captured by the FakeQuant op to
  ensure full int8 quantization of the layers is possible.

  Splitting the layer into 2 ensures that each individual layer is handled
  correctly with respect to quantization.
  """

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self):
    return LayerPattern('SeparableConv2D')

  def replacement(self, match_layer):
    """Replaces a SeparableConv2D with DepthwiseConv2D -> Conv2D."""
    if _has_custom_quantize_config(match_layer):
      return match_layer

    sep_cfg = match_layer.layer['config']
    sep_weights = list(match_layer.weights.values())

    # TODO(pulkitb): SeparableConv has kwargs other than constructor args which
    # need to be handled.
    # Applicable to both layers: trainable, dtype, name
    # Applicable to dconv: input_dim, input_shape, batch_input_shape, batch_size
    # Needs special handling: weights
    # Unknown: dynamic, autocast

    # Depthwise half: linear activation, no bias — both live on the 1x1 conv.
    dconv_layer = keras.layers.DepthwiseConv2D(
        kernel_size=sep_cfg['kernel_size'],
        strides=sep_cfg['strides'],
        padding=sep_cfg['padding'],
        depth_multiplier=sep_cfg['depth_multiplier'],
        data_format=sep_cfg['data_format'],
        dilation_rate=sep_cfg['dilation_rate'],
        activation=None,
        use_bias=False,
        depthwise_initializer=sep_cfg['depthwise_initializer'],
        depthwise_regularizer=sep_cfg['depthwise_regularizer'],
        depthwise_constraint=sep_cfg['depthwise_constraint'],
        trainable=sep_cfg['trainable'],
    )
    dconv_weights = collections.OrderedDict(
        [('depthwise_kernel:0', sep_weights[0])])
    dconv_layer_config = quantize_utils.serialize_layer(
        dconv_layer, use_legacy_format=True)
    dconv_layer_config['name'] = dconv_layer.name
    # Needed to ensure these new layers are considered for quantization.
    dconv_metadata = {'quantize_config': None}

    # Pointwise half: carries activation, bias and pointwise kernel.
    conv_layer = keras.layers.Conv2D(
        filters=sep_cfg['filters'],
        kernel_size=(1, 1),  # (1,) * rank
        strides=(1, 1),
        padding='valid',
        data_format=sep_cfg['data_format'],
        dilation_rate=sep_cfg['dilation_rate'],
        groups=1,
        activation=sep_cfg['activation'],
        use_bias=sep_cfg['use_bias'],
        kernel_initializer=sep_cfg['pointwise_initializer'],
        bias_initializer=sep_cfg['bias_initializer'],
        kernel_regularizer=sep_cfg['pointwise_regularizer'],
        bias_regularizer=sep_cfg['bias_regularizer'],
        activity_regularizer=sep_cfg['activity_regularizer'],
        kernel_constraint=sep_cfg['pointwise_constraint'],
        bias_constraint=sep_cfg['bias_constraint'],
        trainable=sep_cfg['trainable'],
    )
    conv_weights = collections.OrderedDict()
    conv_weights['kernel:0'] = sep_weights[1]
    if sep_cfg['use_bias']:
      conv_weights['bias:0'] = sep_weights[2]
    conv_layer_config = quantize_utils.serialize_layer(
        conv_layer, use_legacy_format=True)
    conv_layer_config['name'] = conv_layer.name
    # Needed to ensure these new layers are considered for quantization.
    conv_metadata = {'quantize_config': None}

    dconv_layer_node = LayerNode(
        dconv_layer_config, weights=dconv_weights, metadata=dconv_metadata)
    return LayerNode(
        conv_layer_config,
        weights=conv_weights,
        input_layers=[dconv_layer_node],
        metadata=conv_metadata)
|
602 |
+
|
603 |
+
|
604 |
+
class LayerReLUQuantize(transforms.Transform):
  """Ensure FQ does not get placed between a compute layer and ReLU.

  Matches a ReLU fed by Add, Conv2D, DepthwiseConv2D or Dense, and disables
  quantization on the feeding layer so only the ReLU output is quantized.
  """

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self):
    return LayerPattern(
        'ReLU', inputs=[LayerPattern('Add|Conv2D|DepthwiseConv2D|Dense')])

  def replacement(self, match_layer):
    # Disable output quantization on the layer feeding the ReLU; FQ is
    # placed after the ReLU instead.
    feeding_layer_node = match_layer.input_layers[0]
    feeding_layer_node.metadata['quantize_config'] = (
        configs.NoOpQuantizeConfig())

    return match_layer

  def custom_objects(self):
    """Objects this transform introduces, needed for deserialization."""
    return {
        'NoOpQuantizeConfig': configs.NoOpQuantizeConfig,
    }
|
629 |
+
|
630 |
+
|
631 |
+
class LayerReluActivationQuantize(LayerReLUQuantize):
  """Ensure FQ does not get placed between a compute layer and Activation.

  Same as LayerReLUQuantize, but matches a keras `Activation` layer
  configured with 'relu' rather than a `ReLU` layer.
  """

  # Fix: redundant __init__ removed — it only forwarded both arguments to
  # LayerReLUQuantize.__init__ (which already stores _num_bits_weight and
  # _num_bits_activation) and then re-assigned the same attributes. The
  # inherited constructor has an identical signature and behavior.

  def pattern(self):
    return LayerPattern(
        'Activation',
        config={'activation': 'relu'},
        inputs=[LayerPattern('Add|Conv2D|DepthwiseConv2D|Dense')])
|
646 |
+
|
647 |
+
|
648 |
+
class InputLayerQuantize(transforms.Transform):
  """Quantizes InputLayer, by adding QuantizeLayer after it.

  InputLayer => InputLayer -> QuantizeLayer
  """

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self):
    return LayerPattern('InputLayer')

  def replacement(self, match_layer):
    # Model inputs get an asymmetric all-values range quantizer.
    input_quantizer = quantizers.AllValuesQuantizer(
        num_bits=self._num_bits_activation, per_axis=False,
        symmetric=False, narrow_range=False)  # activation/output
    quant_layer = quantize_layer.QuantizeLayer(input_quantizer)

    layer_config = quantize_utils.serialize_layer(
        quant_layer, use_legacy_format=True)
    layer_config['name'] = quant_layer.name

    return LayerNode(layer_config, input_layers=[match_layer])

  def custom_objects(self):
    """Objects this transform introduces, needed for deserialization."""
    return {
        'QuantizeLayer': quantize_layer.QuantizeLayer,
        'MovingAverageQuantizer': quantizers.MovingAverageQuantizer,
        'AllValuesQuantizer': quantizers.AllValuesQuantizer
    }
|
683 |
+
|
684 |
+
|
685 |
+
class ConcatTransform(transforms.Transform):
  """Transform for Concatenate. Quantize only after concatenation."""

  # pylint:disable=protected-access

  def __init__(self, num_bits_weight: int = 8, num_bits_activation: int = 8):
    self._num_bits_weight = num_bits_weight
    self._num_bits_activation = num_bits_activation

  def pattern(self):
    # TODO(pulkitb): Write a clean way to handle length patterns.
    return LayerPattern(
        'Concatenate', inputs=[LayerPattern('.*'), LayerPattern('.*')])

  def _get_layer_type(self, layer_class_name):
    """Resolves a keras layer class from its class name, or None if unknown."""
    for name, layer_cls in inspect.getmembers(keras.layers, inspect.isclass):
      if name == layer_class_name:
        return layer_cls
    return None

  def _disable_output_quantize(self, quantize_config):
    # TODO(pulkitb): Disabling quantize_config may also require handling
    # activation quantizers. Handle that properly.
    quantize_config.get_output_quantizers = lambda layer: []

  def replacement(self, match_layer):
    """Disables output FQ on layers feeding Concat; quantizes only its output."""
    concat_layer_node = match_layer
    feeding_layer_nodes = match_layer.input_layers

    default_registry = (
        default_n_bit_quantize_registry.DefaultNBitQuantizeRegistry(
            num_bits_weight=self._num_bits_weight,
            num_bits_activation=self._num_bits_activation))

    feed_quantize_configs = []
    for feed_layer_node in feeding_layer_nodes:
      quantize_config = feed_layer_node.metadata.get('quantize_config')
      if not quantize_config:
        layer_class = self._get_layer_type(feed_layer_node.layer['class_name'])
        if layer_class is None:
          # Concat has an input layer we don't recognize. Return.
          return match_layer

        if layer_class == keras.layers.Concatenate:
          # Input layer to Concat is also Concat. Don't quantize it.
          feed_layer_node.metadata['quantize_config'] = (
              configs.NoOpQuantizeConfig())
          continue

        if not default_registry._is_supported_layer(layer_class):
          # Feeding layer is not supported by registry
          return match_layer

        quantize_config = default_registry._get_quantize_config(layer_class)
        feed_layer_node.metadata['quantize_config'] = quantize_config

      feed_quantize_configs.append(quantize_config)

    # TODO(pulkitb): this currently only disables output quantize config, but
    # cannot properly handle if the FQ was added to the activation. Hand this
    # properly.
    for quantize_config in feed_quantize_configs:
      self._disable_output_quantize(quantize_config)

    if not concat_layer_node.metadata.get('quantize_config'):
      concat_layer_node.metadata['quantize_config'] = (
          configs.DefaultNBitOutputQuantizeConfig(
              num_bits_weight=self._num_bits_weight,
              num_bits_activation=self._num_bits_activation))

    return concat_layer_node

  # pylint:enable=protected-access
|
759 |
+
|
760 |
+
|
761 |
+
class ConcatTransform3Inputs(ConcatTransform):
  """Transform for 3 inputs Concatenate."""

  # Fix: redundant __init__ removed — it only forwarded both arguments to
  # ConcatTransform.__init__ (which already stores _num_bits_weight and
  # _num_bits_activation) and re-assigned the same attributes. The inherited
  # constructor has an identical signature and behavior.

  def pattern(self):
    return LayerPattern(
        'Concatenate',
        inputs=[LayerPattern('.*'), LayerPattern('.*'), LayerPattern('.*')])
|
775 |
+
|
776 |
+
|
777 |
+
class ConcatTransform4Inputs(ConcatTransform):
  """Transform for 4 inputs Concatenate."""

  # Fix: redundant __init__ removed — it only duplicated the parent
  # constructor's work (ConcatTransform.__init__ already stores both
  # attributes with the identical signature).

  def pattern(self):
    return LayerPattern(
        'Concatenate',
        inputs=[LayerPattern('.*'), LayerPattern('.*'), LayerPattern('.*'),
                LayerPattern('.*')])
|
792 |
+
|
793 |
+
|
794 |
+
class ConcatTransform5Inputs(ConcatTransform):
  """Transform for 5 inputs Concatenate."""

  # Fix: redundant __init__ removed — it only duplicated the parent
  # constructor's work (ConcatTransform.__init__ already stores both
  # attributes with the identical signature).

  def pattern(self):
    return LayerPattern(
        'Concatenate',
        inputs=[LayerPattern('.*'), LayerPattern('.*'), LayerPattern('.*'),
                LayerPattern('.*'), LayerPattern('.*')])
|
809 |
+
|
810 |
+
|
811 |
+
class ConcatTransform6Inputs(ConcatTransform):
  """Transform for 6 inputs Concatenate."""

  # Fix: redundant __init__ removed — it only duplicated the parent
  # constructor's work (ConcatTransform.__init__ already stores both
  # attributes with the identical signature).

  def pattern(self):
    return LayerPattern(
        'Concatenate',
        inputs=[LayerPattern('.*'), LayerPattern('.*'), LayerPattern('.*'),
                LayerPattern('.*'), LayerPattern('.*'), LayerPattern('.*')])
|
main.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
python main. py
|
2 |
+
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
3 |
+
model. safetensors.index.json: 100%|
|
4 |
+
| 13.5k/13.5k [00:00‹?, PB/s]
|
5 |
+
model-00001-of-00002. safetensors: 100%
|
6 |
+
| 4.95G/4.95G [07:27<00:00, 11. 1MB/s]
|
7 |
+
model-00002-of-00002. safetensors: 100%
|
8 |
+
67. 1M/67.1M [00:05<00:00, 11.5MB/s]
|
9 |
+
Downloading shards: 100% ||
|
10 |
+
| 2/2 [07:35‹00:00, 227.61s/it]
|
11 |
+
Gemma's activation function should be approximate GeLU and not exact GeLU. Changing the activation function to 'gelu_pytorch_tanh.if you want to use the legacy "gelu', edit the "model.config to
|
12 |
+
set hidden_activation=gelu*
|
13 |
+
instead of todden act
|
14 |
+
instead of hidden_act. See https://github.com/huggingface/transformers/pull/29402 for
|
15 |
+
more details.
|
16 |
+
Loading checkpoint shards: 100%|
|
17 |
+
| 2/2 [00:03<00:00, 1.87s/itl
|
18 |
+
generation_config json: 100%||
|
19 |
+
137/137[00:00<?」3B/s]
|
20 |
+
nexa model result:
|
21 |
+
a pouto using the specified caea and resolutiou stones iption: rame rs a photo (cama a):)
|
22 |
+
Captures
|
23 |
+
- camera (str): Specifies the camera
|
24 |
+
to use. Can be \'front\' or \'back\'. The default is \'back\'. \n\n
|
25 |
+
Returns: \n
|
26 |
+
- str: The string contains the file
|
27 |
+
2624 t 12 4a.
|
28 |
+
Photo if nees at ay 96 83662387968t, ample: /storage/emulated/o/Pictures/NAPP/3N
|
29 |
+
123456.Jpg\'\n latency: 367.85967230796814
|
misc.py
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2019, The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
"""Misc."""
|
15 |
+
|
16 |
+
from __future__ import absolute_import
|
17 |
+
from __future__ import division
|
18 |
+
from __future__ import print_function
|
19 |
+
|
20 |
+
import collections
|
21 |
+
import tensorflow as tf
|
22 |
+
|
23 |
+
from tensorflow_model_optimization.python.core.internal.tensor_encoding.core import encoding_stage
|
24 |
+
|
25 |
+
|
26 |
+
@encoding_stage.tf_style_encoding_stage
class SplitBySmallValueEncodingStage(encoding_stage.EncodingStageInterface):
  """Encoding stage that keeps only values above a magnitude threshold.

  The encode step emits two tensors: the values whose absolute magnitude
  exceeds the configured threshold, and the flat indices at which they
  occurred. Every element at or below the threshold is reconstructed as
  zero by the decode step, so this stage is lossy.
  """

  ENCODED_INDICES_KEY = 'indices'
  ENCODED_VALUES_KEY = 'non_zero_floats'
  THRESHOLD_PARAMS_KEY = 'threshold'

  def __init__(self, threshold=1e-8):
    """Initializer for the SplitBySmallValueEncodingStage.

    Args:
      threshold: Magnitude at or below which input elements are dropped
        (decoded back as zero).
    """
    self._threshold = threshold

  @property
  def name(self):
    """See base class."""
    return 'split_by_small_value'

  @property
  def compressible_tensors_keys(self):
    """See base class."""
    return [self.ENCODED_VALUES_KEY, self.ENCODED_INDICES_KEY]

  @property
  def commutes_with_sum(self):
    """See base class."""
    return False

  @property
  def decode_needs_input_shape(self):
    """See base class."""
    return True

  def get_params(self):
    """See base class."""
    encode_params = collections.OrderedDict(
        [(self.THRESHOLD_PARAMS_KEY, self._threshold)])
    return encode_params, collections.OrderedDict()

  def encode(self, x, encode_params):
    """See base class."""
    threshold = tf.cast(encode_params[self.THRESHOLD_PARAMS_KEY], x.dtype)
    # Positions (as an [N, 1] tensor) of the values that survive the cut.
    kept_positions = tf.cast(
        tf.compat.v2.where(tf.abs(x) > threshold), tf.int32)
    kept_values = tf.gather_nd(x, kept_positions)
    flat_indices = tf.squeeze(kept_positions, axis=1)
    return collections.OrderedDict([
        (self.ENCODED_INDICES_KEY, flat_indices),
        (self.ENCODED_VALUES_KEY, kept_values),
    ])

  def decode(self,
             encoded_tensors,
             decode_params,
             num_summands=None,
             shape=None):
    """See base class."""
    del decode_params, num_summands  # Unused.

    flat_indices = encoded_tensors[self.ENCODED_INDICES_KEY]
    kept_values = encoded_tensors[self.ENCODED_VALUES_KEY]

    # SparseTensor requires int64 [N, 1] indices and an int64 dense shape.
    sparse_indices = tf.cast(tf.expand_dims(flat_indices, 1), tf.int64)
    sparse_tensor = tf.SparseTensor(
        indices=sparse_indices,
        values=kept_values,
        dense_shape=tf.cast(shape, tf.int64))
    return tf.sparse.to_dense(sparse_tensor)
110 |
+
|
111 |
+
@encoding_stage.tf_style_encoding_stage
class DifferenceBetweenIntegersEncodingStage(
    encoding_stage.EncodingStageInterface):
  """Encoding stage storing consecutive differences of an integer sequence.

  Useful when the original integers can be large while consecutive
  differences are small and thus admit a more compact representation —
  e.g. the increasing index sequence produced by
  `SplitBySmallValueEncodingStage`.

  The encode method expects a rank-1 tensor with an integer dtype.
  """

  ENCODED_VALUES_KEY = 'difference_between_integers'

  @property
  def name(self):
    """See base class."""
    return 'difference_between_integers'

  @property
  def compressible_tensors_keys(self):
    """See base class."""
    return [self.ENCODED_VALUES_KEY]

  @property
  def commutes_with_sum(self):
    """See base class."""
    return False

  @property
  def decode_needs_input_shape(self):
    """See base class."""
    return False

  def get_params(self):
    """See base class."""
    return collections.OrderedDict(), collections.OrderedDict()

  def encode(self, x, encode_params):
    """See base class."""
    del encode_params  # Unused.
    if x.shape.ndims != 1:
      raise ValueError('Number of dimensions must be 1. Shape of x: %s' %
                       x.shape)
    if not x.dtype.is_integer:
      raise TypeError(
          'Unsupported input type: %s. Support only integer types.' % x.dtype)

    # Each element minus its predecessor; the first element is kept as-is.
    shifted = tf.concat([[0], x[:-1]], 0)
    return collections.OrderedDict([(self.ENCODED_VALUES_KEY, x - shifted)])

  def decode(self,
             encoded_tensors,
             decode_params,
             num_summands=None,
             shape=None):
    """See base class."""
    del decode_params, num_summands, shape  # Unused
    # Running sum inverts the differencing exactly (lossless).
    return tf.cumsum(encoded_tensors[self.ENCODED_VALUES_KEY])
|
misc_test.py
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2019, The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
|
15 |
+
from __future__ import absolute_import
|
16 |
+
from __future__ import division
|
17 |
+
from __future__ import print_function
|
18 |
+
|
19 |
+
import itertools
|
20 |
+
|
21 |
+
from absl.testing import parameterized
|
22 |
+
import numpy as np
|
23 |
+
import tensorflow as tf
|
24 |
+
|
25 |
+
from tensorflow_model_optimization.python.core.internal.tensor_encoding.stages.research import misc
|
26 |
+
from tensorflow_model_optimization.python.core.internal.tensor_encoding.testing import test_utils
|
27 |
+
|
28 |
+
|
29 |
+
if tf.executing_eagerly():
  tf.compat.v1.disable_eager_execution()


class SplitBySmallValueEncodingStageTest(test_utils.BaseEncodingStageTest):
  """Tests for misc.SplitBySmallValueEncodingStage."""

  def default_encoding_stage(self):
    """See base class."""
    return misc.SplitBySmallValueEncodingStage()

  def default_input(self):
    """See base class."""
    return tf.random.uniform([50], minval=-1.0, maxval=1.0)

  @property
  def is_lossless(self):
    """See base class."""
    return False

  def common_asserts_for_test_data(self, data):
    """See base class."""
    self._assert_is_integer(
        data.encoded_x[misc.SplitBySmallValueEncodingStage.ENCODED_INDICES_KEY])

  def _assert_is_integer(self, indices):
    """Asserts that indices values are integers."""
    assert indices.dtype == np.int32

  @parameterized.parameters([tf.float32, tf.float64])
  def test_input_types(self, x_dtype):
    # Tests different input dtypes.
    x = tf.constant([1.0, 0.1, 0.01, 0.001, 0.0001], dtype=x_dtype)
    stage = misc.SplitBySmallValueEncodingStage(threshold=0.05)
    encode_params, decode_params = stage.get_params()
    encoded_x, decoded_x = self.encode_decode_x(stage, x, encode_params,
                                                decode_params)
    test_data = self.evaluate_test_data(
        test_utils.TestData(x, encoded_x, decoded_x))

    self._assert_is_integer(test_data.encoded_x[
        misc.SplitBySmallValueEncodingStage.ENCODED_INDICES_KEY])

    # The numpy arrays must have the same dtype as the arrays from test_data.
    expected_values = np.array([1.0, 0.1], dtype=x.dtype.as_numpy_dtype)
    expected_indices = np.array([0, 1], dtype=np.int32)
    expected_decoded = np.array([1.0, 0.1, 0., 0., 0.],
                                dtype=x_dtype.as_numpy_dtype)
    self.assertAllEqual(test_data.encoded_x[stage.ENCODED_VALUES_KEY],
                        expected_values)
    self.assertAllEqual(test_data.encoded_x[stage.ENCODED_INDICES_KEY],
                        expected_indices)
    self.assertAllEqual(test_data.decoded_x, expected_decoded)

  def test_all_zero_input_works(self):
    # Tests that encoding does not blow up with all-zero input. With all-zero
    # input, both of the encoded values will be empty arrays.
    stage = misc.SplitBySmallValueEncodingStage()
    test_data = self.run_one_to_many_encode_decode(stage,
                                                   lambda: tf.zeros([50]))
    self.assertAllEqual(np.zeros((50)).astype(np.float32), test_data.decoded_x)

  def test_all_below_threshold_works(self):
    # Tests that encoding does not blow up with all-below-threshold input. In
    # this case, both of the encoded values will be empty arrays.
    stage = misc.SplitBySmallValueEncodingStage(threshold=0.1)
    x = tf.random.uniform([50], minval=-0.01, maxval=0.01)
    encode_params, decode_params = stage.get_params()
    encoded_x, decoded_x = self.encode_decode_x(stage, x, encode_params,
                                                decode_params)
    test_data = self.evaluate_test_data(
        test_utils.TestData(x, encoded_x, decoded_x))

    empty_indices = np.array([], dtype=np.int32).reshape([0])
    self.assertAllEqual(test_data.encoded_x[stage.ENCODED_VALUES_KEY], [])
    self.assertAllEqual(test_data.encoded_x[stage.ENCODED_INDICES_KEY],
                        empty_indices)
    self.assertAllEqual(test_data.decoded_x,
                        np.zeros([50], dtype=x.dtype.as_numpy_dtype))
|
110 |
+
|
111 |
+
class DifferenceBetweenIntegersEncodingStageTest(
    test_utils.BaseEncodingStageTest):
  """Tests for misc.DifferenceBetweenIntegersEncodingStage."""

  def default_encoding_stage(self):
    """See base class."""
    return misc.DifferenceBetweenIntegersEncodingStage()

  def default_input(self):
    """See base class."""
    return tf.random.uniform([10], minval=0, maxval=10, dtype=tf.int64)

  @property
  def is_lossless(self):
    """See base class."""
    return True

  def common_asserts_for_test_data(self, data):
    """See base class."""
    self.assertAllEqual(data.x, data.decoded_x)

  @parameterized.parameters(
      itertools.product([[1,], [2,], [10,]], [tf.int32, tf.int64]))
  def test_with_multiple_input_shapes(self, input_dims, dtype):

    def x_fn():
      return tf.random.uniform(input_dims, minval=0, maxval=10, dtype=dtype)

    test_data = self.run_one_to_many_encode_decode(
        self.default_encoding_stage(), x_fn)
    self.common_asserts_for_test_data(test_data)

  def test_empty_input_static(self):
    # Tests that the encoding works when the input shape is [0].
    x = tf.convert_to_tensor([], dtype=tf.int32)
    assert x.shape.as_list() == [0]

    stage = self.default_encoding_stage()
    encode_params, decode_params = stage.get_params()
    encoded_x, decoded_x = self.encode_decode_x(stage, x, encode_params,
                                                decode_params)

    test_data = self.evaluate_test_data(
        test_utils.TestData(x, encoded_x, decoded_x))
    self.common_asserts_for_test_data(test_data)

  def test_empty_input_dynamic(self):
    # Tests that the encoding works when the input shape is [0], but not
    # statically known.
    y = tf.zeros((10,))
    indices = tf.compat.v2.where(tf.abs(y) > 1e-8)
    x = tf.cast(tf.gather_nd(y, indices), tf.int32)  # Empty tensor.
    assert x.shape.as_list() == [None]
    stage = self.default_encoding_stage()
    encode_params, decode_params = stage.get_params()
    encoded_x, decoded_x = self.encode_decode_x(stage, x, encode_params,
                                                decode_params)

    test_data = self.evaluate_test_data(
        test_utils.TestData(x, encoded_x, decoded_x))
    assert test_data.x.shape == (0,)
    assert test_data.encoded_x[stage.ENCODED_VALUES_KEY].shape == (0,)
    assert test_data.decoded_x.shape == (0,)

  @parameterized.parameters([tf.bool, tf.float32])
  def test_encode_unsupported_type_raises(self, dtype):
    stage = self.default_encoding_stage()
    # assertRaisesRegexp is a deprecated alias removed in Python 3.12;
    # assertRaisesRegex is the supported spelling.
    with self.assertRaisesRegex(TypeError, 'Unsupported input type'):
      self.run_one_to_many_encode_decode(
          stage, lambda: tf.cast(self.default_input(), dtype))

  def test_encode_unsupported_input_shape_raises(self):
    x = tf.random.uniform((3, 4), maxval=10, dtype=tf.int32)
    stage = self.default_encoding_stage()
    params, _ = stage.get_params()
    with self.assertRaisesRegex(ValueError, 'Number of dimensions must be 1'):
      stage.encode(x, params)


if __name__ == '__main__':
  tf.test.main()
|
mnist_cnn.py
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
# pylint: disable=missing-docstring
|
16 |
+
"""Train a simple convnet on the MNIST dataset."""
|
17 |
+
from __future__ import print_function
|
18 |
+
|
19 |
+
from absl import app as absl_app
|
20 |
+
from absl import flags
|
21 |
+
import tensorflow as tf
|
22 |
+
|
23 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
24 |
+
from tensorflow_model_optimization.python.core.sparsity.keras import prune
|
25 |
+
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks
|
26 |
+
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule
|
27 |
+
|
28 |
+
|
29 |
+
# Short aliases used throughout this script.
PolynomialDecay = pruning_schedule.PolynomialDecay
l = keras.layers

FLAGS = flags.FLAGS

# Training hyperparameters.
batch_size = 128
num_classes = 10
epochs = 12

flags.DEFINE_string('output_dir', '/tmp/mnist_train/',
                    'Output directory to hold tensorboard events')
|
40 |
+
|
41 |
+
|
42 |
+
def build_sequential_model(input_shape):
  """Builds the MNIST convnet as a plain (unpruned) Sequential model."""
  return keras.Sequential([
      l.Conv2D(32, 5, padding='same', activation='relu',
               input_shape=input_shape),
      l.MaxPooling2D((2, 2), (2, 2), padding='same'),
      l.BatchNormalization(),
      l.Conv2D(64, 5, padding='same', activation='relu'),
      l.MaxPooling2D((2, 2), (2, 2), padding='same'),
      l.Flatten(),
      l.Dense(1024, activation='relu'),
      l.Dropout(0.4),
      l.Dense(num_classes, activation='softmax'),
  ])
57 |
+
|
58 |
+
def build_functional_model(input_shape):
  """Builds the same convnet with the Keras functional API."""
  inp = keras.Input(shape=input_shape)
  net = l.Conv2D(32, 5, padding='same', activation='relu')(inp)
  net = l.MaxPooling2D((2, 2), (2, 2), padding='same')(net)
  net = l.BatchNormalization()(net)
  net = l.Conv2D(64, 5, padding='same', activation='relu')(net)
  net = l.MaxPooling2D((2, 2), (2, 2), padding='same')(net)
  net = l.Flatten()(net)
  net = l.Dense(1024, activation='relu')(net)
  net = l.Dropout(0.4)(net)
  out = l.Dense(num_classes, activation='softmax')(net)
  return keras.models.Model([inp], [out])
|
71 |
+
|
72 |
+
|
73 |
+
def build_layerwise_model(input_shape, **pruning_params):
  """Builds the convnet with pruning wrappers applied layer by layer."""
  return keras.Sequential([
      prune.prune_low_magnitude(
          l.Conv2D(32, 5, padding='same', activation='relu'),
          input_shape=input_shape,
          **pruning_params),
      l.MaxPooling2D((2, 2), (2, 2), padding='same'),
      l.BatchNormalization(),
      prune.prune_low_magnitude(
          l.Conv2D(64, 5, padding='same', activation='relu'),
          **pruning_params),
      l.MaxPooling2D((2, 2), (2, 2), padding='same'),
      l.Flatten(),
      prune.prune_low_magnitude(
          l.Dense(1024, activation='relu'), **pruning_params),
      l.Dropout(0.4),
      prune.prune_low_magnitude(
          l.Dense(num_classes, activation='softmax'), **pruning_params),
  ])
|
95 |
+
|
96 |
+
|
97 |
+
def train_and_save(models, x_train, y_train, x_test, y_test):
  """Trains each model, evaluates it, then round-trips it through SavedModel.

  Args:
    models: Iterable of (possibly pruning-wrapped) Keras models.
    x_train, y_train: Training images and one-hot labels.
    x_test, y_test: Test images and one-hot labels.
  """
  for model in models:
    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer='adam',
        metrics=['accuracy'],
    )

    # Print the model summary.
    model.summary()

    # Peg the pruning step to the optimizer's step, and write pruning
    # summaries to tensorboard.
    callbacks = [
        pruning_callbacks.UpdatePruningStep(),
        pruning_callbacks.PruningSummaries(log_dir=FLAGS.output_dir),
    ]

    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epochs,
        verbose=1,
        callbacks=callbacks,
        validation_data=(x_test, y_test))
    eval_scores = model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', eval_scores[0])
    print('Test accuracy:', eval_scores[1])

    # Export and import the model. Check that accuracy persists.
    saved_model_dir = '/tmp/saved_model'
    print('Saving model to: ', saved_model_dir)
    keras.models.save_model(model, saved_model_dir, save_format='tf')
    print('Loading model from: ', saved_model_dir)
    loaded_model = keras.models.load_model(saved_model_dir)

    eval_scores = loaded_model.evaluate(x_test, y_test, verbose=0)
    print('Test loss:', eval_scores[0])
    print('Test accuracy:', eval_scores[1])
|
137 |
+
|
138 |
+
|
139 |
+
def main(unused_argv):
  """Trains layerwise-, sequential- and functional-API pruned MNIST models."""
  # input image dimensions
  img_rows, img_cols = 28, 28

  # the data, shuffled and split between train and test sets
  (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

  if keras.backend.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
  else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

  # Scale pixel values into [0, 1].
  x_train = x_train.astype('float32') / 255
  x_test = x_test.astype('float32') / 255
  print('x_train shape:', x_train.shape)
  print(x_train.shape[0], 'train samples')
  print(x_test.shape[0], 'test samples')

  # convert class vectors to binary class matrices
  y_train = keras.utils.to_categorical(y_train, num_classes)
  y_test = keras.utils.to_categorical(y_test, num_classes)

  pruning_params = {
      'pruning_schedule':
          PolynomialDecay(
              initial_sparsity=0.1,
              final_sparsity=0.75,
              begin_step=1000,
              end_step=5000,
              frequency=100)
  }

  # Wrap the same architecture three ways: per-layer, whole-Sequential,
  # and whole-functional.
  layerwise_model = build_layerwise_model(input_shape, **pruning_params)
  sequential_model = prune.prune_low_magnitude(
      build_sequential_model(input_shape), **pruning_params)
  functional_model = prune.prune_low_magnitude(
      build_functional_model(input_shape), **pruning_params)

  train_and_save([layerwise_model, sequential_model, functional_model],
                 x_train, y_train, x_test, y_test)


if __name__ == '__main__':
  absl_app.run(main)
|
mnist_e2e_sparsity2x4.py
ADDED
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
# pylint: disable=missing-docstring,protected-access
|
16 |
+
"""Train a simple convnet on the MNIST dataset with sparsity 2x4.
|
17 |
+
|
18 |
+
It is based on mnist_e2e.py
|
19 |
+
"""
|
20 |
+
from __future__ import print_function
|
21 |
+
|
22 |
+
from absl import app as absl_app
|
23 |
+
import tensorflow as tf
|
24 |
+
|
25 |
+
from tensorflow_model_optimization.python.core.keras import test_utils as keras_test_utils
|
26 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
27 |
+
from tensorflow_model_optimization.python.core.sparsity.keras import prune
|
28 |
+
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks
|
29 |
+
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule
|
30 |
+
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_utils
|
31 |
+
from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper
|
32 |
+
|
33 |
+
|
34 |
+
# Short aliases used throughout this script.
ConstantSparsity = pruning_schedule.ConstantSparsity
l = keras.layers

# Fixed seed for reproducible runs.
tf.random.set_seed(42)

# Training hyperparameters.
batch_size = 128
num_classes = 10
epochs = 1

# Layer types that support 2x4 structured sparsity.
PRUNABLE_2x4_LAYERS = (keras.layers.Conv2D, keras.layers.Dense)
|
44 |
+
|
45 |
+
|
46 |
+
def check_model_sparsity_2x4(model):
  """Checks that every prunable Conv2D/Dense weight carries m-by-n sparsity.

  Returns:
    False as soon as one prunable weight fails the m-by-n check, else True.
  """
  for wrapped in model.layers:
    if not isinstance(wrapped, pruning_wrapper.PruneLowMagnitude):
      continue
    if not isinstance(wrapped.layer, PRUNABLE_2x4_LAYERS):
      continue
    for weight in wrapped.layer.get_prunable_weights():
      if not pruning_utils.is_pruned_m_by_n(weight):
        return False
  return True
|
54 |
+
|
55 |
+
|
56 |
+
def build_layerwise_model(input_shape, **pruning_params):
  """Builds the convnet, pruning only layers eligible for 2x4 sparsity."""
  return keras.Sequential([
      prune.prune_low_magnitude(
          l.Conv2D(32, 5, padding='same', activation='relu',
                   input_shape=input_shape),
          **pruning_params),
      l.MaxPooling2D((2, 2), (2, 2), padding='same'),
      prune.prune_low_magnitude(
          l.Conv2D(64, 5, padding='same'), **pruning_params),
      l.BatchNormalization(),
      l.ReLU(),
      l.MaxPooling2D((2, 2), (2, 2), padding='same'),
      l.Flatten(),
      prune.prune_low_magnitude(
          l.Dense(1024, activation='relu'), **pruning_params),
      l.Dropout(0.4),
      l.Dense(num_classes, activation='softmax'),
  ])
|
78 |
+
|
79 |
+
|
80 |
+
def train(model, x_train, y_train, x_test, y_test):
  """Fits the pruned model, verifies 2x4 sparsity, and strips the wrappers.

  Returns:
    The trained model with pruning wrappers removed.
  """
  model.compile(
      loss=keras.losses.categorical_crossentropy,
      optimizer='adam',
      metrics=['accuracy'],
  )
  model.run_eagerly = True

  # Print the model summary.
  model.summary()

  # Peg the pruning step to the optimizer's step, and write pruning
  # summaries to tensorboard.
  callbacks = [
      pruning_callbacks.UpdatePruningStep(),
      pruning_callbacks.PruningSummaries(log_dir='/tmp/logs'),
  ]

  model.fit(
      x_train,
      y_train,
      batch_size=batch_size,
      epochs=epochs,
      verbose=1,
      callbacks=callbacks,
      validation_data=(x_test, y_test))
  eval_scores = model.evaluate(x_test, y_test, verbose=0)
  print('Test loss:', eval_scores[0])
  print('Test accuracy:', eval_scores[1])

  # Check sparsity 2x4 type before stripping pruning
  is_pruned_2x4 = check_model_sparsity_2x4(model)
  print('Pass the check for sparsity 2x4: ', is_pruned_2x4)

  return prune.strip_pruning(model)
|
116 |
+
|
117 |
+
|
118 |
+
def main(unused_argv):
  """Trains a 2x4-sparse MNIST model and exports it to TFLite."""
  ##############################################################################
  # Prepare training and testing data
  ##############################################################################
  (x_train, y_train), (x_test, y_test), input_shape = (
      keras_test_utils.get_preprocessed_mnist_data())

  ##############################################################################
  # Train a model with sparsity 2x4.
  ##############################################################################
  pruning_params = {
      'pruning_schedule': ConstantSparsity(0.5, begin_step=0, frequency=100),
      'sparsity_m_by_n': (2, 4),
  }

  model = build_layerwise_model(input_shape, **pruning_params)
  pruned_model = train(model, x_train, y_train, x_test, y_test)

  # Write a model that has been pruned with 2x4 sparsity.
  converter = tf.lite.TFLiteConverter.from_keras_model(pruned_model)
  tflite_model = converter.convert()

  tflite_model_path = '/tmp/mnist_2x4.tflite'
  print('model is saved to {}'.format(tflite_model_path))
  with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)

  print('evaluate pruned model: ')
  print(keras_test_utils.eval_mnist_tflite(model_content=tflite_model))
  # the accuracy of 2:4 pruning model is 0.9866
  # the accuracy of unstructured model with 50% is 0.9863


if __name__ == '__main__':
  absl_app.run(main)
|
periodical_update_and_scheduling_test.py
ADDED
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Tests for when the training and inference graphs are the same."""
|
16 |
+
|
17 |
+
import os
|
18 |
+
import tempfile
|
19 |
+
|
20 |
+
import tensorflow as tf
|
21 |
+
|
22 |
+
from tensorflow_model_optimization.python.core.common.keras.compression.algorithms import periodical_update_and_scheduling as svd
|
23 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
24 |
+
from tensorflow_model_optimization.python.core.keras.testing import test_utils_mnist
|
25 |
+
|
26 |
+
|
27 |
+
def _build_model():
  """Builds the small LeNet-style MNIST convnet used by these tests.

  Returns:
    An uncompiled functional `keras.Model` mapping (28, 28) images to
    10 un-normalized class logits.
  """
  inputs = keras.layers.Input(shape=(28, 28), name='input')
  net = keras.layers.Reshape((28, 28, 1))(inputs)
  net = keras.layers.Conv2D(
      20, 5, activation='relu', padding='valid', name='conv1'
  )(net)
  net = keras.layers.MaxPool2D(2, 2)(net)
  net = keras.layers.Conv2D(
      50, 5, activation='relu', padding='valid', name='conv2'
  )(net)
  net = keras.layers.MaxPool2D(2, 2)(net)
  net = keras.layers.Flatten()(net)
  net = keras.layers.Dense(500, activation='relu', name='fc1')(net)
  logits = keras.layers.Dense(10, name='fc2')(net)

  return keras.Model(inputs=[inputs], outputs=[logits])
|
44 |
+
|
45 |
+
|
46 |
+
def _get_dataset():
  """Loads MNIST, scales pixels to [0, 1], and trims the training split.

  Returns:
    ((x_train, y_train), (x_test, y_test)) where the training arrays are
    truncated to the first 1000 examples.
  """
  (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
  x_train = x_train / 255.0
  x_test = x_test / 255.0
  # Use subset of 60000 examples to keep unit test speed fast.
  return (x_train[:1000], y_train[:1000]), (x_test, y_test)
|
54 |
+
|
55 |
+
|
56 |
+
def _train_model(model):
  """Compiles `model` and fits it for one epoch on the MNIST subset.

  Args:
    model: a keras model producing 10 un-normalized logits per example.
  """
  model.compile(
      optimizer='adam',
      loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=['accuracy'])

  (x_train, y_train), _ = _get_dataset()
  model.fit(x_train, y_train, epochs=1)
|
64 |
+
|
65 |
+
|
66 |
+
def _save_as_saved_model(model):
  """Saves `model` into a fresh temporary directory.

  Args:
    model: the keras model to export.

  Returns:
    Path of the temporary directory containing the SavedModel.
  """
  export_dir = tempfile.mkdtemp()
  model.save(export_dir)
  return export_dir
|
70 |
+
|
71 |
+
|
72 |
+
# TODO(tfmot): reuse existing test utilities.
def _convert_to_tflite(saved_model_dir):
  """Converts a SavedModel directory to a TFLite flatbuffer file.

  Args:
    saved_model_dir: path to a SavedModel directory.

  Returns:
    Path of the written `.tflite` file.
  """
  fd, tflite_file = tempfile.mkstemp()
  # mkstemp returns an *open* OS-level file descriptor; the original code
  # discarded it without closing, leaking one fd per conversion. Close it
  # immediately — we reopen the path with open() below.
  os.close(fd)

  converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
  tflite_model = converter.convert()

  with open(tflite_file, 'wb') as f:
    f.write(tflite_model)

  return tflite_file
|
83 |
+
|
84 |
+
|
85 |
+
def _get_directory_size_in_bytes(directory):
|
86 |
+
total = 0
|
87 |
+
try:
|
88 |
+
for entry in os.scandir(directory):
|
89 |
+
if entry.is_file():
|
90 |
+
# if it's a file, use stat() function
|
91 |
+
total += entry.stat().st_size
|
92 |
+
elif entry.is_dir():
|
93 |
+
# if it's a directory, recursively call this function
|
94 |
+
total += _get_directory_size_in_bytes(entry.path)
|
95 |
+
except NotADirectoryError:
|
96 |
+
# if `directory` isn't a directory, get the file size then
|
97 |
+
return os.path.getsize(directory)
|
98 |
+
except PermissionError:
|
99 |
+
# if for whatever reason we can't open the folder, return 0
|
100 |
+
return 0
|
101 |
+
return total
|
102 |
+
|
103 |
+
|
104 |
+
class FunctionalTest(tf.test.TestCase):
  """End-to-end tests for the periodical-update-and-scheduling SVD algorithm.

  Each test builds the small MNIST convnet from `_build_model`, applies
  `svd.SVD` (optimize -> compress), and checks size reduction or accuracy
  in both TF and TFLite form.
  """

  # TODO(tfmot): can simplify to single layer test that checks exact
  # dimensions of weights.
  def testSVD_ReducesSavedModelSize(self):
    # SavedModel on disk must shrink by more than 3x after compression.
    model = _build_model()

    original_saved_model_dir = _save_as_saved_model(model)

    algorithm = svd.SVD(rank=16, update_freq=1, warmup_step=10)
    training_model = algorithm.optimize_model(model)
    compressed_model = algorithm.compress_model(training_model)

    saved_model_dir = _save_as_saved_model(compressed_model)

    original_size = _get_directory_size_in_bytes(original_saved_model_dir)
    compressed_size = _get_directory_size_in_bytes(saved_model_dir)

    self.assertLess(compressed_size, original_size / 3)

  def testSVD_HasReasonableAccuracy_TF(self):
    # One epoch of training on 1000 examples should still exceed 60% accuracy
    # after compression.
    model = _build_model()

    algorithm = svd.SVD(rank=16, update_freq=1, warmup_step=10)
    training_model = algorithm.optimize_model(model)

    _train_model(training_model)

    compressed_model = algorithm.compress_model(training_model)

    _, (x_test, y_test) = _get_dataset()

    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    compressed_model.compile(
        optimizer='adam', loss=loss_fn, metrics=['accuracy'])

    # results[0] is loss, results[1] is accuracy (per compile metrics).
    results = compressed_model.evaluate(x_test, y_test)

    self.assertGreater(results[1], 0.60)

  def testSVD_ReducesTFLiteModelSize(self):
    # The TFLite flatbuffer must shrink by more than 6x after compression.
    model = _build_model()

    original_saved_model_dir = _save_as_saved_model(model)
    original_tflite_file = _convert_to_tflite(original_saved_model_dir)

    algorithm = svd.SVD(rank=16, update_freq=1, warmup_step=10)
    training_model = algorithm.optimize_model(model)
    compressed_model = algorithm.compress_model(training_model)

    saved_model_dir = _save_as_saved_model(compressed_model)
    compressed_tflite_file = _convert_to_tflite(saved_model_dir)

    original_size = os.path.getsize(original_tflite_file)
    compressed_size = os.path.getsize(compressed_tflite_file)

    self.assertLess(compressed_size, original_size / 6)

  def testSVD_HasReasonableAccuracy_TFLite(self):
    # Same accuracy bar as the TF test, but evaluated through the TFLite
    # interpreter via the shared MNIST test utility.
    model = _build_model()

    algorithm = svd.SVD(rank=16, update_freq=1, warmup_step=10)
    training_model = algorithm.optimize_model(model)

    _train_model(training_model)

    compressed_model = algorithm.compress_model(training_model)

    saved_model_dir = _save_as_saved_model(compressed_model)
    compressed_tflite_file = _convert_to_tflite(saved_model_dir)

    accuracy = test_utils_mnist.eval_tflite(compressed_tflite_file)

    self.assertGreater(accuracy, 0.60)

  # TODO(tfmot): can simplify to single layer test.
  def testSVD_BreaksDownLayerWeights(self):
    # Compression replaces conv1's {kernel, bias} pair with three weights
    # (the SVD factors plus bias), so the weight count goes 2 -> 3.
    model = _build_model()

    first_conv_layer = model.layers[2]
    self.assertLen(first_conv_layer.weights, 2)

    algorithm = svd.SVD(rank=16, update_freq=1, warmup_step=10)
    training_model = algorithm.optimize_model(model)
    compressed_model = algorithm.compress_model(training_model)

    first_conv_layer = compressed_model.layers[2]

    self.assertLen(first_conv_layer.weights, 3)

  # TODO(tfmot): can simplify to single layer test.
  def testSVD_PreservesPretrainedWeights(self):
    # Verifies optimize_model initializes the training weights from the
    # pretrained kernel/bias rather than re-initializing them.
    i = keras.layers.Input(shape=(2), name='input')
    output = keras.layers.Dense(3, name='fc1')(i)
    model = keras.Model(inputs=[i], outputs=[output])

    dense_layer_weights = model.layers[1].get_weights()

    algorithm = svd.SVD(rank=1, update_freq=1, warmup_step=10)
    training_model = algorithm.optimize_model(model)

    dense_layer_training_weights = training_model.layers[1].get_weights()

    # kernel
    algorithm.weight_reprs = []
    algorithm.init_training_weights(dense_layer_weights[0])
    w1_repr, w2_repr = algorithm.weight_reprs
    assert (w1_repr.kwargs['initializer'](None) == \
            dense_layer_training_weights[0]).numpy().all()
    assert (w2_repr.kwargs['initializer'](None) == \
            dense_layer_training_weights[1]).numpy().all()

    # bias
    assert (dense_layer_weights[1] == dense_layer_training_weights[2]).all()
|
219 |
+
|
220 |
+
|
221 |
+
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
  tf.test.main()
|
prune_preserve_quantize_registry.py
ADDED
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Registry responsible for built-in keras classes."""
|
16 |
+
|
17 |
+
import tensorflow as tf
|
18 |
+
|
19 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
20 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quant_ops
|
21 |
+
from tensorflow_model_optimization.python.core.quantization.keras import quantizers
|
22 |
+
from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import (
|
23 |
+
default_8bit_quantize_registry,)
|
24 |
+
from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import (
|
25 |
+
default_8bit_quantizers,)
|
26 |
+
|
27 |
+
|
28 |
+
layers = keras.layers
|
29 |
+
|
30 |
+
|
31 |
+
class _PrunePreserveInfo(object):
|
32 |
+
"""PrunePreserveInfo."""
|
33 |
+
|
34 |
+
def __init__(self, weight_attrs, quantize_config_attrs):
|
35 |
+
"""Initializes PrunePreserveInfo.
|
36 |
+
|
37 |
+
Args:
|
38 |
+
weight_attrs: list of sparsity preservable weight attributes of layer.
|
39 |
+
quantize_config_attrs: list of quantization configuration class name.
|
40 |
+
"""
|
41 |
+
self.weight_attrs = weight_attrs
|
42 |
+
self.quantize_config_attrs = quantize_config_attrs
|
43 |
+
|
44 |
+
|
45 |
+
class PrunePreserveQuantizeRegistry():
  """PrunePreserveQuantizeRegistry responsible for built-in keras layers."""

  # The keys represent built-in keras layers; the first values represent the
  # the variables within the layers which hold the kernel weights, second
  # values represent the class name of quantization configuration for layers.
  # This decide the weights of layers with quantization configurations are
  # sparsity preservable.
  _LAYERS_CONFIG_MAP = {
      layers.Conv2D:
          _PrunePreserveInfo(['kernel'], ['Default8BitConvQuantizeConfig']),
      layers.Dense:
          _PrunePreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),

      # DepthwiseConv2D is supported with 8bit qat, but not with prune,
      # thus for DepthwiseConv2D PQAT, weights sparsity preserve is disabled.
      layers.DepthwiseConv2D:
          _PrunePreserveInfo(['depthwise_kernel'], ['Default8BitQuantizeConfig']),

      # layers that supported with prune, but not yet with QAT
      # layers.Conv1D:
      # _PrunePreserveInfo(['kernel'], []),
      # layers.Conv2DTranspose:
      # _PrunePreserveInfo(['kernel'], []),
      # layers.Conv3D:
      # _PrunePreserveInfo(['kernel'], []),
      # layers.Conv3DTranspose:
      # _PrunePreserveInfo(['kernel'], []),
      # layers.LocallyConnected1D:
      # _PrunePreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),
      # layers.LocallyConnected2D:
      # _PrunePreserveInfo(['kernel'], ['Default8BitQuantizeConfig']),

      # SeparableConv need verify from 8bit qat
      # layers.SeparableConv1D:
      # _PrunePreserveInfo(['pointwise_kernel'], \
      # ['Default8BitConvQuantizeConfig']),
      # layers.SeparableConv2D:
      # _PrunePreserveInfo(['pointwise_kernel'], \
      # ['Default8BitConvQuantizeConfig']),

      # Embedding need verify from 8bit qat
      # layers.Embedding: _PrunePreserveInfo(['embeddings'], []),
  }

  # Layer types that stay quantizable but for which sparsity preservation is
  # deliberately switched off (see the DepthwiseConv2D note above).
  _DISABLE_PRUNE_PRESERVE = frozenset({
      layers.DepthwiseConv2D,
  })

  def __init__(self):
    """Builds the map from quantize-config class name to the
    sparsity-preserving weight quantizer instance that replaces its
    default weight quantizer."""

    self._config_quantizer_map = {
        'Default8BitQuantizeConfig':
            PrunePreserveDefault8BitWeightsQuantizer(),
        'Default8BitConvQuantizeConfig':
            PrunePreserveDefault8BitConvWeightsQuantizer(),
    }

  @classmethod
  def _no_trainable_weights(cls, layer):
    """Returns whether this layer has trainable weights.

    Args:
      layer: The layer to check for trainable weights.

    Returns:
      True/False whether the layer has trainable weights.
    """
    return not layer.trainable_weights

  @classmethod
  def _disable_prune_preserve(cls, layer):
    """Returns whether disable this layer for prune preserve.

    Args:
      layer: The layer to check for disable.

    Returns:
      True/False whether disable this layer for prune preserve.
    """

    return layer.__class__ in cls._DISABLE_PRUNE_PRESERVE

  @classmethod
  def supports(cls, layer):
    """Returns whether the registry supports this layer type.

    Args:
      layer: The layer to check for support.

    Returns:
      True/False whether the layer type is supported.
    """

    # layers without trainable weights are considered supported,
    # e.g., ReLU, Softmax, and AveragePooling2D.
    if cls._no_trainable_weights(layer):
      return True

    if layer.__class__ in cls._LAYERS_CONFIG_MAP:
      return True

    return False

  @classmethod
  def _weight_names(cls, layer):
    """Gets the weight names."""
    # Weight-less layers (e.g. ReLU) have nothing to preserve.
    if cls._no_trainable_weights(layer):
      return []

    return cls._LAYERS_CONFIG_MAP[layer.__class__].weight_attrs

  @classmethod
  def get_sparsity_preservable_weights(cls, layer):
    """Gets sparsity preservable weights from keras layer.

    Args:
      layer: instance of keras layer

    Returns:
      List of sparsity preservable weights
    """
    return [getattr(layer, weight) for weight in cls._weight_names(layer)]

  @classmethod
  def get_suppport_quantize_config_names(cls, layer):
    """Gets class name of supported quantize config for layer.

    NOTE(review): the method name carries a typo ("suppport") but is part of
    the public API; renaming it would break external callers.

    Args:
      layer: instance of keras layer

    Returns:
      List of supported quantize config class name.
    """

    # layers without trainable weights don't need quantize_config for pqat
    if cls._no_trainable_weights(layer):
      return []

    return cls._LAYERS_CONFIG_MAP[layer.__class__].quantize_config_attrs

  def apply_sparsity_preserve_quantize_config(self, layer, quantize_config):
    """Applies weights sparsity preservation.

    Args:
      layer: The layer to check for support.
      quantize_config: quantization config to check for support,
        apply sparsity preservation to pruned weights
    Raises:
      ValueError when layer is supported does not have quantization config.
    Returns:
      Returns quantize_config with addon sparsity preserve weight_quantizer.
    """
    if self.supports(layer):
      # Weight-less layers and explicitly disabled layers pass through with
      # their original quantize_config untouched.
      if (self._no_trainable_weights(layer) or
          self._disable_prune_preserve(layer)):
        return quantize_config
      if (quantize_config.__class__.__name__
          in self._LAYERS_CONFIG_MAP[layer.__class__].quantize_config_attrs):
        # Swap the config's weight quantizer for the sparsity-preserving one.
        quantize_config.weight_quantizer = self._config_quantizer_map[
            quantize_config.__class__.__name__]
      else:
        raise ValueError('Configuration {} is not supported for Layer {}.'
                         .format(str(quantize_config.__class__.__name__),
                                 str(layer.__class__.__name__)))
    else:
      raise ValueError('Layer {} is not supported.'.format(
          str(layer.__class__.__name__)))

    return quantize_config
|
215 |
+
|
216 |
+
|
217 |
+
class Default8bitPrunePreserveQuantizeRegistry(PrunePreserveQuantizeRegistry):
  """Default 8 bit PrunePreserveQuantizeRegistry."""

  def get_quantize_config(self, layer):
    """Returns the quantization config with addon sparsity.

    Args:
      layer: input layer to return quantize config for.

    Returns:
      Returns the quantization config with sparsity preserve weight_quantizer.
    """
    base_registry = default_8bit_quantize_registry.Default8BitQuantizeRegistry()
    base_config = base_registry.get_quantize_config(layer)
    # Attach the sparsity-preserving weight quantizer to the default config.
    return self.apply_sparsity_preserve_quantize_config(layer, base_config)
|
236 |
+
|
237 |
+
|
238 |
+
class PrunePreserveDefaultWeightsQuantizer(quantizers.LastValueQuantizer):
  """Quantize weights while preserve sparsity."""

  def __init__(self, num_bits, per_axis, symmetric, narrow_range):
    """Initializes PrunePreserveDefaultWeightsQuantizer.

    Args:
      num_bits: Number of bits for quantization
      per_axis: Whether to apply per_axis quantization. The last dimension is
        used as the axis.
      symmetric: If true, use symmetric quantization limits instead of training
        the minimum and maximum of each quantization range separately.
      narrow_range: In case of 8 bits, narrow_range nudges the quantized range
        to be [-127, 127] instead of [-128, 127]. This ensures symmetric range
        has 0 as the centre.
    """
    quantizers.LastValueQuantizer.__init__(self, num_bits, per_axis, symmetric,
                                           narrow_range)

  def _build_sparsity_mask(self, name, layer):
    # divide_no_nan(w, w) returns 1.0 where a weight is nonzero and 0.0 where
    # it is zero (0/0 is defined as 0), producing a binary mask that captures
    # the pruned pattern at wrap time.
    weights = getattr(layer.layer, name)
    sparsity_mask = tf.math.divide_no_nan(weights, weights)

    return {'sparsity_mask': sparsity_mask}

  def build(self, tensor_shape, name, layer):
    """Constructs mask to preserve weights sparsity.

    Args:
      tensor_shape: Shape of weights which needs to be quantized.
      name: Name of weights in layer.
      layer: quantization wrapped keras layer.

    Returns:
      Dictionary of constructed sparsity mask and
      quantization params, the dictionary will be passed
      to __call__ function.
    """
    # Combine the sparsity mask with the min/max variables the parent
    # LastValueQuantizer creates.
    result = self._build_sparsity_mask(name, layer)
    result.update(
        super(PrunePreserveDefaultWeightsQuantizer,
              self).build(tensor_shape, name, layer))
    return result

  def __call__(self, inputs, training, weights, **kwargs):
    """Applies sparsity preserved quantization to the input tensor.

    Args:
      inputs: Input tensor (layer's weights) to be quantized.
      training: Whether the graph is currently training.
      weights: Dictionary of weights (params) the quantizer can use to
        quantize the tensor (layer's weights). This contains the weights
        created in the `build` function.
      **kwargs: Additional variables which may be passed to the quantizer.

    Returns:
      quantized tensor.
    """

    # Zero out the pruned positions before quantizing so the sparsity pattern
    # survives quantization-aware training updates.
    prune_preserve_inputs = tf.multiply(inputs, weights['sparsity_mask'])

    return quant_ops.LastValueQuantize(
        prune_preserve_inputs,
        weights['min_var'],
        weights['max_var'],
        is_training=training,
        num_bits=self.num_bits,
        per_channel=self.per_axis,
        symmetric=self.symmetric,
        narrow_range=self.narrow_range,
    )
|
309 |
+
|
310 |
+
|
311 |
+
class PrunePreserveDefault8BitWeightsQuantizer(
    PrunePreserveDefaultWeightsQuantizer):
  """PrunePreserveWeightsQuantizer for default 8bit weights."""

  def __init__(self):
    # Fixed default-8bit settings: symmetric, narrow-range, per-tensor.
    super(PrunePreserveDefault8BitWeightsQuantizer, self).__init__(
        num_bits=8,
        per_axis=False,
        symmetric=True,
        narrow_range=True)
|
321 |
+
|
322 |
+
|
323 |
+
class PrunePreserveDefault8BitConvWeightsQuantizer(
    PrunePreserveDefaultWeightsQuantizer,
    default_8bit_quantizers.Default8BitConvWeightsQuantizer,):
  """PrunePreserveWeightsQuantizer for default 8bit Conv2D/DepthwiseConv2D weights."""

  # pylint: disable=super-init-not-called
  def __init__(self):
    # Skip PrunePreserveDefaultWeightsQuantizer since they have the same super.
    # Deliberately bypasses the MRO: only the conv-specific quantizer's
    # initializer runs.
    default_8bit_quantizers.Default8BitConvWeightsQuantizer.__init__(self)

  def build(self, tensor_shape, name, layer):
    # Merge the sparsity mask (from the prune-preserve base) with the
    # quantization params of the conv weights quantizer; both bases are
    # invoked explicitly rather than via super() to control which build runs.
    result = PrunePreserveDefaultWeightsQuantizer._build_sparsity_mask(
        self, name, layer)
    result.update(
        default_8bit_quantizers.Default8BitConvWeightsQuantizer.build(
            self, tensor_shape, name, layer))
    return result
|
readme.txt
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ixl iWARP FreeBSD* driver for Intel(R) Ethernet Connection X722
|
2 |
+
================================================================
|
3 |
+
July 9, 2019
|
4 |
+
|
5 |
+
Contents
|
6 |
+
========
|
7 |
+
|
8 |
+
- Prerequisites
|
9 |
+
- Building and Installation
|
10 |
+
- Testing
|
11 |
+
- Configuration
|
12 |
+
- Interoperability
|
13 |
+
- Known Issues
|
14 |
+
|
15 |
+
|
16 |
+
Prerequisites
|
17 |
+
=============
|
18 |
+
|
19 |
+
- FreeBSD version 11.2
|
20 |
+
- Kernel configuration:
|
21 |
+
Please add the following kernel configuration options:
|
22 |
+
include GENERIC
|
23 |
+
options COMPAT_LINUXKPI
|
24 |
+
options IPOIB_CM
|
25 |
+
options IXL_IW
|
26 |
+
|
27 |
+
nodevice ixl
|
28 |
+
nodevice iavf
|
29 |
+
Note: IXL_IW is required for FreeBSD-CURRENT branch.
|
30 |
+
- For the iw_ixl driver to work, an if_ixl driver with iwarp interface
|
31 |
+
is required. The interface is available in if_ixl version 1.7.12 or later.
|
32 |
+
It should be enabled prior to usage, as the setting is switched off by
|
33 |
+
default. To enable iwarp compatibility, add
|
34 |
+
hw.ixl.enable_iwarp=1
|
35 |
+
to
|
36 |
+
/boot/loader.conf
|
37 |
+
|
38 |
+
The LAN driver can be downloaded from
https://downloadcenter.intel.com/download/25160/Ethernet-Intel-Network-Adapter-Driver-for-PCIe-40-Gigabit-Ethernet-Network-Connection-under-FreeBSD
|
41 |
+
Or search on downloadcenter.intel.com using '40 Gigabit Ethernet Network
|
42 |
+
Connection under FreeBSD'. Newer OS releases contain the if_ixl driver in
|
43 |
+
the ixl driver version 1.7.12-k or later.
|
44 |
+
|
45 |
+
There are some known issues with the interface on if_ixl-1.7.12. Please
|
46 |
+
use version 1.7.13 or later.
|
47 |
+
|
48 |
+
- fastreg memory mode in krping needs a patch applied to krping.
|
49 |
+
Refer to the 'Testing' and 'Known Issues' sections for details.
|
50 |
+
|
51 |
+
|
52 |
+
Building and Installation
|
53 |
+
=========================
|
54 |
+
|
55 |
+
1. Untar ixl-<version>.tar.gz and iw_ixl-<version>.tar.gz
|
56 |
+
|
57 |
+
# tar -xf ixl-<version>.tar.gz
|
58 |
+
# tar -xf iw_ixl-<version>.tar.gz
|
59 |
+
|
60 |
+
2. Install the if_ixl driver:
|
61 |
+
|
62 |
+
# cd ixl-<version>/src directory
|
63 |
+
# make
|
64 |
+
# make install
|
65 |
+
|
66 |
+
3. Install the iw_ixl driver:
|
67 |
+
|
68 |
+
# cd iw_ixl-<version>/src
|
69 |
+
# make clean
|
70 |
+
# make IXL_DIR=$PATH_TO_IXL/ixl-<version>/src
|
71 |
+
# make install
|
72 |
+
|
73 |
+
4. Install the man page for the iw_ixl driver by copying the iw_ixl.4.gz file
|
74 |
+
to the directory where manual pages are held on your system. For instance:
|
75 |
+
|
76 |
+
# cp iw_ixl-<version>/doc/iw_ixl.4.gz /usr/share/man/man4/
|
77 |
+
|
78 |
+
For in-tree driver if_ixl-1.7.12-k or later, it is sufficient to follow
|
79 |
+
the instruction from point 3 but ensure the correct path to if_ixl source
|
80 |
+
folder is supplied. For instance:
|
81 |
+
IXL_DIR=/usr/src/sys/dev/ixl/
|
82 |
+
|
83 |
+
|
84 |
+
Testing
|
85 |
+
-------
|
86 |
+
1. To load the iw_ixl driver, call:
|
87 |
+
|
88 |
+
# kldload iw_ixl
|
89 |
+
|
90 |
+
If if_ixl is not already loaded, the system will load it on its own.
|
91 |
+
Please remember to add
|
92 |
+
hw.ixl.enable_iwarp=1
|
93 |
+
to /boot/loader.conf file prior to if_ixl loading, to ensure the ixl
|
94 |
+
driver has the iwarp interface enabled.
|
95 |
+
|
96 |
+
2. To validate the load of the driver, check:
|
97 |
+
|
98 |
+
# sysctl -a | grep infiniband
|
99 |
+
|
100 |
+
A number of sys.class.infiniband should appear, provided at least one
|
101 |
+
port of the X722 is up.
|
102 |
+
|
103 |
+
3. The source code for krping software is provided with the kernel in
|
104 |
+
/usr/src/sys/contrib/rdma/krping/. To compile the software, change directory
|
105 |
+
to /usr/src/sys/modules/rdma/krping/ and invoke the following:
|
106 |
+
|
107 |
+
# make clean
|
108 |
+
# make
|
109 |
+
# make install
|
110 |
+
|
111 |
+
4. Start krping server on one machine:
|
112 |
+
|
113 |
+
# echo size=64,count=1,port=6601,addr=100.0.0.189,server > /dev/krping
|
114 |
+
5. Connect client from another machine:
|
115 |
+
|
116 |
+
# echo size=64,count=1,port=6601,addr=100.0.0.189,client > /dev/krping
|
117 |
+
|
118 |
+
|
119 |
+
Configuration
|
120 |
+
=============
|
121 |
+
The following sysctl options are visible:
|
122 |
+
- hw.iw_ixl.max_ceq
|
123 |
+
determines the maximum number of msix vectors available to the driver
|
124 |
+
for CEQ usage.
|
125 |
+
- hw.iw_ixl.debug
|
126 |
+
defines level of debug messages.
|
127 |
+
- hw.iw_ixl.mpa_version
|
128 |
+
shows the current MPA version used.
|
129 |
+
|
130 |
+
The max_ceq setting may be changed by adding:
|
131 |
+
hw.iw_ixl.max_ceq=$value
|
132 |
+
to /boot/loader.conf file. The final number of CEQ is evaluated depending
|
133 |
+
on the available msix vectors, number of cpu cores, and hardware limits.
|
134 |
+
|
135 |
+
If max_ceq=0, the value is ignored.
|
136 |
+
|
137 |
+
The debug setting may be changed either by adding:
|
138 |
+
hw.iw_ixl.debug=$value
|
139 |
+
to the /boot/loader.conf file or by calling
|
140 |
+
sysctl hw.iw_ixl.debug=$value
|
141 |
+
|
142 |
+
The mpa_version may be changed by adding:
|
143 |
+
hw.iw_ixl.mpa_version=$value
|
144 |
+
to the /boot/loader.conf file.
|
145 |
+
|
146 |
+
|
147 |
+
Interoperability
|
148 |
+
================
|
149 |
+
|
150 |
+
To interoperate with Chelsio iWARP devices:
|
151 |
+
|
152 |
+
1. Load the ixl driver with parameter mpa_version set to 1. Add the line:
|
153 |
+
hw.iw_ixl.mpa_version=1
|
154 |
+
to /boot/loader.conf
|
155 |
+
|
156 |
+
2. Load Chelsio T4/T5 RDMA driver (iw_cxgb4) with parameter dack_mode set to 0.
|
157 |
+
|
158 |
+
|
159 |
+
Known Issues
|
160 |
+
============
|
161 |
+
|
162 |
+
- Loopback is not supported.
|
163 |
+
- MTU changes are not supported.
|
164 |
+
- IPv6 is not supported.
|
165 |
+
- MW memory mode is not supported.
|
166 |
+
- MR memory mode supports only single buffer.
|
167 |
+
- The function ib_cq_resize is not supported.
|
168 |
+
- The max number of registered cq, qp, pd or mr reported by the device may
|
169 |
+
differ from the actual number of registrations achievable.
|
170 |
+
- A kernel crash may occur when trying to run krping without ensuring that the
|
171 |
+
two machines are able to ping each other.
|
172 |
+
- A kernel crash may occur when trying to load the iw_ixl driver when
|
173 |
+
hw.ixl.enable_iwarp=0 (fixed with if_ixl 1.7.13).
|
174 |
+
- A kernel crash may occur when loading the iw_ixl driver on a card that is
|
175 |
+
supported by if_ixl driver, but does not have iWARP capability (fixed with
|
176 |
+
if_ixl 1.7.13).
|
177 |
+
- Krping with fastreg memory mode will not work unless some changes are made
|
178 |
+
to krping. To work around the issue, modify the krping_rdma_rkey function
|
179 |
+
such that, in the case of FASTREG memory mode, the ib_post_send function
|
180 |
+
with &cd->invalidate_wr parameter is not called during the first run of
|
181 |
+
the function.
|
182 |
+
|
183 |
+
|
184 |
+
Support
|
185 |
+
=======
|
186 |
+
For general information, go to the Intel support website at:
|
187 |
+
http://www.intel.com/support/
|
188 |
+
|
189 |
+
If an issue is identified with the released source code on a supported kernel
|
190 |
+
with a supported adapter, email the specific information related to the issue
|
191 |
+
to e1000-rdma@lists.sourceforge.net
|
192 |
+
|
193 |
+
|
194 |
+
Copyright(c) 2017-2019 Intel Corporation.
|
195 |
+
|
196 |
+
|
197 |
+
Trademarks
|
198 |
+
==========
|
199 |
+
Intel is a trademark or registered trademark of Intel Corporation or its
|
200 |
+
subsidiaries in the United States and/or other countries.
|
201 |
+
|
202 |
+
* Other names and brands may be claimed as the property of others.
|
203 |
+
|
204 |
+
|
same_training_and_inference_test.py
ADDED
1 |
+
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Tests for when the training and inference graphs are the same."""
|
16 |
+
|
17 |
+
import os
|
18 |
+
import tempfile
|
19 |
+
|
20 |
+
import tensorflow as tf
|
21 |
+
|
22 |
+
from tensorflow_model_optimization.python.core.common.keras.compression.algorithms import same_training_and_inference as svd
|
23 |
+
from tensorflow_model_optimization.python.core.keras.compat import keras
|
24 |
+
from tensorflow_model_optimization.python.core.keras.testing import test_utils_mnist
|
25 |
+
|
26 |
+
|
27 |
+
def _build_model():
  """Builds the small LeNet-style MNIST classifier used by these tests.

  Returns:
    An uncompiled functional `keras.Model` mapping (28, 28) images to
    10 un-normalized class logits.
  """
  inputs = keras.layers.Input(shape=(28, 28), name='input')
  net = keras.layers.Reshape((28, 28, 1))(inputs)
  net = keras.layers.Conv2D(
      20, 5, activation='relu', padding='valid', name='conv1'
  )(net)
  net = keras.layers.MaxPool2D(2, 2)(net)
  net = keras.layers.Conv2D(
      50, 5, activation='relu', padding='valid', name='conv2'
  )(net)
  net = keras.layers.MaxPool2D(2, 2)(net)
  net = keras.layers.Flatten()(net)
  net = keras.layers.Dense(500, activation='relu', name='fc1')(net)
  logits = keras.layers.Dense(10, name='fc2')(net)

  return keras.Model(inputs=[inputs], outputs=[logits])
|
44 |
+
|
45 |
+
|
46 |
+
def _get_dataset():
  """Loads MNIST, scales pixels to [0, 1], and truncates the train split.

  Returns:
    ((x_train, y_train), (x_test, y_test)) with the training arrays limited
    to the first 1000 examples.
  """
  (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
  x_train = x_train / 255.0
  x_test = x_test / 255.0
  # Use a small slice of the 60000 training examples so the unit tests
  # stay fast.
  x_train = x_train[:1000]
  y_train = y_train[:1000]
  return (x_train, y_train), (x_test, y_test)
|
54 |
+
|
55 |
+
|
56 |
+
def _train_model(model):
  """Compiles `model` in place and fits it for one epoch on the MNIST subset."""
  model.compile(
      optimizer='adam',
      loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=['accuracy'])
  (x_train, y_train), _ = _get_dataset()
  model.fit(x_train, y_train, epochs=1)
|
64 |
+
|
65 |
+
|
66 |
+
def _save_as_saved_model(model):
  """Saves `model` into a fresh temporary directory and returns its path."""
  export_dir = tempfile.mkdtemp()
  model.save(export_dir)
  return export_dir
|
70 |
+
|
71 |
+
|
72 |
+
# TODO(tfmot): reuse existing test utilities.
def _convert_to_tflite(saved_model_dir):
  """Converts a SavedModel directory to a TFLite flatbuffer on disk.

  Args:
    saved_model_dir: Path to a TensorFlow SavedModel directory.

  Returns:
    Path to the temporary `.tflite` file that was written.
  """
  # mkstemp returns an *open* OS-level file descriptor alongside the path;
  # close it immediately so the descriptor is not leaked (the path is
  # reopened below with a regular file object).
  fd, tflite_file = tempfile.mkstemp()
  os.close(fd)

  converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
  tflite_model = converter.convert()

  with open(tflite_file, 'wb') as f:
    f.write(tflite_model)

  return tflite_file
|
83 |
+
|
84 |
+
|
85 |
+
def _get_directory_size_in_bytes(directory):
|
86 |
+
total = 0
|
87 |
+
try:
|
88 |
+
for entry in os.scandir(directory):
|
89 |
+
if entry.is_file():
|
90 |
+
# if it's a file, use stat() function
|
91 |
+
total += entry.stat().st_size
|
92 |
+
elif entry.is_dir():
|
93 |
+
# if it's a directory, recursively call this function
|
94 |
+
total += _get_directory_size_in_bytes(entry.path)
|
95 |
+
except NotADirectoryError:
|
96 |
+
# if `directory` isn't a directory, get the file size then
|
97 |
+
return os.path.getsize(directory)
|
98 |
+
except PermissionError:
|
99 |
+
# if for whatever reason we can't open the folder, return 0
|
100 |
+
return 0
|
101 |
+
return total
|
102 |
+
|
103 |
+
|
104 |
+
class FunctionalTest(tf.test.TestCase):
  """End-to-end tests for SVD compression with identical train/infer graphs."""

  # TODO(tfmot): can simplify to single layer test that checks exact
  # dimensions of weights.
  def testSVD_ReducesSavedModelSize(self):
    """Compressed SavedModel should be less than a third of the original."""
    model = _build_model()

    original_saved_model_dir = _save_as_saved_model(model)

    compressed_model = svd.SVD(rank=16).compress_model(model)

    saved_model_dir = _save_as_saved_model(compressed_model)

    original_size = _get_directory_size_in_bytes(original_saved_model_dir)
    compressed_size = _get_directory_size_in_bytes(saved_model_dir)

    self.assertLess(compressed_size, original_size / 3)

  def testSVD_HasReasonableAccuracy_TF(self):
    """A briefly trained compressed model should beat 60% test accuracy."""
    model = _build_model()

    compressed_model = svd.SVD(rank=16).compress_model(model)

    _train_model(compressed_model)

    _, (x_test, y_test) = _get_dataset()

    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    compressed_model.compile(
        optimizer='adam', loss=loss_fn, metrics=['accuracy'])

    results = compressed_model.evaluate(x_test, y_test)

    # results = [loss, accuracy] per the compiled metrics.
    self.assertGreater(results[1], 0.60)

  def testSVD_ReducesTFLiteModelSize(self):
    """Compressed TFLite flatbuffer should be less than 1/6 of the original."""
    model = _build_model()

    original_saved_model_dir = _save_as_saved_model(model)
    original_tflite_file = _convert_to_tflite(original_saved_model_dir)

    compressed_model = svd.SVD(rank=16).compress_model(model)

    saved_model_dir = _save_as_saved_model(compressed_model)
    compressed_tflite_file = _convert_to_tflite(saved_model_dir)

    original_size = os.path.getsize(original_tflite_file)
    compressed_size = os.path.getsize(compressed_tflite_file)

    self.assertLess(compressed_size, original_size / 6)

  def testSVD_HasReasonableAccuracy_TFLite(self):
    """The converted TFLite model should also beat 60% test accuracy."""
    model = _build_model()

    compressed_model = svd.SVD(rank=16).compress_model(model)

    _train_model(compressed_model)

    saved_model_dir = _save_as_saved_model(compressed_model)
    compressed_tflite_file = _convert_to_tflite(saved_model_dir)

    accuracy = test_utils_mnist.eval_tflite(compressed_tflite_file)

    self.assertGreater(accuracy, 0.60)

  # TODO(tfmot): can simplify to single layer test.
  def testSVD_BreaksDownLayerWeights(self):
    """Compression should split each conv kernel into two factor weights."""
    model = _build_model()

    first_conv_layer = model.layers[2]
    self.assertLen(first_conv_layer.weights, 2)

    compressed_model = svd.SVD(rank=16).compress_model(model)

    first_conv_layer = compressed_model.layers[2]

    # Kernel becomes two SVD factors; bias is carried over unchanged.
    self.assertLen(first_conv_layer.weights, 3)

  # TODO(tfmot): can simplify to single layer test.
  def testSVD_PreservesPretrainedWeights(self):
    """Factorized weights must reproduce what init_training_weights emits."""
    i = keras.layers.Input(shape=(2), name='input')
    output = keras.layers.Dense(3, name='fc1')(i)
    model = keras.Model(inputs=[i], outputs=[output])

    dense_layer_weights = model.layers[1].get_weights()

    algorithm = svd.SVD(rank=1)
    compressed_model = algorithm.compress_model(model)

    dense_layer_compressed_weights = compressed_model.layers[1].get_weights()

    # kernel
    algorithm.weight_reprs = []
    algorithm.init_training_weights(dense_layer_weights[0])
    w1_repr, w2_repr = algorithm.weight_reprs
    # Use TestCase assertions rather than bare `assert` statements: bare
    # asserts are stripped under `python -O` and give no failure detail.
    self.assertAllEqual(
        w1_repr.kwargs['initializer'](None),
        dense_layer_compressed_weights[0])
    self.assertAllEqual(
        w2_repr.kwargs['initializer'](None),
        dense_layer_compressed_weights[1])

    # bias
    self.assertAllEqual(
        dense_layer_weights[1], dense_layer_compressed_weights[2])
|
207 |
+
|
208 |
+
|
209 |
+
# Run all test cases via the TensorFlow test runner when executed as a script.
if __name__ == '__main__':
  tf.test.main()
|