Source code for tensorlayer.layers.dense.quan_dense_bn

#! /usr/bin/python
# -*- coding: utf-8 -*-

import tensorflow as tf

from tensorlayer.layers.core import Layer
from tensorlayer.layers.core import LayersConfig
from tensorflow.python.training import moving_averages

from tensorlayer.layers.utils import quantize_active_overflow
from tensorlayer.layers.utils import quantize_weight_overflow

from tensorlayer import logging

from tensorlayer.decorators import deprecated_alias

__all__ = [
    'QuanDenseLayerWithBN',
]


class QuanDenseLayerWithBN(Layer):
    """The :class:`QuanDenseLayerWithBN` class is a quantized fully connected layer with BN, whose
    weights are 'bitW' bits and whose input (the output of the previous layer) is 'bitA' bits
    during inference.

    Parameters
    ----------
    prev_layer : :class:`Layer`
        Previous layer.
    n_units : int
        The number of units of this layer.
    act : activation function
        The activation function of this layer.
    decay : float
        A decay factor for `ExponentialMovingAverage`. Suggest to use a large value for a large dataset.
    epsilon : float
        A small float added to the variance to avoid dividing by zero.
    is_train : boolean
        Is being used for training or inference.
    bitW : int
        The bits of this layer's parameters.
    bitA : int
        The bits of the output of the previous layer.
    gamma_init : initializer or None
        The initializer for initializing gamma, if None, skip gamma.
    beta_init : initializer or None
        The initializer for initializing beta, if None, skip beta.
        Usually you should not skip beta unless you know what you are doing.
    use_gemm : boolean
        If True, use gemm instead of ``tf.matmul`` for inference. (TODO).
    W_init : initializer
        The initializer for the weight matrix.
    W_init_args : dictionary
        The arguments for the weight matrix initializer.
    name : a str
        A unique layer name.

    """

    @deprecated_alias(layer='prev_layer', end_support_version=1.9)  # TODO: remove this line before the 1.9 release
    def __init__(
            self,
            prev_layer,
            n_units=100,
            act=None,
            decay=0.9,
            epsilon=1e-5,
            is_train=False,
            bitW=8,
            bitA=8,
            gamma_init=tf.ones_initializer,
            beta_init=tf.zeros_initializer,
            use_gemm=False,
            W_init=tf.truncated_normal_initializer(stddev=0.1),
            W_init_args=None,
            name='quan_dense_with_bn',
    ):
        super(QuanDenseLayerWithBN, self).__init__(prev_layer=prev_layer, act=act, W_init_args=W_init_args, name=name)

        logging.info(
            "QuanDenseLayerWithBN %s: %d %s" %
            (self.name, n_units, self.act.__name__ if self.act is not None else 'No Activation')
        )

        if self.inputs.get_shape().ndims != 2:
            raise Exception("The input dimension must be rank 2, please reshape or flatten it")

        if use_gemm:
            raise Exception("TODO. The current version uses tf.matmul for inference.")

        n_in = int(self.inputs.get_shape()[-1])
        x = self.inputs
        self.inputs = quantize_active_overflow(self.inputs, bitA)
        self.n_units = n_units

        with tf.variable_scope(name):
            W = tf.get_variable(
                name='W', shape=(n_in, n_units), initializer=W_init, dtype=LayersConfig.tf_dtype, **self.W_init_args
            )

            mid_out = tf.matmul(x, W)

            para_bn_shape = mid_out.get_shape()[-1:]

            if gamma_init:
                scale_para = tf.get_variable(
                    name='scale_para', shape=para_bn_shape, initializer=gamma_init, dtype=LayersConfig.tf_dtype,
                    trainable=is_train
                )
            else:
                scale_para = None

            if beta_init:
                offset_para = tf.get_variable(
                    name='offset_para', shape=para_bn_shape, initializer=beta_init, dtype=LayersConfig.tf_dtype,
                    trainable=is_train
                )
            else:
                offset_para = None

            moving_mean = tf.get_variable(
                'moving_mean', para_bn_shape, initializer=tf.constant_initializer(1.),
                dtype=LayersConfig.tf_dtype, trainable=False
            )

            moving_variance = tf.get_variable(
                'moving_variance', para_bn_shape, initializer=tf.constant_initializer(1.),
                dtype=LayersConfig.tf_dtype, trainable=False
            )

            mean, variance = tf.nn.moments(mid_out, list(range(len(mid_out.get_shape()) - 1)))

            update_moving_mean = moving_averages.assign_moving_average(
                moving_mean, mean, decay, zero_debias=False
            )  # if zero_debias=True, has bias

            update_moving_variance = moving_averages.assign_moving_average(
                moving_variance, variance, decay, zero_debias=False
            )  # if zero_debias=True, has bias

            def mean_var_with_update():
                with tf.control_dependencies([update_moving_mean, update_moving_variance]):
                    return tf.identity(mean), tf.identity(variance)

            if is_train:
                mean, var = mean_var_with_update()
            else:
                mean, var = moving_mean, moving_variance

            w_fold = _w_fold(W, scale_para, var, epsilon)
            bias_fold = _bias_fold(offset_para, scale_para, mean, var, epsilon)

            W = quantize_weight_overflow(w_fold, bitW)
            # W = tl.act.sign(W)    # don't update ...
            # W = tf.Variable(W)

            self.outputs = tf.matmul(self.inputs, W)
            # self.outputs = xnor_gemm(self.inputs, W)  # TODO

            self.outputs = tf.nn.bias_add(self.outputs, bias_fold, name='bias_add')

            self.outputs = self._apply_activation(self.outputs)

        self._add_layers(self.outputs)
        self._add_params([W, scale_para, offset_para, moving_mean, moving_variance])
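The two helpers below fold the batch-normalization statistics into the dense weights and bias, so
inference reduces to a single matmul plus a bias add. The following is a minimal NumPy sketch of
that folding identity (quantization is ignored here; NumPy and every name in the snippet are
illustrative only and not part of this module):

import numpy as np

# gamma * (x @ W - mean) / sqrt(var + eps) + beta  ==  x @ w_fold + bias_fold
# with w_fold   = gamma * W    / sqrt(var + eps)
# and  bias_fold = beta - gamma * mean / sqrt(var + eps)
rng = np.random.default_rng(0)
x = rng.normal(size=(4, 3))
W = rng.normal(size=(3, 2))
gamma, beta = rng.normal(size=(2,)), rng.normal(size=(2,))
mean, var, eps = rng.normal(size=(2,)), rng.uniform(0.5, 1.5, size=(2,)), 1e-5

bn_out = gamma * (x @ W - mean) / np.sqrt(var + eps) + beta
w_fold = gamma * W / np.sqrt(var + eps)
bias_fold = beta - gamma * mean / np.sqrt(var + eps)
assert np.allclose(bn_out, x @ w_fold + bias_fold)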
def _w_fold(w, gama, var, epsilon):
    # Fold the BN scale into the weights: gamma * W / sqrt(var + epsilon)
    return tf.div(tf.multiply(gama, w), tf.sqrt(var + epsilon))


def _bias_fold(beta, gama, mean, var, epsilon):
    # Fold the BN shift into a bias: beta - gamma * mean / sqrt(var + epsilon)
    return tf.subtract(beta, tf.div(tf.multiply(gama, mean), tf.sqrt(var + epsilon)))
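A minimal usage sketch, assuming the TensorLayer 1.x graph-mode API and that this class is
re-exported as ``tl.layers.QuanDenseLayerWithBN`` (the names, shapes and hyper-parameters below
are illustrative, not prescribed by this module):

import tensorflow as tf
import tensorlayer as tl

x = tf.placeholder(tf.float32, shape=(None, 784), name='x')
net = tl.layers.InputLayer(x, name='input')
# 8-bit weights and activations, BN statistics folded into the dense weights
net = tl.layers.QuanDenseLayerWithBN(
    net, n_units=256, act=tf.nn.relu, is_train=True, bitW=8, bitA=8, name='qdnbn1'
)
net = tl.layers.QuanDenseLayerWithBN(net, n_units=10, act=None, is_train=True, name='qdnbn2')
y = net.outputs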