#! /usr/bin/python
# -*- coding: utf8 -*-
import tensorflow as tf
import time
from . import visualize
from . import utils
from . import files
from . import cost
from . import iterate
import numpy as np
from six.moves import xrange
import random
import warnings
# __all__ = [
# "Layer",
# "DenseLayer",
# ]
## Dynamically create variables for keep prob
# set_keep = locals()
set_keep = globals()
set_keep['_layers_name_list'] =[]
set_keep['name_reuse'] = False
## Variable Operation
def flatten_reshape(variable, name=''):
"""Reshapes high-dimension input to a vector.
[batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask]
Parameters
----------
variable : a tensorflow variable
name : a string or None
An optional name to attach to this layer.
Examples
--------
>>> W_conv2 = weight_variable([5, 5, 100, 32]) # 32 features for each 5x5 patch
>>> b_conv2 = bias_variable([32])
>>> W_fc1 = weight_variable([7 * 7 * 32, 256])
>>> h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
>>> h_pool2 = max_pool_2x2(h_conv2)
>>> h_pool2.get_shape()[:].as_list() = [batch_size, 7, 7, 32]
... [batch_size, mask_row, mask_col, n_mask]
>>> h_pool2_flat = tl.layers.flatten_reshape(h_pool2)
... [batch_size, mask_row * mask_col * n_mask]
>>> h_pool2_flat_drop = tf.nn.dropout(h_pool2_flat, keep_prob)
...
"""
dim = 1
for d in variable.get_shape()[1:].as_list():
dim *= d
return tf.reshape(variable, shape=[-1, dim], name=name)
def clear_layers_name():
"""Clear all layer names in set_keep['_layers_name_list'],
enabling layer name reuse.
Examples
---------
>>> network = tl.layers.InputLayer(x, name='input_layer')
>>> network = tl.layers.DenseLayer(network, n_units=800, name='relu1')
...
>>> tl.layers.clear_layers_name()
>>> network2 = tl.layers.InputLayer(x, name='input_layer')
>>> network2 = tl.layers.DenseLayer(network2, n_units=800, name='relu1')
...
"""
set_keep['_layers_name_list'] =[]
def set_name_reuse(enable=True):
"""Enable or disable layer name reuse. By default, each layer must have a unique
name. When you want two or more input placeholders (e.g. for inference) to share
the same model parameters, you need to enable layer name reuse, which allows the
parameters to share the same name scope.
Examples
------------
See ``tutorial_ptb_lstm.py`` for an example.
"""
set_keep['name_reuse'] = enable
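# A minimal sketch of sharing parameters between two networks built from different
# placeholders (`x_train` and `x_test` are hypothetical; the variable-scope reuse
# call is one common way to share the underlying tf.get_variable() parameters):
#
# net_train = tl.layers.DenseLayer(tl.layers.InputLayer(x_train, name='in'), n_units=800, name='dense1')
# tl.layers.set_name_reuse(True)               # allow the layer names 'in' and 'dense1' to be used again
# tf.get_variable_scope().reuse_variables()    # reuse the existing 'dense1/W' and 'dense1/b' variables
# net_test = tl.layers.DenseLayer(tl.layers.InputLayer(x_test, name='in'), n_units=800, name='dense1')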
def initialize_rnn_state(state):
"""Return the initialized RNN state, evaluated as numpy values.
The input is an LSTMStateTuple or the state of other RNN cells.
"""
if isinstance(state, tf.nn.rnn_cell.LSTMStateTuple):
# when state_is_tuple=True for LSTM
# print(state)
# print(state.c)
# print(state.h)
# print(state.c.eval())
# print(state.h.eval())
# exit()
c = state.c.eval()
h = state.h.eval()
return (c, h)
# # print(state)
# # print(state[0])
# new_state = state
# new_state[0].assign(state[0].eval())
# new_state[1].assign(state[1].eval())
# # state[0] = state[0].eval()
# # state[1] = state[1].eval()
# # state.c = state.c.eval()
# # state.h = state.h.eval()
# return new_state
else:
# when state_is_tuple=False for LSTM
# or other RNNs
new_state = state.eval()
return new_state
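# A rough usage sketch (assuming `network` is an RNNLayer with a fixed batch size,
# `x` its input placeholder and `sess` the current session; with state_is_tuple=True
# you would feed the returned (c, h) pair into the two parts of the state instead):
#
# state = initialize_rnn_state(network.initial_state)
# for X_batch in batches:
#     _, state = sess.run([train_op, network.final_state],
#                         feed_dict={x: X_batch, network.initial_state: state})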
def print_all_variables(train_only=False):
"""Print all trainable and non-trainable variables,
without running initialize_all_variables().
Parameters
----------
train_only : boolean
If True, only print the trainable variables; otherwise, print all variables.
"""
tvar = tf.trainable_variables() if train_only else tf.all_variables()
for idx, v in enumerate(tvar):
print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name))
# def print_all_variables():
# """Print all trainable and non-trainable variables
# without initialize_all_variables()"""
# for idx, v in enumerate(tf.all_variables()):
# # print(" var %d: %s %s" % (idx, v.get_shape(), v.name))
# print(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v.name))
# Basic layer
class Layer(object):
"""
The :class:`Layer` class represents a single layer of a neural network. It
should be subclassed when implementing new types of layers.
Because each layer can keep track of the layer(s) feeding into it, a
network's output :class:`Layer` instance can double as a handle to the full
network.
Parameters
----------
inputs : a :class:`Layer` instance
The `Layer` class feeding into this layer.
name : a string or None
An optional name to attach to this layer.
"""
def __init__(
self,
inputs = None,
name ='layer'
):
self.inputs = inputs
# if name in globals():
if (name in set_keep['_layers_name_list']) and name_reuse == False:
raise Exception("Layer '%s' already exists, please choice other 'name'.\
\nHint : Use different name for different 'Layer'" % name)
else:
self.name = name
set_keep['_layers_name_list'].append(name)
def print_params(self, details=True):
''' Print all info of parameters in the network'''
# try:
for i, p in enumerate(self.all_params):
if details:
try:
# print(" param %d: %s (mean: %f, median: %f, std: %f) %s" % (i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name))
print(" param {:3}: {:15} (mean: {:<18}, median: {:<18}, std: {:<18}) {}".format(i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name))
except:
raise Exception("Hint: print params details after sess.run(tf.initialize_all_variables()) or use network.print_params(False).")
else:
print(" param {:3}: {:15} {}".format(i, str(p.get_shape()), p.name))
print(" num of params: %d" % self.count_params())
# except:
# raise Exception("Hint: print params after sess.run(tf.initialize_all_variables()) or use tl.layers.print_all_variables()")
def print_layers(self):
''' Print all info of layers in the network '''
for i, p in enumerate(self.all_layers):
# print(vars(p))
print(" layer %d: %s" % (i, str(p)))
def count_params(self):
''' Return the number of parameters in the network '''
n_params = 0
for i, p in enumerate(self.all_params):
n = 1
# for s in p.eval().shape:
for s in p.get_shape():
try:
s = int(s)
except:
s = 1
if s:
n = n * s
n_params = n_params + n
return n_params
# def print_params(self):
# ''' Print all info of parameters in the network after initialize_all_variables()'''
# try:
# for i, p in enumerate(self.all_params):
# print(" param %d: %s (mean: %f, median: %f, std: %f) %s" % (i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name))
# print(" num of params: %d" % self.count_params())
# except:
# raise Exception("Hint: print params after sess.run(tf.initialize_all_variables()) or use tl.layers.print_all_variables()")
#
#
# def print_layers(self):
# ''' Print all info of layers in the network '''
# for i, p in enumerate(self.all_layers):
# # print(vars(p))
# print(" layer %d: %s" % (i, str(p)))
#
# def count_params(self):
# ''' Return the number of parameters in the network '''
# n_params = 0
# for i, p in enumerate(self.all_params):
# n = 1
# for s in p.eval().shape:
# # for s in p.get_shape():
# # s = int(s)
# if s:
# n = n * s
# n_params = n_params + n
# return n_params
def __str__(self):
print("\nIt is a Layer class")
self.print_params(False)
self.print_layers()
return " Last layer is: %s" % self.__class__.__name__
# Input layer
# Word Embedding Input layer
# Dense layer
class DenseLayer(Layer):
"""
The :class:`DenseLayer` class is a fully connected layer.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
n_units : int
The number of units of the layer.
act : activation function
The function that is applied to the layer activations.
W_init : weights initializer
The initializer for initializing the weight matrix.
b_init : biases initializer
The initializer for initializing the bias vector.
W_init_args : dictionary
The arguments for the weights tf.get_variable.
b_init_args : dictionary
The arguments for the biases tf.get_variable.
name : a string or None
An optional name to attach to this layer.
Examples
--------
>>> network = tl.layers.InputLayer(x, name='input_layer')
>>> network = tl.layers.DenseLayer(
... network,
... n_units=800,
... act = tf.nn.relu,
... W_init=tf.truncated_normal_initializer(stddev=0.1),
... name ='relu_layer'
... )
>>> Without TensorLayer, you can do as follows.
>>> W = tf.Variable(
... tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W')
>>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b')
>>> y = tf.nn.relu(tf.matmul(inputs, W) + b)
Note
-----
If the input to this layer has more than two axes, you need to flatten the
input first by using :class:`FlattenLayer`.
"""
def __init__(
self,
layer = None,
n_units = 100,
act = tf.nn.relu,
W_init = tf.truncated_normal_initializer(stddev=0.1),
b_init = tf.constant_initializer(value=0.0),
W_init_args = {},
b_init_args = {},
name ='dense_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
if self.inputs.get_shape().ndims != 2:
raise Exception("The input dimension must be rank 2, please reshape or flatten it")
n_in = int(self.inputs._shape[-1])
self.n_units = n_units
print(" tensorlayer:Instantiate DenseLayer %s: %d, %s" % (self.name, self.n_units, act.__name__))
with tf.variable_scope(name) as vs:
W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, **W_init_args )
b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, **b_init_args )
self.outputs = act(tf.matmul(self.inputs, W) + b)#, name=name) # 1.2
# self.outputs = act(tf.matmul(self.inputs, W) + b)
# Hint : list() and dict() make shallow copies (pass by value); without them,
# the attributes would be passed by reference.
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_layers.extend( [self.outputs] )
self.all_params.extend( [W, b] )
# The shallow copy allows the weights in the network to be changed in place,
# e.g. when ReconLayer updates the weights of the encoder.
#
# e.g. the encoder points to same physical memory address
# network = InputLayer(x, name='input_layer')
# network = DenseLayer(network, n_units=200, act = tf.nn.sigmoid, name='sigmoid')
# recon_layer = ReconLayer(network, n_units=784, act = tf.nn.sigmoid, name='recon_layer')
# print(network.all_params)
# [<tensorflow.python.ops.variables.Variable object at 0x10d616f98>,
# <tensorflow.python.ops.variables.Variable object at 0x10d8f6080>]
# print(len(network.all_params))
# 2
# print(recon_layer.all_params)
# [<tensorflow.python.ops.variables.Variable object at 0x10d616f98>,
# <tensorflow.python.ops.variables.Variable object at 0x10d8f6080>,
# <tensorflow.python.ops.variables.Variable object at 0x10d8f6550>,
# <tensorflow.python.ops.variables.Variable object at 0x10d8f6198>]
# print(len(recon_layer.all_params))
# 4
class ReconLayer(DenseLayer):
"""
The :class:`ReconLayer` class is a reconstruction layer based on `DenseLayer`,
which is used to pre-train a previous `DenseLayer`.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
x_recon : tensorflow variable
The variables used for reconstruction.
name : a string or None
An optional name to attach to this layer.
n_units : int
The number of units of the layer, should be equal to the dimension of x_recon.
act : activation function
The activation function that is applied to the reconstruction layer.
Normally, for sigmoid layer, the reconstruction activation is sigmoid;
for rectifying layer, the reconstruction activation is softplus.
Examples
--------
>>> network = tl.layers.InputLayer(x, name='input_layer')
>>> network = tl.layers.DenseLayer(network, n_units=196,
... act=tf.nn.sigmoid, name='sigmoid1')
>>> recon_layer1 = tl.layers.ReconLayer(network, x_recon=x, n_units=784,
... act=tf.nn.sigmoid, name='recon_layer1')
>>> recon_layer1.pretrain(sess, x=x, X_train=X_train, X_val=X_val,
... denoise_name=None, n_epoch=1200, batch_size=128,
... print_freq=10, save=True, save_name='w1pre_')
Methods
-------
pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_')
Start to pre-train the parameters of previous DenseLayer.
Notes
-----
The input layer should be a `DenseLayer` or a layer whose output has only one axis apart from the batch dimension.
You may need to modify this part to define your own cost function.
By default, the cost is implemented as follows:
For sigmoid layer, the implementation can be `UFLDL <http://deeplearning.stanford.edu/wiki/index.php/UFLDL_Tutorial>`_
For rectifying layer, the implementation can be `Glorot (2011). Deep Sparse Rectifier Neural Networks <http://doi.org/10.1.1.208.6449>`_
"""
def __init__(
self,
layer = None,
x_recon = None,
name = 'recon_layer',
n_units = 784,
act = tf.nn.softplus,
):
DenseLayer.__init__(self, layer=layer, n_units=n_units, act=act, name=name)
print(" tensorlayer: %s is a ReconLayer" % self.name)
# y : reconstruction outputs; train_params : parameters to train
# Note that: train_params = [W_encoder, b_encoder, W_decoder, b_decoder]
y = self.outputs
self.train_params = self.all_params[-4:]
# =====================================================================
#
# You need to modify the below cost function and optimizer so as to
# implement your own pre-train method.
#
# =====================================================================
lambda_l2_w = 0.004
learning_rate = 0.0001
print(" lambda_l2_w: %f" % lambda_l2_w)
print(" learning_rate: %f" % learning_rate)
# Mean-squared-error, i.e. quadratic cost
mse = tf.reduce_sum(tf.squared_difference(y, x_recon), reduction_indices = 1)
mse = tf.reduce_mean(mse) # in theano: mse = ((y - x) ** 2 ).sum(axis=1).mean()
# mse = tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y, x_recon)), reduction_indices = 1))
# mse = tf.reduce_mean(tf.squared_difference(y, x_recon)) # <haodong>: Error
# mse = tf.sqrt(tf.reduce_mean(tf.square(y - x_recon))) # <haodong>: Error
# Cross-entropy
# ce = cost.cross_entropy(y, x_recon) # <haodong>: list , list , Error (only be used for softmax output)
# ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, x_recon)) # <haodong>: list , list , Error (only be used for softmax output)
# ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, x_recon)) # <haodong>: list , index , Error (only be used for softmax output)
L2_w = tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[0]) \
+ tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[2]) # faster than the code below
# L2_w = lambda_l2_w * tf.reduce_mean(tf.square(self.train_params[0])) + lambda_l2_w * tf.reduce_mean( tf.square(self.train_params[2]))
# DropNeuro
P_o = cost.lo_regularizer(0.03)(self.train_params[0]) # + cost.lo_regularizer(0.5)(self.train_params[2]) # <haodong>: if add lo on decoder, no neuron will be broken
P_i = cost.li_regularizer(0.03)(self.train_params[0]) # + cost.li_regularizer(0.001)(self.train_params[2])
# L1 of activation outputs
activation_out = self.all_layers[-2]
L1_a = 0.001 * tf.reduce_mean(activation_out) # <haodong>: theano: T.mean( self.a[i] ) # some neuron are broken, white and black
# L1_a = 0.001 * tf.reduce_mean( tf.reduce_sum(activation_out, reduction_indices=0) ) # <haodong>: some neuron are broken, white and black
# L1_a = 0.001 * 100 * tf.reduce_mean( tf.reduce_sum(activation_out, reduction_indices=1) ) # <haodong>: some neuron are broken, white and black
# KL Divergence
beta = 4
rho = 0.15
p_hat = tf.reduce_mean(activation_out, reduction_indices = 0) # theano: p_hat = T.mean( self.a[i], axis=0 )
KLD = beta * tf.reduce_sum( rho * tf.log(tf.div(rho, p_hat)) + (1- rho) * tf.log((1- rho)/ (tf.sub(float(1), p_hat))) )
# KLD = beta * tf.reduce_sum( rho * tf.log(rho/ p_hat) + (1- rho) * tf.log((1- rho)/(1- p_hat)) )
# theano: L1_a = l1_a[i] * T.sum( rho[i] * T.log(rho[i]/ p_hat) + (1- rho[i]) * T.log((1- rho[i])/(1- p_hat)) )
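# Written out, the sparsity penalty above is
#   KLD = beta * sum_j [ rho * log(rho / p_hat_j) + (1 - rho) * log((1 - rho) / (1 - p_hat_j)) ]
# where p_hat_j is the mean activation of hidden unit j over the batch, rho is the
# target sparsity and beta weights the penalty.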
# Total cost
if act == tf.nn.softplus:
print(' use: mse, L2_w, L1_a')
self.cost = mse + L1_a + L2_w
elif act == tf.nn.sigmoid:
# ----------------------------------------------------
# Cross-entropy was used in Denoising AE
# print(' use: ce, L2_w, KLD')
# self.cost = ce + L2_w + KLD
# ----------------------------------------------------
# Mean-squared-error was used in Vanilla AE
print(' use: mse, L2_w, KLD')
self.cost = mse + L2_w + KLD
# ----------------------------------------------------
# Adding the DropNeuro penalty (P_o) can remove neurons of the AE
# print(' use: mse, L2_w, KLD, P_o')
# self.cost = mse + L2_w + KLD + P_o
# ----------------------------------------------------
# Adding the DropNeuro penalty (P_i) can remove neurons of the previous layer
# If the previous layer is an InputLayer, this means removing useless features
# print(' use: mse, L2_w, KLD, P_i')
# self.cost = mse + L2_w + KLD + P_i
else:
raise Exception("Don't support the given reconstruct activation function")
self.train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
epsilon=1e-08, use_locking=False).minimize(self.cost, var_list=self.train_params)
# self.train_op = tf.train.GradientDescentOptimizer(1.0).minimize(self.cost, var_list=self.train_params)
def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10,
save=True, save_name='w1pre_'):
# ====================================================
#
# You need to modify the cost function in __init__() so as to
# get your own pre-train method.
#
# ====================================================
print(" tensorlayer: %s start pretrain" % self.name)
print(" batch_size: %d" % batch_size)
if denoise_name:
print(" denoising layer keep: %f" % self.all_drop[set_keep[denoise_name]])
dp_denoise = self.all_drop[set_keep[denoise_name]]
else:
print(" no denoising layer")
for epoch in range(n_epoch):
start_time = time.time()
for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
dp_dict = utils.dict_to_one( self.all_drop )
if denoise_name:
dp_dict[set_keep[denoise_name]] = dp_denoise
feed_dict = {x: X_train_a}
feed_dict.update(dp_dict)
sess.run(self.train_op, feed_dict=feed_dict)
if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
train_loss, n_batch = 0, 0
for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
dp_dict = utils.dict_to_one( self.all_drop )
feed_dict = {x: X_train_a}
feed_dict.update(dp_dict)
err = sess.run(self.cost, feed_dict=feed_dict)
train_loss += err
n_batch += 1
print(" train loss: %f" % (train_loss/ n_batch))
val_loss, n_batch = 0, 0
for X_val_a, _ in iterate.minibatches(X_val, X_val, batch_size, shuffle=True):
dp_dict = utils.dict_to_one( self.all_drop )
feed_dict = {x: X_val_a}
feed_dict.update(dp_dict)
err = sess.run(self.cost, feed_dict=feed_dict)
val_loss += err
n_batch += 1
print(" val loss: %f" % (val_loss/ n_batch))
if save:
try:
visualize.W(self.train_params[0].eval(), second=10, saveable=True, shape=[28,28], name=save_name+str(epoch+1), fig_idx=2012)
files.save_npz([self.all_params[0]] , name=save_name+str(epoch+1)+'.npz')
except:
raise Exception("You should change visualize.W(), if you want to save the feature images for different dataset")
# Noise layer
class DropoutLayer(Layer):
"""
The :class:`DropoutLayer` class is a noise layer which randomly sets some
values to zero with a given keeping probability.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
keep : float
The keeping probability; the lower it is, the more values will be set to zero.
name : a string or None
An optional name to attach to this layer.
Examples
--------
>>> network = tl.layers.InputLayer(x, name='input_layer')
>>> network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1')
>>> network = tl.layers.DenseLayer(network, n_units=800, act = tf.nn.relu, name='relu1')
"""
def __init__(
self,
layer = None,
keep = 0.5,
name = 'dropout_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
print(" tensorlayer:Instantiate DropoutLayer %s: keep: %f" % (self.name, keep))
# The name of the placeholder for keep_prob is the same as the name
# of the Layer.
set_keep[name] = tf.placeholder(tf.float32)
self.outputs = tf.nn.dropout(self.inputs, set_keep[name], name=name) # 1.2
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_drop.update( {set_keep[name]: keep} )
self.all_layers.extend( [self.outputs] )
# print(set_keep[name])
# Tensor("Placeholder_2:0", dtype=float32)
# print(denoising1)
# Tensor("Placeholder_2:0", dtype=float32)
# print(self.all_drop[denoising1])
# 0.8
#
# https://www.tensorflow.org/versions/r0.8/tutorials/mnist/tf/index.html
# The optional feed_dict argument allows the caller to override the
# value of tensors in the graph. Each key in feed_dict can be one of
# the following types:
# If the key is a Tensor, the value may be a Python scalar, string,
# list, or numpy ndarray that can be converted to the same dtype as that
# tensor. Additionally, if the key is a placeholder, the shape of the
# value will be checked for compatibility with the placeholder.
# If the key is a SparseTensor, the value should be a SparseTensorValue.
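# A short usage sketch of the keep_prob placeholders collected in `all_drop`
# (`network`, `x`, `y_`, `train_op` and `cost` are hypothetical names): feed the
# stored probabilities for training, and feed 1.0 everywhere for evaluation.
#
# feed_dict = {x: X_train_batch, y_: y_train_batch}
# feed_dict.update(network.all_drop)              # training: use the defined keep probabilities
# sess.run(train_op, feed_dict=feed_dict)
#
# dp_dict = utils.dict_to_one(network.all_drop)   # evaluation: disable dropout (keep = 1.0)
# feed_dict = {x: X_val, y_: y_val}
# feed_dict.update(dp_dict)
# sess.run(cost, feed_dict=feed_dict)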
class DropconnectDenseLayer(Layer):
"""
The :class:`DropconnectDenseLayer` class is a `DenseLayer` with DropConnect
behaviour, which randomly removes connections between this layer and the
previous layer with a given keeping probability.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
keep : float
The keeping probability; the lower it is, the more values will be set to zero.
n_units : int
The number of units of the layer.
act : activation function
The function that is applied to the layer activations.
W_init : weights initializer
The initializer for initializing the weight matrix.
b_init : biases initializer
The initializer for initializing the bias vector.
W_init_args : dictionary
The arguments for the weights tf.get_variable().
b_init_args : dictionary
The arguments for the biases tf.get_variable().
name : a string or None
An optional name to attach to this layer.
Examples
--------
>>> network = tl.layers.InputLayer(x, name='input_layer')
>>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.8,
... n_units=800, act = tf.nn.relu, name='dropconnect_relu1')
>>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.5,
... n_units=800, act = tf.nn.relu, name='dropconnect_relu2')
>>> network = tl.layers.DropconnectDenseLayer(network, keep = 0.5,
... n_units=10, act = tl.activation.identity, name='output_layer')
References
----------
`Wan, L. (2013). Regularization of neural networks using dropconnect <http://machinelearning.wustl.edu/mlpapers/papers/icml2013_wan13>`_
"""
def __init__(
self,
layer = None,
keep = 0.5,
n_units = 100,
act = tf.nn.relu,
W_init = tf.truncated_normal_initializer(stddev=0.1),
b_init = tf.constant_initializer(value=0.0),
W_init_args = {},
b_init_args = {},
name ='dropconnect_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
if self.inputs.get_shape().ndims != 2:
raise Exception("The input dimension must be rank 2")
n_in = int(self.inputs._shape[-1])
self.n_units = n_units
print(" tensorlayer:Instantiate DropconnectDenseLayer %s: %d, %s" % (self.name, self.n_units, act.__name__))
with tf.variable_scope(name) as vs:
W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, **W_init_args )
b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, **b_init_args )
self.outputs = act(tf.matmul(self.inputs, W) + b)#, name=name) # 1.2
set_keep[name] = tf.placeholder(tf.float32)
W_dropcon = tf.nn.dropout(W, set_keep[name])
self.outputs = act(tf.matmul(self.inputs, W_dropcon) + b)
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_drop.update( {set_keep[name]: keep} )
self.all_layers.extend( [self.outputs] )
self.all_params.extend( [W, b] )
# Convolutional layer
class Conv2dLayer(Layer):
"""
The :class:`Conv2dLayer` class is a 2D CNN layer, see `tf.nn.conv2d`.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
act : activation function
The function that is applied to the layer activations.
shape : list of shape
shape of the filters, [filter_height, filter_width, in_channels, out_channels].
strides : a list of ints.
The stride of the sliding window for each dimension of input.\n
It must be in the same order as the dimension specified with format.
padding : a string from: "SAME", "VALID".
The type of padding algorithm to use.
W_init : weights initializer
The initializer for initializing the weight matrix.
b_init : biases initializer
The initializer for initializing the bias vector.
W_init_args : dictionary
The arguments for the weights tf.get_variable().
b_init_args : dictionary
The arguments for the biases tf.get_variable().
name : a string or None
An optional name to attach to this layer.
Examples
--------
>>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
>>> network = tl.layers.InputLayer(x, name='input_layer')
>>> network = tl.layers.Conv2dLayer(network,
... act = tf.nn.relu,
... shape = [5, 5, 1, 32], # 32 features for each 5x5 patch
... strides=[1, 1, 1, 1],
... padding='SAME',
... W_init=tf.truncated_normal_initializer(stddev=5e-2),
... W_init_args={},
... b_init = tf.constant_initializer(value=0.0),
... b_init_args = {},
... name ='cnn_layer1') # output: (?, 28, 28, 32)
>>> network = tl.layers.PoolLayer(network,
... ksize=[1, 2, 2, 1],
... strides=[1, 2, 2, 1],
... padding='SAME',
... pool = tf.nn.max_pool,
... name ='pool_layer1',) # output: (?, 14, 14, 32)
>>> Without TensorLayer, you can initialize the parameters as follows.
>>> W = tf.Variable(W_init(shape=[5, 5, 1, 32], ), name='W_conv')
>>> b = tf.Variable(b_init(shape=[32], ), name='b_conv')
>>> outputs = tf.nn.relu( tf.nn.conv2d(inputs, W,
... strides=[1, 1, 1, 1],
... padding='SAME') + b )
"""
def __init__(
self,
layer = None,
act = tf.nn.relu,
shape = [5, 5, 1, 100],
strides=[1, 1, 1, 1],
padding='SAME',
W_init = tf.truncated_normal_initializer(stddev=0.1),
b_init = tf.constant_initializer(value=0.0),
W_init_args = {},
b_init_args = {},
name ='cnn_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
print(" tensorlayer:Instantiate Conv2dLayer %s: %s, %s, %s, %s" %
(self.name, str(shape), str(strides), padding, act.__name__))
with tf.variable_scope(name) as vs:
W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, **W_init_args )
b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, **b_init_args )
self.outputs = act( tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding) + b ) #1.2
# self.outputs = act( tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding) + b )
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_layers.extend( [self.outputs] )
self.all_params.extend( [W, b] )
class Conv3dLayer(Layer):
"""
The :class:`Conv3dLayer` class is a 3D CNN layer, see `tf.nn.conv3d`.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
act : activation function
The function that is applied to the layer activations.
shape : list of shape
shape of the filters, [filter_depth, filter_height, filter_width, in_channels, out_channels].
strides : a list of ints. 1-D of length 5.
The stride of the sliding window for each dimension of input. Must be in the same order as the dimension specified with format.
padding : a string from: "SAME", "VALID".
The type of padding algorithm to use.
W_init : weights initializer
The initializer for initializing the weight matrix.
b_init : biases initializer
The initializer for initializing the bias vector.
W_init_args : dictionary
The arguments for the weights initializer.
b_init_args : dictionary
The arguments for the biases initializer.
name : a string or None
An optional name to attach to this layer.
Links
------
`tf.nn.conv3d <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#conv3d>`_
"""
def __init__(
self,
layer = None,
act = tf.nn.relu,
shape = [],
strides=[],
padding='SAME',
W_init = tf.truncated_normal_initializer(stddev=0.1),
b_init = tf.constant_initializer(value=0.0),
W_init_args = {},
b_init_args = {},
name ='cnn3d_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
print(" tensorlayer:Instantiate Conv3dLayer %s: %s, %s, %s, %s" % (self.name, str(shape), str(strides), padding, act.__name__))
with tf.variable_scope(name) as vs:
# W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv')
# b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv')
W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, **W_init_args )
b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, **b_init_args )
self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b )
# self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b )
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_layers.extend( [self.outputs] )
self.all_params.extend( [W, b] )
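# A hypothetical usage sketch (a batch of 16-frame single-channel volumes; shapes
# and strides follow tf.nn.conv3d, i.e. 5-D [batch, depth, height, width, channels]):
#
# x = tf.placeholder(tf.float32, shape=[None, 16, 32, 32, 1])
# network = tl.layers.InputLayer(x, name='input_layer')
# network = tl.layers.Conv3dLayer(network,
#                     act = tf.nn.relu,
#                     shape = [2, 2, 2, 1, 32],     # [depth, height, width, in_channels, out_channels]
#                     strides = [1, 2, 2, 2, 1],
#                     padding = 'SAME',
#                     name = 'cnn3d_layer1')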
class DeConv3dLayer(Layer):
"""
The :class:`DeConv3dLayer` class is a deconvolutional 3D layer, see `tf.nn.conv3d_transpose`.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
act : activation function
The function that is applied to the layer activations.
shape : list of shape
shape of the filters, [depth, height, width, output_channels, in_channels], filter's in_channels dimension must match that of value.
output_shape : list of output shape
representing the output shape of the deconvolution op.
strides : a list of ints.
The stride of the sliding window for each dimension of the input tensor.
padding : a string from: "SAME", "VALID".
The type of padding algorithm to use.
W_init : weights initializer
The initializer for initializing the weight matrix.
b_init : biases initializer
The initializer for initializing the bias vector.
W_init_args : dictionary
The arguments for the weights initializer.
b_init_args : dictionary
The arguments for the biases initializer.
name : a string or None
An optional name to attach to this layer.
Links
------
`tf.nn.conv3d_transpose <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#conv3d_transpose>`_
"""
def __init__(
self,
layer = None,
act = tf.nn.relu,
shape = [2, 2, 2, 512, 1024],
output_shape = [None, 50, 50,50,512],
strides = [1,2,2,2,1],
padding = 'SAME',
W_init = tf.truncated_normal_initializer(stddev=0.1),
b_init = tf.constant_initializer(value=0.0),
W_init_args = {},
b_init_args = {},
name ='decnn_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
print(" tensorlayer:Instantiate DeConv2dLayer %s: %s, %s, %s, %s, %s" %
(self.name, str(shape), str(output_shape), str(strides), padding, act))
with tf.variable_scope(name) as vs:
W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, **W_init_args )
b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, **b_init_args )
self.outputs = act( tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b )
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_layers.extend( [self.outputs] )
self.all_params.extend( [W, b] )
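# A hypothetical usage sketch (upsampling a 3D feature map; note that `shape` is
# [depth, height, width, output_channels, in_channels] and `output_shape` must give
# the full 5-D shape of the result, including the batch size):
#
# network = tl.layers.DeConv3dLayer(network,
#                     act = tf.nn.relu,
#                     shape = [2, 2, 2, 128, 256],              # 256 input channels -> 128 output channels
#                     output_shape = [batch_size, 8, 8, 8, 128],
#                     strides = [1, 2, 2, 2, 1],
#                     padding = 'SAME',
#                     name = 'decnn3d_layer1')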
# Pooling layer
class PoolLayer(Layer):
"""
The :class:`PoolLayer` class is a pooling layer; you can choose
``tf.nn.max_pool`` and ``tf.nn.avg_pool`` for 2D, or
``tf.nn.max_pool3d()`` and ``tf.nn.avg_pool3d()`` for 3D.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
ksize : a list of ints that has length >= 4.
The size of the window for each dimension of the input tensor.
strides : a list of ints that has length >= 4.
The stride of the sliding window for each dimension of the input tensor.
padding : a string from: "SAME", "VALID".
The type of padding algorithm to use.
pool : a pooling function
tf.nn.max_pool , tf.nn.avg_pool ...
name : a string or None
An optional name to attach to this layer.
Examples
--------
see Conv2dLayer
References
------------
`TensorFlow Pooling <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#pooling>`_
"""
def __init__(
self,
layer = None,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
pool = tf.nn.max_pool,
name ='pool_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
print(" tensorlayer:Instantiate PoolLayer %s: %s, %s, %s, %s" %
(self.name, str(ksize), str(strides), padding, pool.__name__))
self.outputs = pool(self.inputs, ksize=ksize, strides=strides, padding=padding, name=name)
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_layers.extend( [self.outputs] )
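# Besides the 2D example in Conv2dLayer, a sketch of 3D pooling (assuming the
# previous layer outputs a 5-D tensor; ksize and strides then have length 5):
#
# network = tl.layers.PoolLayer(network,
#                     ksize = [1, 2, 2, 2, 1],
#                     strides = [1, 2, 2, 2, 1],
#                     padding = 'SAME',
#                     pool = tf.nn.max_pool3d,
#                     name = 'pool3d_layer1')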
# Recurrent layer
class RNNLayer(Layer):
"""
The :class:`RNNLayer` class is an RNN layer; you can implement vanilla RNN,
LSTM and GRU with it.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
cell_fn : a TensorFlow core RNN cell, as follows.
see `RNN Cells in TensorFlow <https://www.tensorflow.org/versions/master/api_docs/python/rnn_cell.html>`_\n
class tf.nn.rnn_cell.BasicRNNCell\n
class tf.nn.rnn_cell.BasicLSTMCell\n
class tf.nn.rnn_cell.GRUCell\n
class tf.nn.rnn_cell.LSTMCell
cell_init_args : a dictionary
The arguments for the cell initializer.
n_hidden : an int
The number of hidden units in the layer.
n_steps : an int
The sequence length.
return_last : boolean
If True, return the last output, "Sequence input and single output"\n
If False, return all outputs, "Synced sequence input and output"\n
In other words, if you want to stack one or more RNN(s) on top of this layer, set to False.
return_seq_2d : boolean
When return_last = False\n
if True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it.
if False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it.
name : a string or None
An optional name to attach to this layer.
Field (Class Variables)
-----------------------
outputs : a tensor
The output of this RNN.
return_last = False, outputs = all cell_output, which is the hidden state.
cell_output.get_shape() = (?, n_hidden)
final_state : a tensor or StateTuple
When state_is_tuple = False,
it is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden].\n
When state_is_tuple = True, it stores two elements: (c, h), in that order.
You can get the final state after each iteration during training, then
feed it to the initial state of next iteration.
initial_state : a tensor or StateTuple
It is the initial state of this RNN layer, you can use it to initialize
your state at the beginning of each epoch or iteration according to your
training procedure.
Examples
--------
>>> For words
>>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
>>> network = tl.layers.EmbeddingInputlayer(
... inputs = input_data,
... vocabulary_size = vocab_size,
... embedding_size = hidden_size,
... E_init = tf.random_uniform_initializer(-init_scale, init_scale),
... name ='embedding_layer')
>>> if is_training:
>>> network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop1')
>>> network = tl.layers.RNNLayer(network,
... cell_fn=tf.nn.rnn_cell.BasicLSTMCell,
... cell_init_args={'forget_bias': 0.0},# 'state_is_tuple': True},
... n_hidden=hidden_size,
... initializer=tf.random_uniform_initializer(-init_scale, init_scale),
... n_steps=num_steps,
... return_last=False,
... name='basic_lstm_layer1')
>>> lstm1 = network
>>> if is_training:
>>> network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop2')
>>> network = tl.layers.RNNLayer(network,
... cell_fn=tf.nn.rnn_cell.BasicLSTMCell,
... cell_init_args={'forget_bias': 0.0}, # 'state_is_tuple': True},
... n_hidden=hidden_size,
... initializer=tf.random_uniform_initializer(-init_scale, init_scale),
... n_steps=num_steps,
... return_last=False,
... return_seq_2d=True,
... name='basic_lstm_layer2')
>>> lstm2 = network
>>> if is_training:
>>> network = tl.layers.DropoutLayer(network, keep=keep_prob, name='drop3')
>>> network = tl.layers.DenseLayer(network,
... n_units=vocab_size,
... W_init=tf.random_uniform_initializer(-init_scale, init_scale),
... b_init=tf.random_uniform_initializer(-init_scale, init_scale),
... act = tl.activation.identity, name='output_layer')
>>> For CNN+LSTM
>>> x = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, 1])
>>> network = tl.layers.InputLayer(x, name='input_layer')
>>> network = tl.layers.Conv2dLayer(network,
... act = tf.nn.relu,
... shape = [5, 5, 1, 32], # 32 features for each 5x5 patch
... strides=[1, 2, 2, 1],
... padding='SAME',
... name ='cnn_layer1')
>>> network = tl.layers.PoolLayer(network,
... ksize=[1, 2, 2, 1],
... strides=[1, 2, 2, 1],
... padding='SAME',
... pool = tf.nn.max_pool,
... name ='pool_layer1')
>>> network = tl.layers.Conv2dLayer(network,
... act = tf.nn.relu,
... shape = [5, 5, 32, 10], # 10 features for each 5x5 patch
... strides=[1, 2, 2, 1],
... padding='SAME',
... name ='cnn_layer2')
>>> network = tl.layers.PoolLayer(network,
... ksize=[1, 2, 2, 1],
... strides=[1, 2, 2, 1],
... padding='SAME',
... pool = tf.nn.max_pool,
... name ='pool_layer2')
>>> network = tl.layers.FlattenLayer(network, name='flatten_layer')
>>> network = tl.layers.ReshapeLayer(network, shape=[-1, num_steps, int(network.outputs._shape[-1])])
>>> rnn1 = tl.layers.RNNLayer(network,
... cell_fn=tf.nn.rnn_cell.LSTMCell,
... cell_init_args={},
... n_hidden=200,
... initializer=tf.random_uniform_initializer(-0.1, 0.1),
... n_steps=num_steps,
... return_last=False,
... return_seq_2d=True,
... name='rnn_layer')
>>> network = tl.layers.DenseLayer(rnn1, n_units=3,
... act = tl.activation.identity, name='output_layer')
Notes
-----
If the input to this layer has more than two axes, we need to flatten the
input by using :class:`FlattenLayer`.
References
----------
`Neural Network RNN Cells in TensorFlow <https://www.tensorflow.org/versions/master/api_docs/python/rnn_cell.html>`_\n
`tensorflow/python/ops/rnn.py <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py>`_\n
`tensorflow/python/ops/rnn_cell.py <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py>`_
see TensorFlow tutorial ``ptb_word_lm.py``, TensorLayer tutorials ``tutorial_ptb_lstm.py`` and ``tutorial_generate_text.py``
"""
def __init__(
self,
layer = None,
cell_fn = tf.nn.rnn_cell.BasicRNNCell,
cell_init_args = {},
n_hidden = 100,
initializer = tf.random_uniform_initializer(-0.1, 0.1),
n_steps = 5,
return_last = False,
# is_reshape = True,
return_seq_2d = False,
name = 'rnn_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
print(" tensorlayer:Instantiate RNNLayer %s: n_hidden:%d, n_steps:%d, in_dim:%d %s, cell_fn:%s " % (self.name, n_hidden,
n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__))
# You can get the dimension by .get_shape() or ._shape, and check the
# dimension by .with_rank() as follow.
# self.inputs.get_shape().with_rank(2)
# self.inputs.get_shape().with_rank(3)
# is_reshape : boolen (deprecate)
# Reshape the inputs to 3 dimension tensor.\n
# If input is[batch_size, n_steps, n_features], we do not need to reshape it.\n
# If input is [batch_size * n_steps, n_features], we need to reshape it.
# if is_reshape:
# self.inputs = tf.reshape(self.inputs, shape=[-1, n_steps, int(self.inputs._shape[-1])])
fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
if fixed_batch_size.value:
batch_size = fixed_batch_size.value
print(" RNN batch_size (concurrent processes): %d" % batch_size)
else:
from tensorflow.python.ops import array_ops
batch_size = array_ops.shape(self.inputs)[0]
print(" non specified batch_size, use a tensor instead.")
self.batch_size = batch_size
# Simplified version of tensorflow.models.rnn.rnn.py's rnn().
# This builds an unrolled LSTM for tutorial purposes only.
# In general, use the rnn() or state_saving_rnn() from rnn.py.
#
# The alternative version of the code below is:
#
# from tensorflow.models.rnn import rnn
# inputs = [tf.squeeze(input_, [1])
# for input_ in tf.split(1, num_steps, inputs)]
# outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state)
outputs = []
self.cell = cell = cell_fn(num_units=n_hidden, **cell_init_args)
self.initial_state = cell.zero_state(batch_size, dtype=tf.float32)
state = self.initial_state
# with tf.variable_scope("model", reuse=None, initializer=initializer):
with tf.variable_scope(name, initializer=initializer) as vs:
for time_step in range(n_steps):
if time_step > 0: tf.get_variable_scope().reuse_variables()
(cell_output, state) = cell(self.inputs[:, time_step, :], state)
outputs.append(cell_output)
# Retrieve just the RNN variables.
# rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)]
rnn_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name)
print(" n_params : %d" % (len(rnn_variables)))
if return_last:
# 2D Tensor [batch_size, n_hidden]
self.outputs = outputs[-1]
else:
if return_seq_2d:
# PTB tutorial:
# 2D Tensor [n_example, n_hidden]
self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden])
else:
# <akara>:
# 3D Tensor [n_example/n_steps, n_steps, n_hidden]
self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden])
self.final_state = state
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
# print(type(self.outputs))
self.all_layers.extend( [self.outputs] )
self.all_params.extend( rnn_variables )
# Shape layer
class FlattenLayer(Layer):
"""
The :class:`FlattenLayer` class is a layer which reshapes high-dimension
input to a vector. Then we can apply DenseLayer, RNNLayer, ConcatLayer,
etc. on top of it.
[batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask]
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
name : a string or None
An optional name to attach to this layer.
Examples
--------
>>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
>>> network = tl.layers.InputLayer(x, name='input_layer')
>>> network = tl.layers.Conv2dLayer(network,
... act = tf.nn.relu,
... shape = [5, 5, 1, 64],
... strides=[1, 1, 1, 1],
... padding='SAME',
... name ='cnn_layer')
>>> network = tl.layers.PoolLayer(network,
... ksize=[1, 2, 2, 1],
... strides=[1, 2, 2, 1],
... padding='SAME',
... pool = tf.nn.max_pool,
... name ='pool_layer',)
>>> network = tl.layers.FlattenLayer(network, name='flatten_layer')
"""
def __init__(
self,
layer = None,
name ='flatten_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
self.outputs = flatten_reshape(self.inputs, name=name)
self.n_units = int(self.outputs._shape[-1])
print(" tensorlayer:Instantiate FlattenLayer %s: %d" % (self.name, self.n_units))
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_layers.extend( [self.outputs] )
class ConcatLayer(Layer):
"""
The :class:`ConcatLayer` class is a layer which concatenates (merges) two or more
:class:`DenseLayer` instances into a single :class:`DenseLayer`.
Parameters
----------
layer : a list of :class:`Layer` instances
The `Layer` class feeding into this layer.
concat_dim : int
Dimension along which to concatenate.
name : a string or None
An optional name to attach to this layer.
Examples
--------
>>> sess = tf.InteractiveSession()
>>> x = tf.placeholder(tf.float32, shape=[None, 784])
>>> inputs = tl.layers.InputLayer(x, name='input_layer')
>>> net1 = tl.layers.DenseLayer(inputs, n_units=800, act = tf.nn.relu, name='relu1_1')
>>> net2 = tl.layers.DenseLayer(inputs, n_units=300, act = tf.nn.relu, name='relu2_1')
>>> network = tl.layers.ConcatLayer(layer = [net1, net2], name ='concat_layer')
... tensorlayer:Instantiate InputLayer input_layer (?, 784)
... tensorlayer:Instantiate DenseLayer relu1_1: 800, <function relu at 0x1108e41e0>
... tensorlayer:Instantiate DenseLayer relu2_1: 300, <function relu at 0x1108e41e0>
... tensorlayer:Instantiate ConcatLayer concat_layer, 1100
...
>>> sess.run(tf.initialize_all_variables())
>>> network.print_params()
... param 0: (784, 800) (mean: 0.000021, median: -0.000020 std: 0.035525)
... param 1: (800,) (mean: 0.000000, median: 0.000000 std: 0.000000)
... param 2: (784, 300) (mean: 0.000000, median: -0.000048 std: 0.042947)
... param 3: (300,) (mean: 0.000000, median: 0.000000 std: 0.000000)
... num of params: 863500
>>> network.print_layers()
... layer 0: Tensor("Relu:0", shape=(?, 800), dtype=float32)
... layer 1: Tensor("Relu_1:0", shape=(?, 300), dtype=float32)
...
"""
def __init__(
self,
layer = [],
concat_dim = 1,
name ='concat_layer',
):
Layer.__init__(self, name=name)
self.inputs = []
for l in layer:
self.inputs.append(l.outputs)
self.outputs = tf.concat(concat_dim, self.inputs, name=name) # 1.2
self.n_units = int(self.outputs._shape[-1])
print(" tensorlayer:Instantiate ConcatLayer %s, %d" % (self.name, self.n_units))
self.all_layers = list(layer[0].all_layers)
self.all_params = list(layer[0].all_params)
self.all_drop = dict(layer[0].all_drop)
for i in range(1, len(layer)):
self.all_layers.extend(list(layer[i].all_layers))
self.all_params.extend(list(layer[i].all_params))
self.all_drop.update(dict(layer[i].all_drop))
class ReshapeLayer(Layer):
"""
The :class:`ReshapeLayer` class is a layer which reshapes the tensor.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
shape : a list
The output shape.
name : a string or None
An optional name to attach to this layer.
Examples
--------
>>> The core of this layer is ``tf.reshape``.
>>> Use TensorFlow only :
>>> x = tf.placeholder(tf.float32, shape=[None, 3])
>>> y = tf.reshape(x, shape=[-1, 3, 3])
>>> sess = tf.InteractiveSession()
>>> print(sess.run(y, feed_dict={x:[[1,1,1],[2,2,2],[3,3,3],[4,4,4],[5,5,5],[6,6,6]]}))
... [[[ 1. 1. 1.]
... [ 2. 2. 2.]
... [ 3. 3. 3.]]
... [[ 4. 4. 4.]
... [ 5. 5. 5.]
... [ 6. 6. 6.]]]
"""
def __init__(
self,
layer = None,
shape = [],
name ='reshape_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
self.outputs = tf.reshape(self.inputs, shape=shape, name=name)
print(" tensorlayer:Instantiate ReshapeLayer %s: %s" % (self.name, self.outputs._shape))
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_layers.extend( [self.outputs] )
## TF-Slim layer
class SlimNetsLayer(Layer):
"""
The :class:`SlimNetsLayer` class can be used to merge all TF-Slim nets into
TensorLayer. Model can be found in `slim-model <https://github.com/tensorflow/models/tree/master/slim#Install>`_ , more about slim
see `slim-git <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim>`_ .
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
slim_layer : a slim network function
The network you want to stack onto, end with ``return net, end_points``.
name : a string or None
An optional name to attach to this layer.
Note
-----
Because TF-Slim stores the layers as a dictionary, the ``all_layers`` in this
network are not in order! Fortunately, the ``all_params`` are in order.
"""
def __init__(
self,
layer = None,
slim_layer = None,
slim_args = {},
name ='slim_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
print(" tensorlayer:Instantiate SlimNetsLayer %s: %s" % (self.name, slim_layer.__name__))
with tf.variable_scope(name) as vs:
net, end_points = slim_layer(self.inputs, **slim_args)
slim_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name)
self.outputs = net
slim_layers = []
for v in end_points.values():
tf.contrib.layers.summaries.summarize_activation(v)
slim_layers.append(v)
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
self.all_layers.extend( slim_layers )
self.all_params.extend( slim_variables )
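# A hedged usage sketch (assuming a TF-Slim network function such as `inception_v3`
# is importable and that the placeholder matches its expected image size; the keys
# of `slim_args` are keyword arguments of that slim function):
#
# x = tf.placeholder(tf.float32, shape=[None, 299, 299, 3])
# net_in = tl.layers.InputLayer(x, name='input_layer')
# network = tl.layers.SlimNetsLayer(layer=net_in,
#                     slim_layer = inception_v3,
#                     slim_args = {'num_classes': 1001, 'is_training': False},
#                     name = 'InceptionV3')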
## Flow control layer
class MultiplexerLayer(Layer):
"""
The :class:`MultiplexerLayer` selects one of several inputs and forwards the selected input to the output,
see `tutorial_mnist_multiplexer.py`.
Parameters
----------
layer : a list of :class:`Layer` instances
The `Layer` class feeding into this layer.
name : a string or None
An optional name to attach to this layer.
Field (Class Variables)
-----------------------
sel : a placeholder
An int that selects which input is forwarded to the output.
Examples
--------
>>> x = tf.placeholder(tf.float32, shape=[None, 784], name='x')
>>> y_ = tf.placeholder(tf.int64, shape=[None, ], name='y_')
>>> # define the network
>>> net_in = tl.layers.InputLayer(x, name='input_layer')
>>> net_in = tl.layers.DropoutLayer(net_in, keep=0.8, name='drop1')
>>> # net 0
>>> net_0 = tl.layers.DenseLayer(net_in, n_units=800,
... act = tf.nn.relu, name='net0/relu1')
>>> net_0 = tl.layers.DropoutLayer(net_0, keep=0.5, name='net0/drop2')
>>> net_0 = tl.layers.DenseLayer(net_0, n_units=800,
... act = tf.nn.relu, name='net0/relu2')
>>> # net 1
>>> net_1 = tl.layers.DenseLayer(net_in, n_units=800,
... act = tf.nn.relu, name='net1/relu1')
>>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop2')
>>> net_1 = tl.layers.DenseLayer(net_1, n_units=800,
... act = tf.nn.relu, name='net1/relu2')
>>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop3')
>>> net_1 = tl.layers.DenseLayer(net_1, n_units=800,
... act = tf.nn.relu, name='net1/relu3')
>>> # multiplexer
>>> net_mux = tl.layers.MultiplexerLayer(layer = [net_0, net_1], name='mux_layer')
>>> network = tl.layers.ReshapeLayer(net_mux, shape=[-1, 800], name='reshape_layer') #
>>> network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3')
>>> # output layer
>>> network = tl.layers.DenseLayer(network, n_units=10,
... act = tf.identity, name='output_layer')
References
------------
See ``tf.pack()`` and ``tf.gather()`` at `TensorFlow - Slicing and Joining <https://www.tensorflow.org/versions/master/api_docs/python/array_ops.html#slicing-and-joining>`_
"""
def __init__(self,
layer = [],
name='mux_layer'):
Layer.__init__(self, name=name)
self.n_inputs = len(layer)
self.inputs = []
for l in layer:
self.inputs.append(l.outputs)
all_inputs = tf.pack(self.inputs, name=name) # pack means concat a list of tensor in a new dim # 1.2
print(" tensorlayer:Instantiate MultiplexerLayer %s: n_inputs: %d" % (self.name, self.n_inputs))
self.sel = tf.placeholder(tf.int32)
self.outputs = tf.gather(all_inputs, self.sel, name=name) # [sel, :, : ...] # 1.2
# print(self.outputs, vars(self.outputs))
# # tf.reshape(self.outputs, shape=)
# exit()
# the same with ConcatLayer
self.all_layers = list(layer[0].all_layers)
self.all_params = list(layer[0].all_params)
self.all_drop = dict(layer[0].all_drop)
for i in range(1, len(layer)):
self.all_layers.extend(list(layer[i].all_layers))
self.all_params.extend(list(layer[i].all_params))
self.all_drop.update(dict(layer[i].all_drop))
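# At run time the active branch is chosen by feeding an integer index into the
# `sel` placeholder, e.g. (continuing the docstring example; `train_op` is hypothetical):
#
# feed_dict = {x: X_batch, y_: y_batch, net_mux.sel: 1}   # forward branch net_1
# sess.run(train_op, feed_dict=feed_dict)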
## We can Duplicate the network instead of DemultiplexerLayer
# class DemultiplexerLayer(Layer):
# """
# The :class:`DemultiplexerLayer` takes a single input and select one of many output lines, which is connected to the input.
#
# Parameters
# ----------
# layer : a list of :class:`Layer` instances
# The `Layer` class feeding into this layer.
# n_outputs : a int
# The number of output
# name : a string or None
# An optional name to attach to this layer.
#
# Field (Class Variables)
# -----------------------
# sel : a placeholder
# Input int [0, inf], the
# outputs : a list of Tensor
# A list of outputs
#
# Examples
# --------
# >>>
# """
# def __init__(self,
# layer = None,
# name='demux_layer'):
# Layer.__init__(self, name=name)
# self.outputs = []
## Wrapper
class EmbeddingAttentionSeq2seqWrapper(Layer):
"""Sequence-to-sequence model with attention and for multiple buckets.
This example implements a multi-layer recurrent neural network as encoder,
and an attention-based decoder. This is the same as the model described in
this paper:
“Grammar as a Foreign Language”
http://arxiv.org/abs/1412.7449 - please look there for details,
or into the seq2seq library for complete model implementation.
This example also allows to use GRU cells in addition to LSTM cells, and
sampled softmax to handle large output vocabulary size. A single-layer
version of this model, but with bi-directional encoder, was presented in
“Neural Machine Translation by Jointly Learning to Align and Translate”
http://arxiv.org/abs/1409.0473
The sampled softmax is described in Section 3 of the following paper.
“On Using Very Large Target Vocabulary for Neural Machine Translation”
http://arxiv.org/abs/1412.2007
Parameters
----------
source_vocab_size : size of the source vocabulary.
target_vocab_size : size of the target vocabulary.
buckets : a list of pairs (I, O), where I specifies maximum input length
that will be processed in that bucket, and O specifies maximum output
length. Training instances that have inputs longer than I or outputs
longer than O will be pushed to the next bucket and padded accordingly.
We assume that the list is sorted, e.g., [(2, 4), (8, 16)].
size : number of units in each layer of the model.
num_layers : number of layers in the model.
max_gradient_norm : gradients will be clipped to maximally this norm.
batch_size : the size of the batches used during training;
the model construction is independent of batch_size, so it can be
changed after initialization if this is convenient, e.g., for decoding.
learning_rate : learning rate to start with.
learning_rate_decay_factor : decay learning rate by this much when needed.
use_lstm : if true, we use LSTM cells instead of GRU cells.
num_samples : number of samples for sampled softmax.
forward_only : if set, we do not construct the backward pass in the model.
name : a string or None
An optional name to attach to this layer.
"""
def __init__(self,
source_vocab_size,
target_vocab_size,
buckets,
size,
num_layers,
max_gradient_norm,
batch_size,
learning_rate,
learning_rate_decay_factor,
use_lstm=False,
num_samples=512,
forward_only=False,
name='wrapper'):
Layer.__init__(self)#, name=name)
self.source_vocab_size = source_vocab_size
self.target_vocab_size = target_vocab_size
self.buckets = buckets
self.batch_size = batch_size
self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate')
self.learning_rate_decay_op = self.learning_rate.assign(
self.learning_rate * learning_rate_decay_factor)
self.global_step = tf.Variable(0, trainable=False, name='global_step')
# =========== Fake output Layer for compute cost ======
# If we use sampled softmax, we need an output projection.
with tf.variable_scope(name) as vs:
output_projection = None
softmax_loss_function = None
# Sampled softmax only makes sense if we sample less than vocabulary size.
if num_samples > 0 and num_samples < self.target_vocab_size:
w = tf.get_variable("proj_w", [size, self.target_vocab_size])
w_t = tf.transpose(w)
b = tf.get_variable("proj_b", [self.target_vocab_size])
output_projection = (w, b)
def sampled_loss(inputs, labels):
labels = tf.reshape(labels, [-1, 1])
return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
self.target_vocab_size)
softmax_loss_function = sampled_loss
# ============ Seq Encode Layer =============
# Create the internal multi-layer cell for our RNN.
single_cell = tf.nn.rnn_cell.GRUCell(size)
if use_lstm:
single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
cell = single_cell
if num_layers > 1:
cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)
# ============== Seq Decode Layer ============
# The seq2seq function: we use embedding for the input and attention.
def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
return tf.nn.seq2seq.embedding_attention_seq2seq(
encoder_inputs, decoder_inputs, cell,
num_encoder_symbols=source_vocab_size,
num_decoder_symbols=target_vocab_size,
embedding_size=size,
output_projection=output_projection,
feed_previous=do_decode)
#=============================================================
# Feeds for inputs.
self.encoder_inputs = []
self.decoder_inputs = []
self.target_weights = []
for i in xrange(buckets[-1][0]): # Last bucket is the biggest one.
self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
name="encoder{0}".format(i)))
for i in xrange(buckets[-1][1] + 1):
self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
name="decoder{0}".format(i)))
self.target_weights.append(tf.placeholder(tf.float32, shape=[None],
name="weight{0}".format(i)))
# Our targets are decoder inputs shifted by one.
targets = [self.decoder_inputs[i + 1]
for i in xrange(len(self.decoder_inputs) - 1)]
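# e.g. decoder_inputs = [GO, w1, w2, PAD] --> targets = [w1, w2, PAD]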
self.targets = targets # DH add for debug
# Training outputs and losses.
if forward_only:
self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
softmax_loss_function=softmax_loss_function)
# If we use output projection, we need to project outputs for decoding.
if output_projection is not None:
for b in xrange(len(buckets)):
self.outputs[b] = [
tf.matmul(output, output_projection[0]) + output_projection[1]
for output in self.outputs[b]
]
else:
self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
self.encoder_inputs, self.decoder_inputs, targets,
self.target_weights, buckets,
lambda x, y: seq2seq_f(x, y, False),
softmax_loss_function=softmax_loss_function)
# Gradients and SGD update operation for training the model.
params = tf.trainable_variables()
if not forward_only:
self.gradient_norms = []
self.updates = []
opt = tf.train.GradientDescentOptimizer(self.learning_rate)
for b in xrange(len(buckets)):
gradients = tf.gradients(self.losses[b], params)
clipped_gradients, norm = tf.clip_by_global_norm(gradients,
max_gradient_norm)
self.gradient_norms.append(norm)
self.updates.append(opt.apply_gradients(
zip(clipped_gradients, params), global_step=self.global_step))
# if save into npz
self.all_params = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name)
# if save into ckpt
self.saver = tf.train.Saver(tf.all_variables())
def step(self, session, encoder_inputs, decoder_inputs, target_weights,
bucket_id, forward_only):
"""Run a step of the model feeding the given inputs.
Parameters
----------
session : tensorflow session to use.
encoder_inputs : list of numpy int vectors to feed as encoder inputs.
decoder_inputs : list of numpy int vectors to feed as decoder inputs.
target_weights : list of numpy float vectors to feed as target weights.
bucket_id : which bucket of the model to use.
forward_only : whether to do the backward step or only forward.
Returns
--------
A triple consisting of gradient norm (or None if we did not do backward),
average perplexity, and the outputs.
Raises
--------
ValueError : if length of encoder_inputs, decoder_inputs, or
target_weights disagrees with bucket size for the specified bucket_id.
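Examples
--------
A usage sketch; ``model``, ``sess`` and ``train_set`` are assumed to already
exist (see ``get_batch`` for how the batch is prepared):
>>> bucket_id = 0
>>> encoder_inputs, decoder_inputs, target_weights = model.get_batch(
...       train_set, bucket_id)
>>> gradient_norm, step_loss, _ = model.step(sess, encoder_inputs,
...       decoder_inputs, target_weights, bucket_id, forward_only=False)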
"""
# Check if the sizes match.
encoder_size, decoder_size = self.buckets[bucket_id]
if len(encoder_inputs) != encoder_size:
raise ValueError("Encoder length must be equal to the one in bucket,"
" %d != %d." % (len(encoder_inputs), encoder_size))
if len(decoder_inputs) != decoder_size:
raise ValueError("Decoder length must be equal to the one in bucket,"
" %d != %d." % (len(decoder_inputs), decoder_size))
if len(target_weights) != decoder_size:
raise ValueError("Weights length must be equal to the one in bucket,"
" %d != %d." % (len(target_weights), decoder_size))
# print('in model.step()')
# print('a',bucket_id, encoder_size, decoder_size)
# Input feed: encoder inputs, decoder inputs, target_weights, as provided.
input_feed = {}
for l in xrange(encoder_size):
input_feed[self.encoder_inputs[l].name] = encoder_inputs[l]
for l in xrange(decoder_size):
input_feed[self.decoder_inputs[l].name] = decoder_inputs[l]
input_feed[self.target_weights[l].name] = target_weights[l]
# print(self.encoder_inputs[l].name)
# print(self.decoder_inputs[l].name)
# print(self.target_weights[l].name)
# Since our targets are decoder inputs shifted by one, we need one more.
last_target = self.decoder_inputs[decoder_size].name
input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32)
# print('last_target', last_target)
# Output feed: depends on whether we do a backward step or not.
if not forward_only:
output_feed = [self.updates[bucket_id], # Update Op that does SGD.
self.gradient_norms[bucket_id], # Gradient norm.
self.losses[bucket_id]] # Loss for this batch.
else:
output_feed = [self.losses[bucket_id]] # Loss for this batch.
for l in xrange(decoder_size): # Output logits.
output_feed.append(self.outputs[bucket_id][l])
outputs = session.run(output_feed, input_feed)
if not forward_only:
return outputs[1], outputs[2], None # Gradient norm, loss, no outputs.
else:
return None, outputs[0], outputs[1:] # No gradient norm, loss, outputs.
def get_batch(self, data, bucket_id, PAD_ID=0, GO_ID=1, EOS_ID=2, UNK_ID=3):
"""Get a random batch of data from the specified bucket, prepare for step.
To feed data in step(..) it must be a list of batch-major vectors, while
data here contains single length-major cases. So the main logic of this
function is to re-index data cases to be in the proper format for feeding.
Parameters
----------
data : a tuple of size len(self.buckets) in which each element contains
lists of pairs of input and output data that we use to create a batch.
bucket_id : integer, which bucket to get the batch for.
PAD_ID : int
Index of Padding in vocabulary
GO_ID : int
Index of GO in vocabulary
EOS_ID : int
Index of End of sentence in vocabulary
UNK_ID : int
Index of Unknown word in vocabulary
Returns
-------
The triple (encoder_inputs, decoder_inputs, target_weights) for
the constructed batch that has the proper format to call step(...) later.
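Examples
--------
An illustrative sketch only; the toy ``data`` below is made up to show the
expected structure (one list of (encoder_ids, decoder_ids) pairs per bucket):
>>> # with buckets = [(2, 4), (8, 16)]
>>> data = [ [([1, 2], [3])], [([4, 5, 6, 7], [8, 9, 10])] ]
>>> encoder_inputs, decoder_inputs, target_weights = model.get_batch(data, bucket_id=0)
>>> len(encoder_inputs), len(decoder_inputs), len(target_weights)
... (2, 4, 4)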
"""
encoder_size, decoder_size = self.buckets[bucket_id]
encoder_inputs, decoder_inputs = [], []
# Get a random batch of encoder and decoder inputs from data,
# pad them if needed, reverse encoder inputs and add GO to decoder.
for _ in xrange(self.batch_size):
encoder_input, decoder_input = random.choice(data[bucket_id])
# Encoder inputs are padded and then reversed.
encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input))
encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))
# Decoder inputs get an extra "GO" symbol, and are padded then.
decoder_pad_size = decoder_size - len(decoder_input) - 1
decoder_inputs.append([GO_ID] + decoder_input +
[PAD_ID] * decoder_pad_size)
# Now we create batch-major vectors from the data selected above.
batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []
# Batch encoder inputs are just re-indexed encoder_inputs.
for length_idx in xrange(encoder_size):
batch_encoder_inputs.append(
np.array([encoder_inputs[batch_idx][length_idx]
for batch_idx in xrange(self.batch_size)], dtype=np.int32))
# Batch decoder inputs are re-indexed decoder_inputs, we create weights.
for length_idx in xrange(decoder_size):
batch_decoder_inputs.append(
np.array([decoder_inputs[batch_idx][length_idx]
for batch_idx in xrange(self.batch_size)], dtype=np.int32))
# Create target_weights to be 0 for targets that are padding.
batch_weight = np.ones(self.batch_size, dtype=np.float32)
for batch_idx in xrange(self.batch_size):
# We set weight to 0 if the corresponding target is a PAD symbol.
# The corresponding target is decoder_input shifted by 1 forward.
if length_idx < decoder_size - 1:
target = decoder_inputs[batch_idx][length_idx + 1]
if length_idx == decoder_size - 1 or target == PAD_ID:
batch_weight[batch_idx] = 0.0
batch_weights.append(batch_weight)
return batch_encoder_inputs, batch_decoder_inputs, batch_weights
## Developing or Untested
# dense
class MaxoutLayer(Layer):
"""
Coming soon
A single DenseLayer with max-out behaviour; works well with Dropout.
References
-----------
`Goodfellow (2013) Maxout Networks <http://arxiv.org/abs/1302.4389>`_
"""
def __init__(
self,
layer = None,
n_units = 100,
name ='maxout_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
self.n_units = n_units
print("  tensorlayer:Instantiate MaxoutLayer %s: %d" % (self.name, self.n_units))
with tf.variable_scope(name) as vs:
pass
# W = tf.Variable(init.xavier_init(n_inputs=n_in, n_outputs=n_units, uniform=True), name='W')
# b = tf.Variable(tf.zeros([n_units]), name='b')
# self.outputs = act(tf.matmul(self.inputs, W) + b)
# https://www.tensorflow.org/versions/r0.9/api_docs/python/array_ops.html#pack
# http://stackoverflow.com/questions/34362193/how-to-explicitly-broadcast-a-tensor-to-match-anothers-shape-in-tensorflow
# tf.concat tf.pack tf.tile
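# A hedged, untested sketch of one way to implement maxout on top of a dense
# projection (the names W, b, z below are illustrative, not part of the current API):
# n_in = int(self.inputs.get_shape()[-1])
# W = tf.get_variable(name='W', shape=[n_in, n_units * 2])
# b = tf.get_variable(name='b', shape=[n_units * 2])
# z = tf.matmul(self.inputs, W) + b                       # [batch_size, n_units * 2]
# z = tf.reshape(z, [-1, n_units, 2])                     # 2 linear pieces per unit
# self.outputs = tf.reduce_max(z, reduction_indices=2)    # [batch_size, n_units]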
self.all_layers = list(layer.all_layers)
self.all_params = list(layer.all_params)
self.all_drop = dict(layer.all_drop)
# self.all_layers.extend( [self.outputs] )   # re-enable once the maxout op (see sketch above) is implemented
# self.all_params.extend( [W, b] )
# noise
class GaussianNoiseLayer(Layer):
"""
Coming soon
"""
def __init__(
self,
layer = None,
# keep = 0.5,
name = 'gaussian_noise_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
print("  tensorlayer:Instantiate GaussianNoiseLayer %s" % self.name)
with tf.variable_scope(name) as vs:
pass
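# A minimal sketch of what this layer could compute (untested; the mean and
# stddev values are illustrative defaults, not part of the current API):
# noise = tf.random_normal(shape=tf.shape(self.inputs), mean=0.0, stddev=1.0)
# self.outputs = self.inputs + noise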
# rnn
class BidirectionalRNNLayer(Layer):
"""
Coming soon
The :class:`BidirectionalRNNLayer` class is a bidirectional RNN layer.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
n_hidden : an int
The number of hidden units in the layer.
n_steps : an int
The sequence length.
return_last : boolean
If True, return the last output, "Sequence input and single output".
If False, return all outputs, "Synced sequence input and output".
In other words, if you want to stack one or more RNN(s) on top of this layer, set it to False.
cell_init_args : a dictionary
The arguments for the cell initializer.
is_reshape : boolean
Reshape the inputs to a 3-D tensor.
If the input is [batch_size, n_steps, n_features], we do not need to reshape it.
If the input is [batch_size * n_steps, n_features], we need to reshape it.
name : a string or None
An optional name to attach to this layer.
Field (Class Variables)
-----------------------
outputs : a tensor
The output of this RNN.
state : a tensor
When state_is_tuple=False
It is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden]
Examples
--------
>>>
Notes
-----
References
----------
`Neural Network RNN Cells in TensorFlow <https://www.tensorflow.org/versions/master/api_docs/python/rnn_cell.html>`_
"""
def __init__(
self,
layer = None,
n_hidden = 100,
n_steps = 5,
return_last = False,
is_reshape = True,
cell_init_args = {'forget_bias': 1.0},#, 'input_size' : None, 'state_is_tuple' : False, 'activation' : 'tanh' },
name = 'birnn_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
self.n_units = n_hidden
print(" tensorlayer:Instantiate BidirectionalRNNLayer %s: n_hidden:%d, n_steps:%d, dim:%d %s" % (self.name, n_hidden,
n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape()))
with tf.variable_scope(name) as vs:
pass
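# A rough, untested sketch using the TF r0.x static bidirectional API
# (assumes self.inputs is already [batch_size, n_steps, n_features]):
# cell_fw = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, **cell_init_args)
# cell_bw = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, **cell_init_args)
# inputs = [tf.squeeze(t, [1]) for t in tf.split(1, n_steps, self.inputs)]
# outputs = tf.nn.bidirectional_rnn(cell_fw, cell_bw, inputs, dtype=tf.float32)
# (later TF 0.x releases also return the forward/backward final states)
# self.outputs = outputs[-1] if return_last else outputs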
class DynamicRNNLayer(Layer):
"""
Coming soon
The :class:`DynamicRNNLayer` class is an RNN layer with dynamic (variable-length) unrolling.
Parameters
----------
layer : a :class:`Layer` instance
The `Layer` class feeding into this layer.
n_hidden : an int
The number of hidden units in the layer.
n_steps : an int
The sequence length.
return_last : boolean
If True, return the last output, "Sequence input and single output".
If False, return all outputs, "Synced sequence input and output".
In other words, if you want to stack one or more RNN(s) on top of this layer, set it to False.
cell_init_args : a dictionary
The arguments for the cell initializer.
is_reshape : boolean
Reshape the inputs to a 3-D tensor.
If the input is [batch_size, n_steps, n_features], we do not need to reshape it.
If the input is [batch_size * n_steps, n_features], we need to reshape it.
name : a string or None
An optional name to attach to this layer.
Field (Class Variables)
-----------------------
outputs : a tensor
The output of this RNN.
state : a tensor
When state_is_tuple=False
It is the final hidden and cell states, states.get_shape() = [?, 2 * n_hidden]
Examples
--------
>>>
Notes
-----
References
----------
`Neural Network RNN Cells in TensorFlow <https://www.tensorflow.org/versions/master/api_docs/python/rnn_cell.html>`_
"""
def __init__(
self,
layer = None,
n_hidden = 100,
n_steps = 5,
return_last = False,
is_reshape = True,
cell_init_args = {'forget_bias': 1.0},#, 'input_size' : None, 'state_is_tuple' : False, 'activation' : 'tanh' },
name = 'basic_lstm_layer',
):
Layer.__init__(self, name=name)
self.inputs = layer.outputs
self.n_units = n_hidden
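# A rough, untested sketch of what this layer could eventually wrap
# (assumes self.inputs is already [batch_size, n_steps, n_features]):
# cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, **cell_init_args)
# outputs, final_state = tf.nn.dynamic_rnn(cell, self.inputs, dtype=tf.float32)
# self.outputs = outputs    # [batch_size, n_steps, n_hidden]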
#