# Source code for encoders.cnn_encoder

# Copyright (c) 2018 NVIDIA Corporation
"""
This module contains classes and functions to build "general" convolutional
neural networks from the description of arbitrary "layers".
"""
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals

import copy

import tensorflow as tf

try:
  from inspect import signature
except ImportError:
  from funcsigs import signature

from open_seq2seq.utils.utils import deco_print
from .encoder import Encoder


def build_layer(inputs, layer, layer_params, data_format,
                regularizer, training, verbose=True):
  """Build a layer from the layer function and its parameters.

  This function will automatically add a regularizer parameter to
  ``layer_params`` if the layer supports regularization. To check this, it
  looks for the "regularizer", "kernel_regularizer" and "gamma_regularizer"
  names in the ``layer`` call signature and passes the regularizer object as
  a value for every such parameter that is supported and not already set.
  Based on the same "checking signature" technique, "data_format" and
  "training" parameters are added when supported. Finally, the "axis"
  parameter is set to ``1 if data_format == 'channels_first' else 3`` when
  supported — this is required for automatically building batch
  normalization layers.

  Args:
    inputs: input Tensor that will be passed to the layer. Note that the
        layer has to accept input as the first parameter.
    layer: layer function or class with ``__call__`` method defined.
    layer_params (dict): parameters passed to the ``layer``. Not mutated;
        a deep copy is taken before injecting extra arguments.
    data_format (string): data format ("channels_first" or "channels_last")
        that will be passed as an additional argument when supported.
    regularizer: regularizer instance that will be passed as an additional
        argument when supported.
    training (bool): whether the layer is built in training mode. Will be
        passed as an additional argument when supported.
    verbose (bool): whether to print information about built layers.

  Returns:
    Tensor with layer output.
  """
  # Deep-copy so the caller's config dict is never mutated.
  layer_params_cp = copy.deepcopy(layer_params)
  # Inspect the callable's signature once instead of once per parameter
  # (the original re-computed signature(layer) for every check).
  accepted_params = signature(layer).parameters

  for reg_name in ['regularizer', 'kernel_regularizer', 'gamma_regularizer']:
    # No break: if a layer accepts several of these names, all are set,
    # matching the original behavior.
    if reg_name not in layer_params_cp and reg_name in accepted_params:
      layer_params_cp.update({reg_name: regularizer})

  if 'data_format' not in layer_params_cp and \
     'data_format' in accepted_params:
    layer_params_cp.update({'data_format': data_format})

  # necessary to check axis for correct batch normalization processing
  if 'axis' not in layer_params_cp and \
     'axis' in accepted_params:
    layer_params_cp.update({'axis': 1 if data_format == 'channels_first'
                            else 3})

  if 'training' not in layer_params_cp and \
     'training' in accepted_params:
    layer_params_cp.update({'training': training})

  outputs = layer(inputs, **layer_params_cp)

  if verbose:
    # TensorFlow ops expose a nice public name via _tf_api_names;
    # fall back to printing the callable itself.
    if hasattr(layer, '_tf_api_names'):
      layer_name = layer._tf_api_names[0]
    else:
      layer_name = layer
    deco_print("Building layer: {}(inputs, {})".format(
        layer_name,
        ", ".join("{}={}".format(key, value)
                  for key, value in layer_params_cp.items())
    ))
  return outputs
class CNNEncoder(Encoder):
  """General CNN encoder that can be used to construct
  various different models.
  """

  @staticmethod
  def get_required_params():
    return dict(Encoder.get_required_params(), **{
        'cnn_layers': list,
    })

  @staticmethod
  def get_optional_params():
    return dict(Encoder.get_optional_params(), **{
        'data_format': ['channels_first', 'channels_last'],
        'fc_layers': list,
    })

  def __init__(self, params, model, name="cnn_encoder", mode='train'):
    """CNN Encoder constructor.

    See parent class for arguments description.

    Config parameters:

    * **cnn_layers** (list) --- list with the description of "convolutional"
      layers. For example::

        "cnn_layers": [
            (tf.layers.conv2d, {
                'filters': 64, 'kernel_size': (11, 11),
                'strides': (4, 4), 'padding': 'VALID',
                'activation': tf.nn.relu,
            }),
            (tf.layers.max_pooling2d, {
                'pool_size': (3, 3), 'strides': (2, 2),
            }),
            (tf.layers.conv2d, {
                'filters': 192, 'kernel_size': (5, 5),
                'strides': (1, 1), 'padding': 'SAME',
            }),
            (tf.layers.batch_normalization,
             {'momentum': 0.9, 'epsilon': 0.0001}),
            (tf.nn.relu, {}),
        ]

      Note that you don't need to provide "regularizer", "training",
      "data_format" and "axis" parameters since they will be
      automatically added. "axis" will be derived from "data_format"
      and will be ``1 if data_format == "channels_first" else 3``.
    * **fc_layers** (list) --- list with the description of
      "fully-connected" layers. The only difference from convolutional
      layers is that the input will be automatically reshaped to 2D
      (batch size x num features). For example::

        'fc_layers': [
            (tf.layers.dense, {'units': 4096, 'activation': tf.nn.relu}),
            (tf.layers.dropout, {'rate': 0.5}),
            (tf.layers.dense, {'units': 4096, 'activation': tf.nn.relu}),
            (tf.layers.dropout, {'rate': 0.5}),
        ],

      Note that you don't need to provide "regularizer", "training",
      "data_format" and "axis" parameters since they will be
      automatically added.
    * **data_format** (string) --- could be either "channels_first" or
      "channels_last". Defaults to "channels_first".
    """
    super(CNNEncoder, self).__init__(params, model, name, mode)

  def _encode(self, input_dict):
    """Apply the configured cnn_layers (and optional fc_layers) to the input.

    Args:
      input_dict (dict): ``input_dict['source_tensors'][0]`` is expected to
          be the input image batch in NHWC layout — assumed from the
          transposes below; TODO confirm against the data layer.

    Returns:
      dict: ``{'outputs': x}`` where ``x`` is either the fc-layers output
      (2D) or the spatially average-pooled conv output.
    """
    regularizer = self.params.get('regularizer', None)
    data_format = self.params.get('data_format', 'channels_first')

    x = input_dict['source_tensors'][0]
    if data_format == 'channels_first':
      # incoming tensor is channels-last; move channels to dim 1
      x = tf.transpose(x, [0, 3, 1, 2])

    for layer, layer_params in self.params['cnn_layers']:
      x = build_layer(x, layer, layer_params, data_format,
                      regularizer, self.mode == 'train')

    if data_format == 'channels_first':
      # convert back to channels-last before flattening / pooling
      x = tf.transpose(x, [0, 2, 3, 1])

    fc_layers = self.params.get('fc_layers', [])

    # if fully connected layers exist, flattening the output and applying them
    if fc_layers:
      input_shape = x.get_shape().as_list()
      # flatten every non-batch dimension (generalizes the original
      # hard-coded 4-D product; identical result for 4-D conv outputs)
      num_inputs = 1
      for dim in input_shape[1:]:
        num_inputs *= dim
      x = tf.reshape(x, [-1, num_inputs])
      for layer, layer_params in fc_layers:
        x = build_layer(x, layer, layer_params, data_format,
                        regularizer, self.mode == 'train')
    else:
      # if there are no fully connected layers, doing average pooling
      # over the spatial dimensions (global average pooling)
      x = tf.reduce_mean(x, [1, 2])

    return {'outputs': x}