Source code for encoders.resnet_encoder

# Copyright (c) 2018 NVIDIA Corporation
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals

import tensorflow as tf
from .resnet_blocks import conv2d_fixed_padding, batch_norm, block_layer, \
                           bottleneck_block_v1, bottleneck_block_v2, \
                           building_block_v1, building_block_v2
from .encoder import Encoder


class ResNetEncoder(Encoder):
  @staticmethod
  def get_optional_params():
    return dict(Encoder.get_optional_params(), **{
        'resnet_size': int,
        'block_sizes': list,
        'block_strides': list,
        'version': [1, 2],
        'bottleneck': bool,
        'final_size': int,
        'first_num_filters': int,
        'first_kernel_size': int,
        'first_conv_stride': int,
        'first_pool_size': int,
        'first_pool_stride': int,
        'data_format': ['channels_first', 'channels_last'],
        'regularize_bn': bool,
        'bn_momentum': float,
        'bn_epsilon': float,
    })
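
  # Example (an editorial illustration, not from the original source): a
  # config can either name a standard depth or spell the layout out
  # explicitly. Given the defaults in ``_encode`` below,
  #
  #   'encoder_params': {
  #       'resnet_size': 50,  # implies bottleneck=True, final_size=2048
  #   }
  #
  # is equivalent to
  #
  #   'encoder_params': {
  #       'block_sizes': [3, 4, 6, 3],
  #       'bottleneck': True,
  #       'final_size': 2048,
  #   }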

  def __init__(self, params, model, name="resnet_encoder", mode='train'):
    super(ResNetEncoder, self).__init__(params, model, name, mode)

  def _encode(self, input_dict):
    inputs = input_dict['source_tensors'][0]
    if 'resnet_size' not in self.params and 'block_sizes' not in self.params:
      raise ValueError('Either "resnet_size" or "block_sizes" '
                       'has to be specified in the config')
    if 'resnet_size' in self.params and 'block_sizes' in self.params:
      raise ValueError('"resnet_size" and "block_sizes" cannot '
                       'be specified together')

    if 'resnet_size' in self.params:
      # Depths below 50 use plain building blocks with a 512-channel output;
      # deeper variants use bottleneck blocks with a 2048-channel output.
      if self.params['resnet_size'] < 50:
        bottleneck = self.params.get('bottleneck', False)
        final_size = self.params.get('final_size', 512)
      else:
        bottleneck = self.params.get('bottleneck', True)
        final_size = self.params.get('final_size', 2048)
      # Standard block layouts, indexed by ResNet depth.
      block_sizes_dict = {
          18: [2, 2, 2, 2],
          34: [3, 4, 6, 3],
          50: [3, 4, 6, 3],
          101: [3, 4, 23, 3],
          152: [3, 8, 36, 3],
          200: [3, 24, 36, 3],
      }
      block_sizes = block_sizes_dict[self.params['resnet_size']]
    else:
      if 'bottleneck' not in self.params:
        raise ValueError('If "resnet_size" is not specified, you have to '
                         'provide the "bottleneck" parameter')
      if 'final_size' not in self.params:
        raise ValueError('If "resnet_size" is not specified, you have to '
                         'provide the "final_size" parameter')
      bottleneck = self.params['bottleneck']
      final_size = self.params['final_size']
      block_sizes = self.params['block_sizes']

    num_filters = self.params.get('first_num_filters', 64)
    kernel_size = self.params.get('first_kernel_size', 7)
    conv_stride = self.params.get('first_conv_stride', 2)
    first_pool_size = self.params.get('first_pool_size', 3)
    first_pool_stride = self.params.get('first_pool_stride', 2)

    block_strides = self.params.get('block_strides', [1, 2, 2, 2])
    version = self.params.get('version', 2)
    data_format = self.params.get('data_format', 'channels_first')
    bn_momentum = self.params.get('bn_momentum', 0.997)
    bn_epsilon = self.params.get('bn_epsilon', 1e-5)

    if bottleneck:
      if version == 1:
        block_fn = bottleneck_block_v1
      else:
        block_fn = bottleneck_block_v2
    else:
      if version == 1:
        block_fn = building_block_v1
      else:
        block_fn = building_block_v2

    training = self.mode == 'train'
    regularizer = self.params.get('regularizer', None)
    regularize_bn = self.params.get('regularize_bn', True)
    bn_regularizer = regularizer if regularize_bn else None

    if data_format == 'channels_first':
      inputs = tf.transpose(inputs, [0, 3, 1, 2])

    inputs = conv2d_fixed_padding(
        inputs=inputs, filters=num_filters, kernel_size=kernel_size,
        strides=conv_stride, data_format=data_format,
        regularizer=regularizer,
    )
    inputs = tf.identity(inputs, 'initial_conv')

    # v1 applies batch norm and ReLU right after the first convolution;
    # v2 defers them to the pre-activation inside each block.
    if version == 1:
      inputs = batch_norm(inputs, training, data_format,
                          regularizer=bn_regularizer,
                          momentum=bn_momentum, epsilon=bn_epsilon)
      inputs = tf.nn.relu(inputs)

    if first_pool_size:
      inputs = tf.layers.max_pooling2d(
          inputs=inputs, pool_size=first_pool_size,
          strides=first_pool_stride, padding='SAME',
          data_format=data_format,
      )
      inputs = tf.identity(inputs, 'initial_max_pool')

    # Each stage doubles the number of filters relative to the previous one.
    for i, num_blocks in enumerate(block_sizes):
      cur_num_filters = num_filters * (2**i)
      inputs = block_layer(
          inputs=inputs, filters=cur_num_filters, bottleneck=bottleneck,
          block_fn=block_fn, blocks=num_blocks,
          strides=block_strides[i], training=training,
          name='block_layer{}'.format(i + 1), data_format=data_format,
          regularizer=regularizer,
          bn_regularizer=bn_regularizer,
          bn_momentum=bn_momentum,
          bn_epsilon=bn_epsilon,
      )

    if version == 2:
      inputs = batch_norm(inputs, training, data_format,
                          regularizer=bn_regularizer,
                          momentum=bn_momentum, epsilon=bn_epsilon)
      inputs = tf.nn.relu(inputs)
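
    # Editorial note: with the default strides above, a 224x224 input is
    # downsampled 32x overall (stem conv 2x, first max-pool 2x, block strides
    # [1, 2, 2, 2]), so ``inputs`` here is [batch, final_size, 7, 7] in
    # channels_first layout -- 2048 channels with bottleneck blocks, 512
    # with plain building blocks.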

    # The current top layer has shape
    # `batch_size x pool_size x pool_size x final_size`.
    # ResNet does an Average Pooling layer over pool_size,
    # but that is the same as doing a reduce_mean. We do a reduce_mean
    # here because it performs better than AveragePooling2D.
    axes = [2, 3] if data_format == 'channels_first' else [1, 2]
    inputs = tf.reduce_mean(inputs, axes, keepdims=True)
    inputs = tf.identity(inputs, 'final_reduce_mean')

    outputs = tf.reshape(inputs, [-1, final_size])
    return {'outputs': outputs}
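

# ------------------------------------------------------------------------------
# Editorial sketch, not part of the original module: a quick numerical check of
# the pooling comment in ``_encode`` above -- a ``reduce_mean`` over the spatial
# axes equals an average-pooling window that spans the whole feature map. The
# 7x7x2048 shape is an assumption (ResNet-50 features for 224x224 inputs), and
# the TF 1.x API used throughout this file is assumed. Because of the relative
# imports at the top, run this as a module, e.g.
# ``python -m encoders.resnet_encoder`` (assuming that package layout).
if __name__ == '__main__':
  import numpy as np

  feats = tf.placeholder(tf.float32, [None, 7, 7, 2048])  # channels_last
  via_mean = tf.reduce_mean(feats, [1, 2], keepdims=True)
  via_pool = tf.layers.average_pooling2d(feats, pool_size=7, strides=1)

  with tf.Session() as sess:
    x = np.random.rand(2, 7, 7, 2048).astype(np.float32)
    mean_out, pool_out = sess.run([via_mean, via_pool], {feats: x})
    # Both reduce to the same [2, 1, 1, 2048] tensor of per-channel means.
    print(np.allclose(mean_out, pool_out, atol=1e-5))  # expected: True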