Source code for encoders.rnn_encoders

# Copyright (c) 2018 NVIDIA Corporation
"""
RNN-based encoders
"""
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals

import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops

from open_seq2seq.parts.rnns.utils import single_cell
from .encoder import Encoder


class UnidirectionalRNNEncoderWithEmbedding(Encoder):
  """
  Uni-directional RNN-based encoder with embeddings.
  Can support various RNN cell types.
  """
  @staticmethod
  def get_required_params():
    return dict(Encoder.get_required_params(), **{
        'src_vocab_size': int,
        'src_emb_size': int,
        'core_cell': None,
        'core_cell_params': dict,
        'encoder_layers': int,
        'encoder_use_skip_connections': bool,
    })
  @staticmethod
  def get_optional_params():
    return dict(Encoder.get_optional_params(), **{
        'encoder_dp_input_keep_prob': float,
        'encoder_dp_output_keep_prob': float,
        'time_major': bool,
        'use_swap_memory': bool,
        'proj_size': int,
        'num_groups': int,
    })
[docs] def __init__(self, params, model, name="unidir_rnn_encoder_with_emb", mode='train'): """Initializes uni-directional encoder with embeddings. Args: params (dict): dictionary with encoder parameters Must define: * src_vocab_size - data vocabulary size * src_emb_size - size of embedding to use * encoder_cell_units - number of units in RNN cell * encoder_cell_type - cell type: lstm, gru, etc. * encoder_layers - number of layers * encoder_dp_input_keep_prob - * encoder_dp_output_keep_prob - * encoder_use_skip_connections - true/false * time_major (optional) * use_swap_memory (optional) * mode - train or infer ... add any cell-specific parameters here as well """ super(UnidirectionalRNNEncoderWithEmbedding, self).__init__( params, model, name=name, mode=mode, ) self._src_vocab_size = self.params['src_vocab_size'] self._src_emb_size = self.params['src_emb_size'] self._enc_emb_w = None self._encoder_cell_fw = None
  def _encode(self, input_dict):
    """Encodes data into representation.

    Args:
      input_dict: a Python dictionary.
        Must define:
          * src_inputs - a Tensor of shape [batch_size, time] or
            [time, batch_size] (depending on time_major param)
          * src_lengths - a Tensor of shape [batch_size]

    Returns:
      a Python dictionary with:
        * encoder_outputs - a Tensor of shape
          [batch_size, time, representation_dim] or
          [time, batch_size, representation_dim]
        * encoder_state - a Tensor of shape [batch_size, dim]
        * src_lengths - (copy ref from input) a Tensor of shape [batch_size]
    """
    # TODO: make a separate level of config for cell_params?
    source_sequence = input_dict['source_tensors'][0]
    source_length = input_dict['source_tensors'][1]
    self._enc_emb_w = tf.get_variable(
        name="EncoderEmbeddingMatrix",
        shape=[self._src_vocab_size, self._src_emb_size],
        dtype=tf.float32,
    )

    if self._mode == "train":
      dp_input_keep_prob = self.params['encoder_dp_input_keep_prob']
      dp_output_keep_prob = self.params['encoder_dp_output_keep_prob']
    else:
      dp_input_keep_prob = 1.0
      dp_output_keep_prob = 1.0

    fwd_cells = [
        single_cell(
            cell_class=self.params['core_cell'],
            cell_params=self.params.get('core_cell_params', {}),
            dp_input_keep_prob=dp_input_keep_prob,
            dp_output_keep_prob=dp_output_keep_prob,
            residual_connections=self.params['encoder_use_skip_connections'],
        ) for _ in range(self.params['encoder_layers'])
    ]
    # pylint: disable=no-member
    self._encoder_cell_fw = tf.contrib.rnn.MultiRNNCell(fwd_cells)

    time_major = self.params.get("time_major", False)
    use_swap_memory = self.params.get("use_swap_memory", False)

    embedded_inputs = tf.cast(
        tf.nn.embedding_lookup(
            self.enc_emb_w,
            source_sequence,
        ),
        self.params['dtype'],
    )

    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
        cell=self._encoder_cell_fw,
        inputs=embedded_inputs,
        sequence_length=source_length,
        time_major=time_major,
        swap_memory=use_swap_memory,
        dtype=embedded_inputs.dtype,
    )

    return {'outputs': encoder_outputs,
            'state': encoder_state,
            'src_lengths': source_length,
            'encoder_input': source_sequence}
  @property
  def src_vocab_size(self):
    return self._src_vocab_size

  @property
  def src_emb_size(self):
    return self._src_emb_size

  @property
  def enc_emb_w(self):
    return self._enc_emb_w
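
A minimal sketch of a params dict for this encoder, built from the required and optional params above; the cell class and all numeric values are illustrative assumptions, not values taken from this file:

# Illustrative only: example params for UnidirectionalRNNEncoderWithEmbedding.
# The cell class and all sizes below are assumptions.
example_unidir_params = {
    'src_vocab_size': 32000,
    'src_emb_size': 512,
    'core_cell': tf.nn.rnn_cell.LSTMCell,      # any cell class single_cell() accepts
    'core_cell_params': {'num_units': 512},
    'encoder_layers': 2,
    'encoder_use_skip_connections': False,
    'encoder_dp_input_keep_prob': 0.8,         # dropout applied only when mode == 'train'
    'encoder_dp_output_keep_prob': 0.8,
}
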
class BidirectionalRNNEncoderWithEmbedding(Encoder):
  """
  Bi-directional RNN-based encoder with embeddings.
  Can support various RNN cell types.
  """
  @staticmethod
  def get_required_params():
    return dict(Encoder.get_required_params(), **{
        'src_vocab_size': int,
        'src_emb_size': int,
        'encoder_layers': int,
        'encoder_use_skip_connections': bool,
        'core_cell': None,
        'core_cell_params': dict,
    })
  @staticmethod
  def get_optional_params():
    return dict(Encoder.get_optional_params(), **{
        'encoder_dp_input_keep_prob': float,
        'encoder_dp_output_keep_prob': float,
        'time_major': bool,
        'use_swap_memory': bool,
        'proj_size': int,
        'num_groups': int,
    })
[docs] def __init__(self, params, model, name="bidir_rnn_encoder_with_emb", mode='train'): """Initializes bi-directional encoder with embeddings. Args: params (dict): dictionary with encoder parameters Must define: * src_vocab_size - data vocabulary size * src_emb_size - size of embedding to use * encoder_cell_units - number of units in RNN cell * encoder_cell_type - cell type: lstm, gru, etc. * encoder_layers - number of layers * encoder_dp_input_keep_prob - * encoder_dp_output_keep_prob - * encoder_use_skip_connections - true/false * time_major (optional) * use_swap_memory (optional) * mode - train or infer ... add any cell-specific parameters here as well Returns: encoder_params """ super(BidirectionalRNNEncoderWithEmbedding, self).__init__( params, model, name=name, mode=mode, ) self._src_vocab_size = self.params['src_vocab_size'] self._src_emb_size = self.params['src_emb_size'] self._enc_emb_w = None self._encoder_cell_fw = None self._encoder_cell_bw = None
  def _encode(self, input_dict):
    """Encodes data into representation.

    Args:
      input_dict: a Python dictionary.
        Must define:
          * src_inputs - a Tensor of shape [batch_size, time] or
            [time, batch_size] (depending on time_major param)
          * src_lengths - a Tensor of shape [batch_size]

    Returns:
      a Python dictionary with:
        * encoder_outputs - a Tensor of shape
          [batch_size, time, representation_dim] or
          [time, batch_size, representation_dim]
        * encoder_state - a Tensor of shape [batch_size, dim]
        * src_lengths - (copy ref from input) a Tensor of shape [batch_size]
    """
    source_sequence = input_dict['source_tensors'][0]
    source_length = input_dict['source_tensors'][1]
    time_major = self.params.get("time_major", False)
    use_swap_memory = self.params.get("use_swap_memory", False)

    self._enc_emb_w = tf.get_variable(
        name="EncoderEmbeddingMatrix",
        shape=[self._src_vocab_size, self._src_emb_size],
        dtype=tf.float32,
    )

    if self._mode == "train":
      dp_input_keep_prob = self.params['encoder_dp_input_keep_prob']
      dp_output_keep_prob = self.params['encoder_dp_output_keep_prob']
    else:
      dp_input_keep_prob = 1.0
      dp_output_keep_prob = 1.0

    fwd_cells = [
        single_cell(
            cell_class=self.params['core_cell'],
            cell_params=self.params.get('core_cell_params', {}),
            dp_input_keep_prob=dp_input_keep_prob,
            dp_output_keep_prob=dp_output_keep_prob,
            residual_connections=self.params['encoder_use_skip_connections'],
        ) for _ in range(self.params['encoder_layers'])
    ]
    bwd_cells = [
        single_cell(
            cell_class=self.params['core_cell'],
            cell_params=self.params.get('core_cell_params', {}),
            dp_input_keep_prob=dp_input_keep_prob,
            dp_output_keep_prob=dp_output_keep_prob,
            residual_connections=self.params['encoder_use_skip_connections'],
        ) for _ in range(self.params['encoder_layers'])
    ]

    with tf.variable_scope("FW"):
      # pylint: disable=no-member
      self._encoder_cell_fw = tf.contrib.rnn.MultiRNNCell(fwd_cells)
    with tf.variable_scope("BW"):
      # pylint: disable=no-member
      self._encoder_cell_bw = tf.contrib.rnn.MultiRNNCell(bwd_cells)

    embedded_inputs = tf.cast(
        tf.nn.embedding_lookup(
            self.enc_emb_w,
            source_sequence,
        ),
        self.params['dtype'],
    )

    encoder_output, encoder_state = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=self._encoder_cell_fw,
        cell_bw=self._encoder_cell_bw,
        inputs=embedded_inputs,
        sequence_length=source_length,
        time_major=time_major,
        swap_memory=use_swap_memory,
        dtype=embedded_inputs.dtype,
    )
    encoder_outputs = tf.concat(encoder_output, 2)

    return {'outputs': encoder_outputs,
            'state': encoder_state,
            'src_lengths': source_length,
            'encoder_input': source_sequence}
  @property
  def src_vocab_size(self):
    return self._src_vocab_size

  @property
  def src_emb_size(self):
    return self._src_emb_size

  @property
  def enc_emb_w(self):
    return self._enc_emb_w
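
Since _encode concatenates the forward and backward outputs along the last axis, representation_dim is twice the cell size. A minimal standalone sketch of that behavior, with assumed shapes and cell sizes:

# Standalone sketch (assumed sizes): why the bidirectional output dim doubles.
import tensorflow as tf

batch_size, time_steps, emb_size, num_units = 4, 7, 16, 32
inputs = tf.random_uniform([batch_size, time_steps, emb_size])
lengths = tf.fill([batch_size], time_steps)

cell_fw = tf.nn.rnn_cell.LSTMCell(num_units)
cell_bw = tf.nn.rnn_cell.LSTMCell(num_units)
outputs, _ = tf.nn.bidirectional_dynamic_rnn(
    cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs,
    sequence_length=lengths, dtype=tf.float32,
)
concatenated = tf.concat(outputs, 2)  # shape: [4, 7, 2 * 32]
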
class GNMTLikeEncoderWithEmbedding(Encoder):
  """
  Encoder similar to the one used in GNMT model:
  https://arxiv.org/abs/1609.08144.
  Must have at least 2 layers.
  """
  @staticmethod
  def get_required_params():
    return dict(Encoder.get_required_params(), **{
        'src_vocab_size': int,
        'src_emb_size': int,
        'core_cell': None,
        'core_cell_params': dict,
        'encoder_layers': int,
        'encoder_use_skip_connections': bool,
    })
  @staticmethod
  def get_optional_params():
    return dict(Encoder.get_optional_params(), **{
        'encoder_dp_input_keep_prob': float,
        'encoder_dp_output_keep_prob': float,
        'time_major': bool,
        'use_swap_memory': bool,
        'proj_size': int,
        'num_groups': int,
    })
[docs] def __init__(self, params, model, name="gnmt_encoder_with_emb", mode='train'): """Encodes data into representation. Args: params (dict): a Python dictionary. Must define: * src_inputs - a Tensor of shape [batch_size, time] or [time, batch_size] (depending on time_major param) * src_lengths - a Tensor of shape [batch_size] Returns: a Python dictionary with: * encoder_outputs - a Tensor of shape [batch_size, time, representation_dim] or [time, batch_size, representation_dim] * encoder_state - a Tensor of shape [batch_size, dim] * src_lengths - (copy ref from input) a Tensor of shape [batch_size] """ super(GNMTLikeEncoderWithEmbedding, self).__init__( params, model, name=name, mode=mode, ) self._src_vocab_size = self.params['src_vocab_size'] self._src_emb_size = self.params['src_emb_size'] self._encoder_l1_cell_fw = None self._encoder_l1_cell_bw = None self._encoder_cells = None self._enc_emb_w = None
  def _encode(self, input_dict):
    source_sequence = input_dict['source_tensors'][0]
    source_length = input_dict['source_tensors'][1]
    self._enc_emb_w = tf.get_variable(
        name="EncoderEmbeddingMatrix",
        shape=[self._src_vocab_size, self._src_emb_size],
        dtype=tf.float32,
    )

    if self.params['encoder_layers'] < 2:
      raise ValueError("GNMT encoder must have at least 2 layers")

    with tf.variable_scope("Level1FW"):
      self._encoder_l1_cell_fw = single_cell(
          cell_class=self.params['core_cell'],
          cell_params=self.params.get('core_cell_params', {}),
          dp_input_keep_prob=1.0,
          dp_output_keep_prob=1.0,
          residual_connections=False,
      )
    with tf.variable_scope("Level1BW"):
      self._encoder_l1_cell_bw = single_cell(
          cell_class=self.params['core_cell'],
          cell_params=self.params.get('core_cell_params', {}),
          dp_input_keep_prob=1.0,
          dp_output_keep_prob=1.0,
          residual_connections=False,
      )

    if self._mode == "train":
      dp_input_keep_prob = self.params['encoder_dp_input_keep_prob']
      dp_output_keep_prob = self.params['encoder_dp_output_keep_prob']
    else:
      dp_input_keep_prob = 1.0
      dp_output_keep_prob = 1.0

    with tf.variable_scope("UniDirLevel"):
      self._encoder_cells = [
          single_cell(
              cell_class=self.params['core_cell'],
              cell_params=self.params.get('core_cell_params', {}),
              dp_input_keep_prob=dp_input_keep_prob,
              dp_output_keep_prob=dp_output_keep_prob,
              residual_connections=False,
          ) for _ in range(self.params['encoder_layers'] - 1)
      ]
      # add residual connections starting from the third layer
      for idx, cell in enumerate(self._encoder_cells):
        if idx > 0:
          # pylint: disable=no-member
          self._encoder_cells[idx] = tf.contrib.rnn.ResidualWrapper(cell)

    time_major = self.params.get("time_major", False)
    use_swap_memory = self.params.get("use_swap_memory", False)

    embedded_inputs = tf.cast(
        tf.nn.embedding_lookup(
            self.enc_emb_w,
            source_sequence,
        ),
        self.params['dtype'],
    )

    # first bi-directional layer
    _encoder_output, _ = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=self._encoder_l1_cell_fw,
        cell_bw=self._encoder_l1_cell_bw,
        inputs=embedded_inputs,
        sequence_length=source_length,
        swap_memory=use_swap_memory,
        time_major=time_major,
        dtype=embedded_inputs.dtype,
    )
    encoder_l1_outputs = tf.concat(_encoder_output, 2)

    # stack of unidirectional layers
    # pylint: disable=no-member
    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
        cell=tf.contrib.rnn.MultiRNNCell(self._encoder_cells),
        inputs=encoder_l1_outputs,
        sequence_length=source_length,
        swap_memory=use_swap_memory,
        time_major=time_major,
        dtype=encoder_l1_outputs.dtype,
    )

    return {'outputs': encoder_outputs,
            'state': encoder_state,
            'src_lengths': source_length,
            'encoder_input': source_sequence}

  @property
  def src_vocab_size(self):
    return self._src_vocab_size

  @property
  def src_emb_size(self):
    return self._src_emb_size

  @property
  def enc_emb_w(self):
    return self._enc_emb_w
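
The layer layout in _encode above is one bidirectional layer followed by encoder_layers - 1 unidirectional layers, with ResidualWrapper applied from the second unidirectional layer onward (the third layer overall, matching the comment in the code). A minimal standalone sketch of that wrapping, with assumed cell class and sizes:

# Standalone sketch of the GNMT-style residual wrapping (assumed sizes).
import tensorflow as tf

num_units, num_uni_layers = 32, 3  # i.e. encoder_layers - 1
cells = [tf.nn.rnn_cell.LSTMCell(num_units) for _ in range(num_uni_layers)]
# The first unidirectional layer consumes the 2 * num_units bidirectional
# output, so its input and output shapes differ and it cannot be residual;
# every later layer keeps matching shapes and gets wrapped.
cells = [cell if idx == 0 else tf.contrib.rnn.ResidualWrapper(cell)
         for idx, cell in enumerate(cells)]
uni_stack = tf.contrib.rnn.MultiRNNCell(cells)
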
class GNMTLikeEncoderWithEmbedding_cuDNN(Encoder):
  """
  Encoder similar to the one used in GNMT model:
  https://arxiv.org/abs/1609.08144.
  Must have at least 2 layers. Uses cuDNN RNN blocks for efficiency.
  """
  @staticmethod
  def get_required_params():
    return dict(Encoder.get_required_params(), **{
        'src_vocab_size': int,
        'src_emb_size': int,
        'encoder_cell_units': int,
        'encoder_cell_type': ['lstm', 'gru'],
        'encoder_layers': int,
    })
  @staticmethod
  def get_optional_params():
    return dict(Encoder.get_optional_params(), **{
        'encoder_dp_output_keep_prob': float,
    })
[docs] def __init__(self, params, model, name="gnmt_encoder_with_emb_cudnn", mode='train'): """Encodes data into representation Args: params (dict): a Python dictionary. Must define: * src_inputs - a Tensor of shape [batch_size, time] or [time, batch_size] (depending on time_major param) * src_lengths - a Tensor of shape [batch_size] Returns: a Python dictionary with: * encoder_outputs - a Tensor of shape [batch_size, time, representation_dim] or [time, batch_size, representation_dim] * encoder_state - a Tensor of shape [batch_size, dim] * src_lengths - (copy ref from input) a Tensor of shape [batch_size] """ super(GNMTLikeEncoderWithEmbedding_cuDNN, self).__init__( params, model, name=name, mode=mode, ) self._src_vocab_size = self.params['src_vocab_size'] self._src_emb_size = self.params['src_emb_size'] self._enc_emb_w = None
  def _encode(self, input_dict):
    source_sequence = input_dict['source_tensors'][0]
    source_length = input_dict['source_tensors'][1]
    self._enc_emb_w = tf.get_variable(
        name="EncoderEmbeddingMatrix",
        shape=[self._src_vocab_size, self._src_emb_size],
        dtype=tf.float32,
    )

    if self.params['encoder_layers'] < 2:
      raise ValueError("GNMT encoder must have at least 2 layers")

    if self._mode == "train":
      dp_output_keep_prob = self.params['encoder_dp_output_keep_prob']
    else:
      dp_output_keep_prob = 1.0

    # source_sequence is of [batch, time] shape
    embedded_inputs = tf.cast(
        tf.nn.embedding_lookup(
            self.enc_emb_w,
            tf.transpose(source_sequence),  # cudnn wants [time, batch, ...]
        ),
        self.params['dtype'],
    )

    with tf.variable_scope("Bi_Directional_Layer"):
      direction = cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION
      if self.params['encoder_cell_type'] == "gru":
        # pylint: disable=no-member
        bidirectional_block = tf.contrib.cudnn_rnn.CudnnGRU(
            num_layers=1,
            num_units=self.params['encoder_cell_units'],
            direction=direction,
            dropout=0.0,
            dtype=self.params['dtype'],
            name="cudnn_gru_bidi",
        )
      elif self.params['encoder_cell_type'] == "lstm":
        # pylint: disable=no-member
        bidirectional_block = tf.contrib.cudnn_rnn.CudnnLSTM(
            num_layers=1,
            num_units=self.params['encoder_cell_units'],
            direction=direction,
            dropout=0.0,
            dtype=self.params['dtype'],
            name="cudnn_lstm_bidi",
        )
      else:
        raise ValueError(
            "{} is not a valid rnn_type for cudnn_rnn layers".format(
                self.params['encoder_cell_type']
            )
        )
      bidi_output, bidi_state = bidirectional_block(embedded_inputs)

    with tf.variable_scope("Uni_Directional_Layer"):
      direction = cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION
      layer_input = bidi_output
      for ind in range(self.params['encoder_layers'] - 1):
        with tf.variable_scope("uni_layer_{}".format(ind)):
          if self.params['encoder_cell_type'] == "gru":
            # pylint: disable=no-member
            unidirectional_block = tf.contrib.cudnn_rnn.CudnnGRU(
                num_layers=1,
                num_units=self.params['encoder_cell_units'],
                direction=direction,
                dropout=1.0 - dp_output_keep_prob,
                dtype=self.params['dtype'],
                name="cudnn_gru_uni_{}".format(ind),
            )
          elif self.params['encoder_cell_type'] == "lstm":
            # pylint: disable=no-member
            unidirectional_block = tf.contrib.cudnn_rnn.CudnnLSTM(
                num_layers=1,
                num_units=self.params['encoder_cell_units'],
                direction=direction,
                dropout=1.0 - dp_output_keep_prob,
                dtype=self.params['dtype'],
                name="cudnn_lstm_uni_{}".format(ind),
            )
          layer_output, encoder_state = unidirectional_block(layer_input)
          if ind > 0:  # add residual connection
            layer_output = layer_input + layer_output
          layer_input = layer_output

    return {'outputs': tf.transpose(layer_input, perm=[1, 0, 2]),
            'state': None,
            'src_lengths': source_length,
            'encoder_input': source_sequence}

  @property
  def src_vocab_size(self):
    return self._src_vocab_size

  @property
  def src_emb_size(self):
    return self._src_emb_size

  @property
  def enc_emb_w(self):
    return self._enc_emb_w
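
The cuDNN blocks operate on time-major input, which is why _encode transposes the token ids before the embedding lookup and transposes the final output back to batch-major. A minimal standalone sketch of one such call (sizes assumed; tf.contrib.cudnn_rnn requires a GPU):

# Standalone sketch of a time-major CudnnLSTM call (assumed sizes; GPU only).
import tensorflow as tf

time_steps, batch_size, input_dim, num_units = 7, 4, 16, 32
inputs = tf.random_uniform([time_steps, batch_size, input_dim])  # [time, batch, ...]
lstm = tf.contrib.cudnn_rnn.CudnnLSTM(
    num_layers=1,
    num_units=num_units,
    direction='unidirectional',
    dtype=tf.float32,
)
outputs, state = lstm(inputs)  # outputs: [time_steps, batch_size, num_units]
batch_major = tf.transpose(outputs, perm=[1, 0, 2])  # back to [batch, time, units]
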