# Copyright (c) 2017 NVIDIA Corporation
"""
Module containing various learning rate policies. A learning rate policy can
be any function that takes arbitrary arguments from the config (with an
additional ``global_step`` variable provided automatically) and returns the
learning rate value for the current step.
"""
from __future__ import absolute_import, division, print_function
from __future__ import unicode_literals
import math
import tensorflow as tf
from tensorflow.python.framework import ops
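
# An illustrative usage sketch (not part of the original module): in an
# OpenSeq2Seq-style config, a policy function and its parameters are
# typically referenced roughly as below; the exact key names here are an
# assumption, not guaranteed by this module:
#
#   base_params = {
#       "lr_policy": poly_decay,
#       "lr_policy_params": {
#           "learning_rate": 0.001,
#           "decay_steps": 100000,
#           "power": 0.5,
#           "warmup_steps": 2000,
#       },
#   }
#
# ``global_step`` is not listed above: it is supplied automatically, as the
# module docstring describes.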


def fixed_lr(global_step, learning_rate):
  """Fixed learning rate policy.

  This function always returns ``learning_rate``, ignoring the
  ``global_step`` value.

  Args:
    global_step: global step TensorFlow tensor (ignored for this policy).
    learning_rate (float): fixed learning rate to use.

  Returns:
    learning rate at step ``global_step``.
  """
  return learning_rate


def piecewise_constant(global_step, learning_rate, boundaries,
                       decay_rates, steps_per_epoch=None):
  """Piecewise constant learning rate decay.

  When defined in the config, only ``boundaries`` and ``decay_rates`` need to
  be provided (other parameters are automatically populated by the
  :class:`Model<models.model.Model>` class). ``boundaries`` are treated as
  epochs if ``num_epochs`` is provided in the config, otherwise as steps.

  Args:
    global_step: global step TensorFlow tensor.
    learning_rate (float): initial learning rate to use.
    boundaries (list): can be defined either in steps
        (if ``steps_per_epoch`` is None) or in epochs (if the
        ``steps_per_epoch`` parameter is defined).
    decay_rates: multiplier of the initial learning rate for each boundary.
    steps_per_epoch: number of batches in one training epoch. If provided,
        boundaries are treated as epochs, otherwise as steps.

  Returns:
    learning rate at step ``global_step``.
  """
  if steps_per_epoch is not None:
    boundaries = [steps_per_epoch * epoch for epoch in boundaries]
  decay_rates = [1.0] + decay_rates
  vals = [learning_rate * decay for decay in decay_rates]
  return tf.train.piecewise_constant(global_step, boundaries, vals)
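
# A minimal usage sketch (illustrative values, not from the original module),
# assuming a TF1-style graph and session:
#
#   step = tf.placeholder(tf.int32, shape=[])
#   lr = piecewise_constant(step, learning_rate=0.1,
#                           boundaries=[100, 200], decay_rates=[0.5, 0.1])
#   # lr evaluates to 0.1 up to the first boundary, to 0.1 * 0.5 = 0.05
#   # between the boundaries, and to 0.1 * 0.1 = 0.01 after the last one,
#   # i.e. vals == [0.1, 0.05, 0.01] in the code above.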


def exp_decay(global_step, learning_rate, decay_steps, decay_rate,
              use_staircase_decay, begin_decay_at=0, min_lr=0.0):
  """Exponential decay learning rate policy.

  This function is equivalent to ``tf.train.exponential_decay`` with some
  additional functionality. Namely, it adds the ``begin_decay_at`` parameter,
  which is the first step at which to start decaying the learning rate, and
  the ``min_lr`` parameter, which is the minimal value the learning rate is
  allowed to reach.

  Args:
    global_step: global step TensorFlow tensor.
    learning_rate (float): initial learning rate to use.
    decay_steps (int): number of steps to apply decay for.
    decay_rate (float): the rate of the decay.
    use_staircase_decay (bool): whether to use staircase decay.
    begin_decay_at (int): the first step to start decaying learning rate.
    min_lr (float): minimal value of the learning rate.

  Returns:
    learning rate at step ``global_step``.
  """
  new_lr = tf.cond(
      global_step < begin_decay_at,
      lambda: learning_rate,
      lambda: tf.train.exponential_decay(
          learning_rate,
          global_step - begin_decay_at,
          decay_steps,
          decay_rate,
          staircase=use_staircase_decay,
      ),
      name="learning_rate",
  )
  final_lr = tf.maximum(min_lr, new_lr)
  return final_lr
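
# Worked example (assumed values): with learning_rate=0.1, decay_steps=1000,
# decay_rate=0.5, use_staircase_decay=True, begin_decay_at=2000 and
# min_lr=1e-4, the policy keeps the rate at 0.1 for the first 2000 steps and
# then computes
#
#   lr = 0.1 * 0.5 ** floor((step - 2000) / 1000)
#
# i.e. 0.05, 0.025, 0.0125, ... every 1000 steps, clipped from below at 1e-4.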


def poly_decay(global_step, learning_rate, decay_steps, power=1.0,
               begin_decay_at=0, min_lr=0.0, warmup_steps=0):
  """Polynomial decay learning rate policy.

  This function is equivalent to ``tf.train.polynomial_decay`` with some
  additional functionality. Namely, it adds the ``begin_decay_at`` parameter,
  which is the first step at which to start decaying the learning rate, and
  an optional linear warmup over the first ``warmup_steps`` steps.

  Args:
    global_step: global step TensorFlow tensor.
    learning_rate (float): initial learning rate to use.
    decay_steps (int): number of steps to apply decay for.
    power (float): power for polynomial decay.
    begin_decay_at (int): the first step to start decaying learning rate.
    min_lr (float): minimal value of the learning rate
        (same as ``end_learning_rate`` TensorFlow parameter).
    warmup_steps (int): number of steps over which the learning rate is
        linearly scaled up to ``learning_rate``.

  Returns:
    learning rate at step ``global_step``.
  """
  begin_decay_at = max(warmup_steps, begin_decay_at)
  if warmup_steps > 0:
    learning_rate = tf.cond(
        global_step < warmup_steps,
        lambda: learning_rate * tf.cast(global_step, tf.float32) /
                tf.cast(warmup_steps, tf.float32),
        lambda: learning_rate,
    )
  lr = tf.cond(
      global_step < begin_decay_at,
      lambda: learning_rate,
      lambda: tf.train.polynomial_decay(
          learning_rate,
          global_step=global_step - begin_decay_at,
          decay_steps=decay_steps,
          end_learning_rate=min_lr,
          power=power,
      ),
      name="learning_rate",
  )
  return lr
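
# Worked example of the warmup ramp (assumed values): with
# learning_rate=0.001 and warmup_steps=200, the rate grows linearly as
# 0.001 * step / 200 (so 0.0005 at step 100), stays at 0.001 from step 200
# until ``begin_decay_at`` (which is at least 200 after the ``max`` above),
# and then follows ``tf.train.polynomial_decay`` down to ``min_lr`` over
# ``decay_steps`` steps.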


def inv_poly_decay(global_step, learning_rate, decay_steps, min_lr,
                   power=1.0, begin_decay_at=0, warmup_steps=0,
                   name="learning_rate"):
  """Inverse polynomial decay learning rate policy.

  Implements the schedule::

      lr = learning_rate / (1 + scale * t) ** power

  This function is similar to ``tf.train.inverse_time_decay`` with some
  additional functionality. Namely, it adds:

  * ``min_lr``: the learning rate at the end of decay (clipped from below
    at 1e-8);
  * ``power``: the power of the polynomial;
  * ``begin_decay_at``: the first step to start decaying learning rate.

  Args:
    global_step: global step TensorFlow tensor.
    learning_rate (float): initial learning rate to use.
    decay_steps (int): number of steps to apply decay for.
    min_lr (float): minimal value of the learning rate
        (same as ``end_learning_rate`` TensorFlow parameter).
    power (float): power for inverse time decay.
    begin_decay_at (int): the first step to start decaying learning rate.

  Returns:
    learning rate at step ``global_step``.
  """
  min_lr = max(min_lr, 1e-8)
  min_lr = min(min_lr, learning_rate)
  if power <= 0.:
    raise ValueError("Inverse poly decay requires power > 0.")
  if global_step is None:
    raise ValueError("Inverse poly decay requires global_step.")

  with ops.name_scope(name, "InvDecay", [learning_rate, global_step]) as name:
    # Pick the decay scale so that the schedule reaches min_lr after
    # exactly decay_steps steps.
    scale = (math.pow(learning_rate / min_lr, 1. / power) - 1.) / decay_steps
    learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
    global_step = tf.cast(global_step, tf.float32)
    denom = tf.pow(1. + scale * global_step, power)
    lr = tf.div(learning_rate, denom, name=name)
  return lr
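
# Why this choice of ``scale``: solving
#
#   min_lr = learning_rate / (1 + scale * decay_steps) ** power
#
# for ``scale`` gives
#
#   scale = ((learning_rate / min_lr) ** (1. / power) - 1.) / decay_steps,
#
# which is the expression used above, so the schedule hits ``min_lr`` at
# step ``decay_steps`` exactly.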