from abc import ABC, abstractmethod
from typing import Optional, Union
import numpy as np
from pynol.environment.domain import Domain
from pynol.environment.environment import Environment
[docs]class Base(ABC):
"""An abstract class for base algorithms.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): Step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round $t$.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
"""
def __init__(self,
domain: Domain,
step_size: Union[float, np.ndarray],
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
self.domain = domain
self.step_size = step_size
self.prior = prior
self.seed = seed
self.x = self.domain.init_x(prior, seed)
self.t = 0
[docs] def opt(self, env: Environment):
"""The optimization process of the base algorithm.
All base algorithms are divided into two parts:
:meth:`~pynol.learner.base.Base.opt_by_optimism` at the beginning of
current round and :meth:`~pynol.learner.base.Base.opt_by_gradient` at the
end of current round.
Args:
env (Environment): Environment at the current round.
Returns:
tuple: tuple contains:
x (numpy.ndarray): Decision at the current round. \n
loss (float): Origin loss at the current round. \n
surrogate_loss (float): the surrogate loss at the current round.
"""
self.opt_by_optimism(env.optimism)
return self.opt_by_gradient(env)
[docs] def get_step_size(self):
"""Get the step size at each round.
Returns:
float: Step size at the current round.
"""
return self.step_size[self.t] if hasattr(self.step_size,
'__len__') else self.step_size
[docs] def opt_by_optimism(self, optimism: Optional[np.ndarray] = None):
"""Optimize by the optimism.
Args:
optimism (numpy.ndarray, optional): External optimism at the beginning of the
current round.
Returns:
None
"""
pass
[docs] @abstractmethod
def opt_by_gradient(self, env: Environment):
"""Optimize by the true gradient.
All base-algorithms are required to override this method to implement
their own optimization process.
Args:
env (Environment): Environment at the current round.
Returns:
tuple: tuple contains:
x (numpy.ndarray): the decision at the current round. \n
loss (float): the origin loss at the current round. \n
surrogate_loss (float): the surrogate loss at the current round.
"""
pass
[docs] def reinit(self):
"""Reset the base algorithm to the initial state, which is used for
adaptive algorithms.
"""
self.__init__(self.domain, self.step_size, self.prior, self.seed)
[docs]class OGD(Base):
"""Implementation of Online Gradient Descent.
``OGD`` stands for Online Gradient Descent, the most popular algorithm for
online learning. `OGD` updates the decision :math:`x_{t+1}` by
.. math::
x_{t+1} = \Pi_{\mathcal{X}} [x_t - \eta_t \\nabla f_t(x_t)]
where :math:`\eta_t > 0` is the step size at round `t`, and
:math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
point in :math:`\mathcal{X}`.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): Step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round $t$.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
"""
def __init__(self,
domain: Domain,
step_size: Union[float, np.ndarray],
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, step_size, prior, seed)
[docs] def opt_by_gradient(self, env: Environment):
x = self.x
loss, surrogate_loss = env.get_loss(x)
grad = env.get_grad(x)
step_size = self.get_step_size()
self.x = x - step_size * grad
self.x = self.domain.project(self.x)
return x, loss, surrogate_loss
[docs]class BGDOnePoint(Base):
"""Implementation of Bandit Convex Optimization with one-point feedback.
``BGDOnePoint`` stands for Bandit Convex Optimization with one-point
feedback, in which at each round the learner submits one decision and then
can only observe the function value of this point. ``BGDOnePoint`` updates
the decision :math:`x_{t+1}` by
.. math::
x_{t+1} = \Pi_{\mathcal{X}}[x_t - \eta_t \\tilde{g}_t] \mbox{ with }
\\tilde{g}_t = \\frac{d}{\delta}f_t(x_t + \delta s_t) \cdot s_t,
where :math:`\eta_t > 0` is the step size at round `t`, :math:`d` is the
dimension, :math:`\delta` is the scale of the perturbation, :math:`s_t` is
the unit vector selected uniformly at random and
:math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
point in :math:`\mathcal{X}`.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): Step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round $t$.
scale_perturb (float): Scale of perturbation :math:`\delta`.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
References:
https://arxiv.org/abs/cs/0408007
"""
def __init__(self,
domain: Domain,
step_size: Union[float, np.ndarray],
scale_perturb: float,
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, step_size, prior, seed)
self.scale_perturb = scale_perturb
self.shrink = self.scale_perturb / self.domain.r
[docs] def opt_by_gradient(self, env: Environment):
"""Optimize by the estimated gradient.
Args:
env (Environment): Environment at the current round.
Returns:
tuple: tuple contains:
x (numpy.ndarray): Decision at the current round. \n
loss (float): the origin loss at the current round. \n
surrogate_loss (float): Surrogate loss at the current round.
"""
unit_vec = self.domain.unit_vec()
x1 = self.x + self.scale_perturb * unit_vec
loss, surrogate_loss = env.get_loss(x1)
grad = self.get_grad(self.domain.dimension, loss, unit_vec)
step_size = self.get_step_size()
self.x = self.x - step_size * grad
self.x = ((1 - self.shrink) * self.domain).project(self.x)
self.t += 1
return x1, loss, surrogate_loss
[docs] def get_grad(self, dimension, loss, unit_vec) -> np.ndarray:
"""Estimate the gradient by :math:`\\tilde{g}_t =
\\frac{d}{\delta}f_t(x_t + \delta s_t) \cdot s_t`.
Args:
dimension (int): Dimension of the decision.
loss (float): Loss of the submitted decision.
unit_vec (numpy.ndarray): Unit vector used to perturb the decision.
Returns:
numpy.ndarray: the estimated gradient at the current round.
"""
grad = dimension / self.scale_perturb * loss * unit_vec
return grad
[docs] def reinit(self):
self.__init__(self.domain, self.step_size, self.scale_perturb,
self.prior, self.seed)
[docs]class BGDTwoPoint(Base):
"""Implementation of Bandit Convex Optimization with Two-point Feedback.
``BGDTwoPoint`` stands for Bandit Convex Optimization with two-point
feedback, in which at each round the learner submits two decisions and then
can only observe the function values of these two points. ``BGDTwoPoint``
updates the decision :math:`x_{t+1}` by
.. math::
x_{t+1} = \Pi_{\mathcal{X}} [x_t - \eta_t \\tilde{g}_t] \mbox{ with }
\\tilde{g}_t = \\frac{d}{2\delta}(f_t(x_t + \delta s_t) - f_t(x_t -
\delta s_t))\cdot s_t,
where :math:`\eta_t > 0` is the step size at round `t`, :math:`d` is the
dimension, :math:`\delta` is the scale of the perturbation, :math:`s_t` is
the unit vector selected uniformly at random, and
:math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
point in :math:`\mathcal{X}`.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): Step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round $t$.
scale_perturb (float): Scale of perturbation :math:`\delta`.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
References:
https://alekhagarwal.net/bandits-colt.pdf
"""
def __init__(self,
domain: Domain,
step_size: Union[float, list],
scale_perturb: float,
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, step_size, prior, seed)
self.scale_perturb = scale_perturb
self.shrink = self.scale_perturb / self.domain.r
[docs] def opt_by_gradient(self, env: Environment):
"""Optimize by the estimated gradient.
Args:
env (Environment): Environment at the current round.
Returns:
tuple: tuple contains:
x (numpy.ndarray): Average decision at the current round. \n
loss (float): Average origin loss at the current round. \n
surrogate_loss (float): Average surrogate loss at the current round.
"""
x = self.x
unit_vec = self.domain.unit_vec()
x1 = self.x + self.scale_perturb * unit_vec
x2 = self.x - self.scale_perturb * unit_vec
loss1, surrogate_loss1 = env.get_loss(x1)
loss2, surrogate_loss2 = env.get_loss(x2)
loss = (loss1 + loss2) / 2
if surrogate_loss1 is not None and surrogate_loss2 is not None:
surrogate_loss = (surrogate_loss1 + surrogate_loss2) / 2
else:
surrogate_loss = None
grad = self.get_grad(self.domain.dimension, loss1, loss2, unit_vec)
step_size = self.get_step_size()
self.x = self.x - step_size * grad
self.x = ((1 - self.shrink) * self.domain).project(self.x)
self.t += 1
return x, loss, surrogate_loss
[docs] def get_grad(self, dimension, loss1, loss2, unit_vec):
"""Estimate the gradient by :math:`\\tilde{g}_t =
\\frac{d}{2\delta}(f_t(x_t + \delta s_t) - f_t(x_t - \delta s_t))\cdot
s_t`.
Args:
dimension (int): Dimension of the decision.
loss1 (float): Loss of the first submitted decision.
loss2 (float): Loss of the second submitted decision.
unit_vec (numpy.ndarray): Unit vector used to perturb the decision.
Returns:
numpy.ndarray: Estimated gradient at the current round.
"""
self.grad = dimension / (2 * self.scale_perturb) * (loss1 -
loss2) * unit_vec
return self.grad
[docs] def reinit(self):
self.__init__(self.domain, self.step_size, self.scale_perturb,
self.prior, self.seed)
[docs]class SOGD(Base):
"""Implementation of scale-free online learning.
``SOGD`` stands for Scale-free Online Gradient Descent, an online convex
optimization algorithm that achieves parameter-free and scale-free
simultaneously, which is useful in deriving small-loss bound for smooth
functions. ``SOGD`` updates the decision :math:`x_{t+1}` by
.. math::
x_{t+1} = \Pi_{\mathcal{X}}[x_t - \\frac{1}{\sqrt{\delta +
\sum_{s=1}^{t-1} \\nabla f_s(x_s)}} \\nabla f_t(x_t)]
where :math:`\delta` is a small constant to avoid divide by :math:`0`, and
:math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
point in :math:`\mathcal{X}`. We set :math:`\delta=1` in our implementation.
Args:
domain (Domain): Feasible set for the base algorithm.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
References:
https://arxiv.org/abs/1601.01974
"""
def __init__(self,
domain: Domain,
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, None, prior, seed)
self.grad_cum = 1
[docs] def opt_by_gradient(self, env: Environment):
x = self.x
loss, surrogate_loss = env.get_loss(x)
grad = env.get_grad(x)
self.grad_cum += np.dot(grad, grad)
step_size = 1 / np.sqrt(self.grad_cum)
self.x = self.x - step_size * grad
self.x = self.domain.project(self.x)
self.t += 1
return x, loss, surrogate_loss
[docs] def reinit(self):
self.__init__(self.domain, self.prior, self.seed)
[docs]class OEGD(Base):
"""Implementation of online extra-gradient descent.
``OEGD`` stands for Online Extra-Gradient Descent. It is shown to enjoy
gradient-variation regret guarantee, which would be smaller when the
environments evolve gradually. ``OEGD`` updates decision :math:`x_t` by
.. math::
x_t = \Pi_{\mathcal{X}}[\hat{x}_t - \eta_t \\nabla f_{t-1}(\hat{x}_t)], \n
\hat{x}_{t+1} = \Pi_{\mathcal{X}}[\hat{x}_t - \eta_t \\nabla f_t(x_t)],
where :math:`\eta_t > 0` is is the step size at round :math:`t`, and
:math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
point in :math:`\mathcal{X}`.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): Step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round $t$.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
References:
https://proceedings.mlr.press/v23/chiang12.html
"""
def __init__(self,
domain: Domain,
step_size: Union[float, np.ndarray],
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, step_size, prior, seed)
self.middle_x = self.x
self.optimism = np.zeros_like(self.x)
[docs] def opt_by_gradient(self, env: Environment):
loss, surrogate_loss = env.get_loss(self.x)
grad = env.get_grad(self.x)
step_size = self.get_step_size()
self.middle_x = self.middle_x - step_size * grad
self.middle_x = self.domain.project(self.middle_x)
self.optimism = env.get_grad(self.middle_x)
self.t += 1
return self.x, loss, surrogate_loss
[docs] def opt_by_optimism(self, optimism: Optional[np.ndarray] = None):
step_size = self.get_step_size()
self.x = self.middle_x - step_size * self.optimism
self.x = self.domain.project(self.x)
[docs]class OptimisticBase(Base):
"""An abstract class for optimistic-type base algorithms.
The optimistic-type algorithms are very general and useful, and they can
achieve a tighter regret guarantee with benign “predictable sequences”.
There are mainly two types of optimistic algorithms: Optimistic Online
Mirror Descent (Optimistic OMD) and Optimistic Follow The Regularized Leader
(Optimistic FTRL). The general update rule of ``Optimistic OMD`` is as
follows,
.. math::
x_t = {\\arg\min}_{x \in \mathcal{X}} \ \eta_t \langle m_t, x\\rangle + D_R(x,
\hat{x}_t) \n
\hat{x}_{t+1} = {\\arg\min}_{x \in \mathcal{X}} \ \eta_t \langle \\nabla
f_t(x_t), x\\rangle + D_R(x, \hat{x}_t),
and the general update rule of ``Optimistic FTRL`` is as follows,
.. math::
x_t = {\\arg\min}_{x \in \mathcal{X}} \ \eta_t \langle \sum_{s=1}^{t-1}
\\nabla f_s(x_s) + m_t, x\\rangle + R(x),
where :math:`\eta_t` is the step size at round :math:`t`, :math:`m_t` is the
optimism at the beginning of round :math:`t`, serving as a guess of the true
gradient :math:`\\nabla f_t(x_t)` at round :math:`t`, :math:`R` is the
regularizer and :math:`D_R(\cdot, \cdot)` is the Bregman divergence with
respect to the regularizer :math:`R`.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): Step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round :math:`t`.
optimism_type (str, optional): Type of optimism used for algorithm.
Valid actions include ``external``, ``last_grad``, ``middle_grad`` and
``None``. if ``optimism_type='external'``, the algorithm will accept the
external specified :math:`m_t` by the environment at each round; if
``optimism_type='last_grad'``, the optimism is set as :math:`m_t =
\\nabla f_{t-1}(x_{t-1})`; if ``optimism_type='middle_grad'``, the
optimism is set as :math:`m_t = \\nabla f_{t-1}(\hat{x}_t)`, and if
``optimism_type=None``, the optimism is set as :math:`m_t = 0`.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
References:
https://proceedings.mlr.press/v23/chiang12.html
"""
def __init__(self,
domain: Domain,
step_size: Union[float, list],
optimism_type: str = 'external',
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, step_size, prior, seed)
self.optimism_type = optimism_type
self.middle_x = self.x
self.optimism = np.zeros_like(self.x)
[docs] def compute_internal_optimism(self, env: Environment):
"""Compute the internal optimism.
Args:
env (Environment): Environment at the current round.
"""
if self.optimism_type is None or self.optimism_type == 'external':
pass
elif self.optimism_type == 'last_grad':
self.optimism = self.grad
elif self.optimism_type == 'middle_grad':
self.optimism = env.get_grad(self.middle_x)
else:
raise TypeError(f'{self.optimism_type} is not defined')
[docs] def reinit(self):
self.__init__(self.domain, self.step_size, self.optimism_type,
self.prior, self.seed)
[docs]class OptimisticOGD(OptimisticBase):
"""Implementation of Optimistic Online Gradient Descent.
``OptimisticOGD`` is an online convex optimization algorithm, which is a
specialization of ``Optimistic OMD`` with the Euclidean distance as the
regularizer. ``OptimisticOGD`` updates the decision :math:`x_{t}` by
.. math::
x_t = \Pi_{\mathcal{X}}[\hat{x}_t - \eta_t m_t] \n
\hat{x}_{t+1} = \Pi_{\mathcal{X}}[\hat{x}_t - \eta_t \\nabla f_t(x_t)],
where :math:`\eta_t > 0` is the step size at round :math:`t`, :math:`m_t` is
the optimism at the beginning of round :math:`t`, and
:math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
point in :math:`\mathcal{X}`.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): Step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round :math:`t`.
optimism_type (str, optional): Type of optimism used for algorithm.
Valid actions include ``external``, ``last_grad``, ``middle_grad`` and
``None``. if ``optimism_type='external'``, the algorithm will accept the
external specified :math:`m_t` by the environment at each round; if
``optimism_type='last_grad'``, the optimism is set as :math:`m_t =
\\nabla f_{t-1}(x_{t-1})`; if ``optimism_type='middle_grad'``, the
optimism is set as :math:`m_t = \\nabla f_{t-1}(\hat{x}_t)`, and if
``optimism_type=None``, the optimism is set as :math:`m_t = 0`.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
.. note::
1. ``OGD`` is a special case of ``OptimisticOGD`` with ``optimism_type=None``.
2. ``OEGD`` is a special case of ``OptimisticOGD`` with ``optimism_type='middle_grad``.
References:
https://proceedings.mlr.press/v30/Rakhlin13.html
"""
def __init__(self,
domain: Domain,
step_size: Union[float, list],
optimism_type: str = 'external',
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, step_size, optimism_type, prior, seed)
[docs] def opt_by_optimism(self, optimism: Optional[np.ndarray] = None):
if self.optimism_type is None:
self.optimism = np.zeros_like(self.middle_x)
elif self.optimism_type == 'external':
self.optimism = optimism if optimism is not None else np.zeros_like(
self.middle_x)
else:
pass
step_size = self.get_step_size()
self.x = self.middle_x - step_size * self.optimism
self.x = self.domain.project(self.x)
[docs] def opt_by_gradient(self, env: Environment):
loss, surrogate_loss = env.get_loss(self.x)
self.grad = env.get_grad(self.x)
step_size = self.get_step_size()
self.middle_x = self.middle_x - step_size * self.grad
self.middle_x = self.domain.project(self.middle_x)
self.compute_internal_optimism(env)
self.t += 1
return self.x, loss, surrogate_loss
[docs]class OptimisticHedge(OptimisticBase):
"""Implementation of Optimistic Hedge.
``Hedge`` is the most popular algorithm for tracking the best expert
problem. ``Optimistic Hedge`` is an enhanced version of ``Hedge`` which can
further incorporate the knowledge of predictable sequences. There are two
versions of ``Optimistic Hedge``: the greedy version and the lazy version.
The greedy version is a special case of ``Optimistic OMD`` with the negative
entropy as the regularizer and the lazy version is a special case of
``Optimistic FTRL``. The greedy version ``Optimistic Hedge`` updates the
decision :math:`x_t` by
.. math::
{x}_t = \Pi_{\mathcal{X}}[\hat{x}_{t} \exp(-\eta_t m_{t})] \n
\hat{x}_{t+1} = \Pi_{\mathcal{X}}[\hat{x}_t\exp(-\eta_t (\\nabla
f_t({x}_t) +a_t))],
and the lazy version ``Optimistic Hedge`` updates the decision :math:`x_t` by
.. math::
x_t = \Pi_{\mathcal{X}}[x_1 \exp(-\eta_t (\sum_{s=1}^{t-1} (\\nabla
f_s(x_s) +a_s) + m_t))],
where :math:`\eta_t > 0` is the step size at round :math:`t`, :math:`m_t` is
the optimism at the beginning of round :math:`t`, :math:`a_t` is the
correction term and :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection
onto the nearest point in :math:`\mathcal{X}`.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): Step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round :math:`t`.
optimism_type (str, optional): Type of optimism used for algorithm.
Valid actions include ``external``, ``last_grad``, ``middle_grad`` and
``None``. if ``optimism_type='external'``, the algorithm will accept the
external specified :math:`m_t` by the environment at each round; if
``optimism_type='last_grad'``, the optimism is set as :math:`m_t =
\\nabla f_{t-1}(x_{t-1})`; if ``optimism_type='middle_grad'``, the
optimism is set as :math:`m_t = \\nabla f_{t-1}(\hat{x}_t)`, and if
``optimism_type=None``, the optimism is set as :math:`m_t = 0`.
is_lazy (bool): Type of the update version: lazy or greedy. The
default is False.
correct (bool): Whether to use correction term. :math:`a_t` is
set as :math:`a_t = \eta_t (\\nabla f_t(x_t) - m_t)^2` if ``correct=True`` and
:math:`a_t=0` otherwise. The default is False.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
References:
http://proceedings.mlr.press/v30/Rakhlin13.pdf
.. note::
1. Greedy version ``Optimistic Hedge`` is a special case of ``OptimisticOMD``
with the negative entropy as the regularizer.
2. Lazy version ``Optimistic Hedge`` is a special case of ``OptimisticFTRL``
with the negative entropy as the regularizer.
"""
def __init__(self,
domain: Domain,
step_size: Union[float, list],
optimism_type: str = 'external',
is_lazy: bool = False,
correct: bool = False,
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, step_size, optimism_type, prior, seed)
self.is_lazy = is_lazy
if self.is_lazy:
self.cum_loss = np.zeros_like(self.x)
self.correct = correct
[docs] def opt_by_optimism(self, optimism: Optional[np.ndarray] = None):
if self.optimism_type == 'external' and optimism is not None:
self.optimism = optimism
else:
pass
step_size = self.get_step_size()
self.x = self.middle_x * np.exp(-step_size * self.optimism)
self.x = self.domain.project(self.x)
[docs] def opt_by_gradient(self, env: Environment):
loss, surrogate_loss = env.get_loss(self.x)
self.grad = env.get_grad(self.x)
step_size = self.get_step_size()
if self.correct and self.optimism is not None:
correction = step_size * (self.grad - self.optimism)**2
elif self.correct:
correction = step_size * (self.grad)**2
else:
correction = np.zeros_like(self.grad)
if self.is_lazy:
self.cum_loss += self.grad
self.middle_x = self.init_x * np.exp(-step_size *
(self.cum_loss + correction))
else:
self.middle_x = self.middle_x * np.exp(-step_size *
(self.grad + correction))
self.middle_x = self.domain.project(self.middle_x)
self.compute_internal_optimism(env)
self.t += 1
return self.x, loss, surrogate_loss
[docs] def reinit(self):
self.__init__(self.domain, self.step_size, self.optimism_type,
self.is_lazy, self.correct, self.prior, self.seed)
[docs]class Hedge(OptimisticHedge):
"""Implementation of Hedge.
``Hedge`` is the most popular algorithm for tracking the best expert
problem. There are two versions of ``Hedge``: the greedy version and the
lazy version. The greedy version is a special case of ``Optimistic OMD``
with the negative entropy as the regularizer and ``optimism_type=None`` and
the lazy version is a special case of ``Optimistic FTRL`` with the negative
entropy as the regularizer and ``optimism_type=None``. The greedy version
``Hedge`` updates the decision :math:`x_{t+1}` by
.. math::
x_{t+1} = \Pi_{\mathcal{X}}[x_t\exp(-\eta_t (\\nabla f_t({x}_t) +a_t))],
and the lazy version ``Hedge`` updates the decision :math:`x_{t+1}` by
.. math::
x_{t+1} = \Pi_{\mathcal{X}}[x_1 \exp(-\eta_t (\sum_{s=1}^{t} (\\nabla
f_s(x_s) +a_s)))],
where :math:`\eta_t > 0` is the step size, :math:`a_t` is the correction
term at round :math:`t`, and :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the
projection onto the nearest point in :math:`\mathcal{X}`.
Args:
domain (Domain): Feasible set for the base algorithm.
step_size (float, numpy.ndarray): the step size :math:`\eta` for the
base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
the type of the step size :math:`\eta` is `float`, the algorithm will
use the fixed step size all the time, otherwise, the algorithm will use
the step size :math:`\eta_t` at round :math:`t`.
is_lazy (bool): Type of the update version: lazy or greedy. The
default is False.
correct (bool): Whether to use correction term. :math:`a_t` is
set as :math:`a_t = \\nabla f_t(x_t)^2` if ``correct=True`` and
:math:`a_t=0` otherwise. The default is False.
prior (str, numpy.ndarray, optional): The initial decision of the
algorithm is set as ``domain.init_x(prior, seed)``.
seed (int, optional): The initial decision of the algorithm is set as
``domain.init_x(prior, seed)``.
"""
def __init__(self,
domain: Domain,
step_size: Union[float, list],
is_lazy: bool = False,
correct: bool = False,
prior: Optional[Union[list, np.ndarray]] = None,
seed: Optional[int] = None):
super().__init__(domain, step_size, None, is_lazy, correct, prior,
seed)