Source code for pynol.learner.base

from abc import ABC, abstractmethod
from typing import Optional, Union

import numpy as np
from pynol.environment.domain import Domain
from pynol.environment.environment import Environment


[docs]class Base(ABC):
    """An abstract class for base algorithms.

    Args:
        domain (Domain): Feasible set for the base algorithm.
            step_size (float, numpy.ndarray): Step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round $t$.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.
    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, np.ndarray],
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        self.domain = domain
        self.step_size = step_size
        self.prior = prior
        self.seed = seed
        self.x = self.domain.init_x(prior, seed)
        self.t = 0

[docs]    def opt(self, env: Environment):
        """The optimization process of the base algorithm.

        All base algorithms are divided into two parts:
        :meth:`~pynol.learner.base.Base.opt_by_optimism` at the beginning of
        current round and :meth:`~pynol.learner.base.Base.opt_by_gradient` at the
        end of current round.

        Args:
            env (Environment): Environment at the current round.

        Returns:
            tuple: tuple contains:
                x (numpy.ndarray): Decision at the current round. \n
                loss (float): Origin loss at the current round. \n
                surrogate_loss (float): the surrogate loss at the current round.
        """
        self.opt_by_optimism(env.optimism)
        return self.opt_by_gradient(env)

[docs]    def get_step_size(self):
        """Get the step size at each round.

        Returns:
            float: Step size at the current round.
        """
        return self.step_size[self.t] if hasattr(self.step_size,
                                                 '__len__') else self.step_size

[docs]    def opt_by_optimism(self, optimism: Optional[np.ndarray] = None):
        """Optimize by the optimism.

        Args:
            optimism (numpy.ndarray, optional): External optimism at the beginning of the
                current round.

        Returns:
            None
        """
        pass

[docs]    @abstractmethod
    def opt_by_gradient(self, env: Environment):
        """Optimize by the true gradient.

        All base-algorithms are required to override this method to implement
        their own optimization process.

        Args:
            env (Environment): Environment at the current round.

        Returns:
            tuple: tuple contains:
                x (numpy.ndarray): the decision at the current round. \n
                loss (float): the origin loss at the current round. \n
                surrogate_loss (float): the surrogate loss at the current round.
        """
        pass

[docs]    def reinit(self):
        """Reset the base algorithm to the initial state, which is used for
        adaptive algorithms.
        """
        self.__init__(self.domain, self.step_size, self.prior, self.seed)


[docs]class OGD(Base):
    """Implementation of Online Gradient Descent.

    ``OGD`` stands for Online Gradient Descent, the most popular algorithm for
    online learning. `OGD` updates the decision :math:`x_{t+1}` by

    .. math::

        x_{t+1} = \Pi_{\mathcal{X}} [x_t - \eta_t \\nabla f_t(x_t)]

    where :math:`\eta_t > 0` is the step size at round `t`, and
    :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
    point in :math:`\mathcal{X}`.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        step_size (float, numpy.ndarray): Step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round $t$.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.
    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, np.ndarray],
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, step_size, prior, seed)

[docs]    def opt_by_gradient(self, env: Environment):
        x = self.x
        loss, surrogate_loss = env.get_loss(x)
        grad = env.get_grad(x)
        step_size = self.get_step_size()
        self.x = x - step_size * grad
        self.x = self.domain.project(self.x)
        return x, loss, surrogate_loss


[docs]class BGDOnePoint(Base):
    """Implementation of Bandit Convex Optimization with one-point feedback.

    ``BGDOnePoint`` stands for Bandit Convex Optimization with one-point
    feedback, in which at each round the learner submits one decision and then
    can only observe the function value of this point. ``BGDOnePoint`` updates
    the decision :math:`x_{t+1}` by

    .. math::

        x_{t+1} = \Pi_{\mathcal{X}}[x_t - \eta_t \\tilde{g}_t] \mbox{ with }
        \\tilde{g}_t = \\frac{d}{\delta}f_t(x_t + \delta s_t) \cdot s_t,

    where :math:`\eta_t > 0` is the step size at round `t`, :math:`d` is the
    dimension, :math:`\delta` is the scale of the perturbation, :math:`s_t` is
    the unit vector selected uniformly at random and
    :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
    point in :math:`\mathcal{X}`.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        step_size (float, numpy.ndarray): Step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round $t$.
        scale_perturb (float): Scale of perturbation :math:`\delta`.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.

    References:
        https://arxiv.org/abs/cs/0408007
    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, np.ndarray],
                 scale_perturb: float,
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, step_size, prior, seed)
        self.scale_perturb = scale_perturb
        self.shrink = self.scale_perturb / self.domain.r

[docs]    def opt_by_gradient(self, env: Environment):
        """Optimize by the estimated gradient.

        Args:
            env (Environment): Environment at the current round.

        Returns:
            tuple: tuple contains:
                x (numpy.ndarray): Decision at the current round. \n
                loss (float): the origin loss at the current round. \n
                surrogate_loss (float): Surrogate loss at the current round.
        """
        unit_vec = self.domain.unit_vec()
        x1 = self.x + self.scale_perturb * unit_vec
        loss, surrogate_loss = env.get_loss(x1)
        grad = self.get_grad(self.domain.dimension, loss, unit_vec)
        step_size = self.get_step_size()
        self.x = self.x - step_size * grad
        self.x = ((1 - self.shrink) * self.domain).project(self.x)
        self.t += 1
        return x1, loss, surrogate_loss

[docs]    def get_grad(self, dimension, loss, unit_vec) -> np.ndarray:
        """Estimate the gradient by :math:`\\tilde{g}_t =
        \\frac{d}{\delta}f_t(x_t + \delta s_t) \cdot s_t`.

        Args:
            dimension (int): Dimension of the decision.
            loss (float): Loss of the submitted decision.
            unit_vec (numpy.ndarray): Unit vector used to perturb the decision.

        Returns:
            numpy.ndarray: the estimated gradient at the current round.
        """
        grad = dimension / self.scale_perturb * loss * unit_vec
        return grad

[docs]    def reinit(self):
        self.__init__(self.domain, self.step_size, self.scale_perturb,
                      self.prior, self.seed)


[docs]class BGDTwoPoint(Base):
    """Implementation of Bandit Convex Optimization with Two-point Feedback.

    ``BGDTwoPoint`` stands for Bandit Convex Optimization with two-point
    feedback, in which at each round the learner submits two decisions and then
    can only observe the function values of these two points. ``BGDTwoPoint``
    updates the decision :math:`x_{t+1}` by

    .. math::

        x_{t+1} = \Pi_{\mathcal{X}} [x_t - \eta_t \\tilde{g}_t] \mbox{ with }
        \\tilde{g}_t = \\frac{d}{2\delta}(f_t(x_t + \delta s_t) - f_t(x_t -
        \delta s_t))\cdot s_t,

    where :math:`\eta_t > 0` is the step size at round `t`, :math:`d` is the
    dimension, :math:`\delta` is the scale of the perturbation, :math:`s_t` is
    the unit vector selected uniformly at random, and
    :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
    point in :math:`\mathcal{X}`.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        step_size (float, numpy.ndarray): Step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round $t$.
        scale_perturb (float): Scale of perturbation :math:`\delta`.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.


    References:
        https://alekhagarwal.net/bandits-colt.pdf
    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, list],
                 scale_perturb: float,
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, step_size, prior, seed)
        self.scale_perturb = scale_perturb
        self.shrink = self.scale_perturb / self.domain.r

[docs]    def opt_by_gradient(self, env: Environment):
        """Optimize by the estimated gradient.

        Args:
            env (Environment): Environment at the current round.

        Returns:
            tuple: tuple contains:
                x (numpy.ndarray): Average decision at the current round. \n
                loss (float): Average origin loss at the current round. \n
                surrogate_loss (float): Average surrogate loss at the current round.
        """
        x = self.x
        unit_vec = self.domain.unit_vec()
        x1 = self.x + self.scale_perturb * unit_vec
        x2 = self.x - self.scale_perturb * unit_vec
        loss1, surrogate_loss1 = env.get_loss(x1)
        loss2, surrogate_loss2 = env.get_loss(x2)
        loss = (loss1 + loss2) / 2
        if surrogate_loss1 is not None and surrogate_loss2 is not None:
            surrogate_loss = (surrogate_loss1 + surrogate_loss2) / 2
        else:
            surrogate_loss = None
        grad = self.get_grad(self.domain.dimension, loss1, loss2, unit_vec)
        step_size = self.get_step_size()
        self.x = self.x - step_size * grad
        self.x = ((1 - self.shrink) * self.domain).project(self.x)
        self.t += 1
        return x, loss, surrogate_loss

[docs]    def get_grad(self, dimension, loss1, loss2, unit_vec):
        """Estimate the gradient by :math:`\\tilde{g}_t =
        \\frac{d}{2\delta}(f_t(x_t + \delta s_t) - f_t(x_t - \delta s_t))\cdot
        s_t`.

        Args:
            dimension (int): Dimension of the decision.
            loss1 (float): Loss of the first submitted decision.
            loss2 (float): Loss of the second submitted decision.
            unit_vec (numpy.ndarray): Unit vector used to perturb the decision.

        Returns:
            numpy.ndarray: Estimated gradient at the current round.
        """
        self.grad = dimension / (2 * self.scale_perturb) * (loss1 -
                                                            loss2) * unit_vec
        return self.grad

[docs]    def reinit(self):
        self.__init__(self.domain, self.step_size, self.scale_perturb,
                      self.prior, self.seed)


[docs]class SOGD(Base):
    """Implementation of scale-free online learning.

    ``SOGD`` stands for Scale-free Online Gradient Descent, an online convex
    optimization algorithm that achieves parameter-free and scale-free
    simultaneously, which is useful in deriving small-loss bound for smooth
    functions. ``SOGD`` updates the decision :math:`x_{t+1}` by

    .. math::

        x_{t+1} = \Pi_{\mathcal{X}}[x_t - \\frac{1}{\sqrt{\delta +
        \sum_{s=1}^{t-1} \\nabla f_s(x_s)}}  \\nabla f_t(x_t)]

    where :math:`\delta` is a small constant to avoid divide by :math:`0`, and
    :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
    point in :math:`\mathcal{X}`. We set :math:`\delta=1` in our implementation.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.

    References:
        https://arxiv.org/abs/1601.01974
    """

    def __init__(self,
                 domain: Domain,
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, None, prior, seed)
        self.grad_cum = 1

[docs]    def opt_by_gradient(self, env: Environment):
        x = self.x
        loss, surrogate_loss = env.get_loss(x)
        grad = env.get_grad(x)
        self.grad_cum += np.dot(grad, grad)
        step_size = 1 / np.sqrt(self.grad_cum)
        self.x = self.x - step_size * grad
        self.x = self.domain.project(self.x)
        self.t += 1
        return x, loss, surrogate_loss

[docs]    def reinit(self):
        self.__init__(self.domain, self.prior, self.seed)


[docs]class OEGD(Base):
    """Implementation of online extra-gradient descent.

    ``OEGD`` stands for Online Extra-Gradient Descent. It is shown to enjoy
    gradient-variation regret guarantee, which would be smaller when the
    environments evolve gradually. ``OEGD`` updates decision :math:`x_t` by

    .. math::

        x_t = \Pi_{\mathcal{X}}[\hat{x}_t - \eta_t \\nabla f_{t-1}(\hat{x}_t)], \n
        \hat{x}_{t+1} = \Pi_{\mathcal{X}}[\hat{x}_t - \eta_t \\nabla f_t(x_t)],

    where :math:`\eta_t > 0` is is the step size at round :math:`t`, and
    :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
    point in :math:`\mathcal{X}`.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        step_size (float, numpy.ndarray): Step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round $t$.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.

    References:
        https://proceedings.mlr.press/v23/chiang12.html
    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, np.ndarray],
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, step_size, prior, seed)
        self.middle_x = self.x
        self.optimism = np.zeros_like(self.x)

[docs]    def opt_by_gradient(self, env: Environment):
        loss, surrogate_loss = env.get_loss(self.x)
        grad = env.get_grad(self.x)
        step_size = self.get_step_size()
        self.middle_x = self.middle_x - step_size * grad
        self.middle_x = self.domain.project(self.middle_x)
        self.optimism = env.get_grad(self.middle_x)
        self.t += 1
        return self.x, loss, surrogate_loss

[docs]    def opt_by_optimism(self, optimism: Optional[np.ndarray] = None):
        step_size = self.get_step_size()
        self.x = self.middle_x - step_size * self.optimism
        self.x = self.domain.project(self.x)


[docs]class OptimisticBase(Base):
    """An abstract class for optimistic-type base algorithms.

    The optimistic-type algorithms are very general and useful, and they can
    achieve a tighter regret guarantee with benign “predictable sequences”.
    There are mainly two types of optimistic algorithms: Optimistic Online
    Mirror Descent (Optimistic OMD) and Optimistic Follow The Regularized Leader
    (Optimistic FTRL). The general update rule of ``Optimistic OMD`` is as
    follows,

    .. math::
        x_t = {\\arg\min}_{x \in \mathcal{X}} \ \eta_t \langle m_t, x\\rangle + D_R(x,
        \hat{x}_t) \n
        \hat{x}_{t+1} = {\\arg\min}_{x \in \mathcal{X}} \ \eta_t \langle \\nabla
        f_t(x_t), x\\rangle + D_R(x, \hat{x}_t),

    and the general update rule of ``Optimistic FTRL`` is as follows,

    .. math::
        x_t = {\\arg\min}_{x \in \mathcal{X}} \ \eta_t \langle \sum_{s=1}^{t-1}
        \\nabla f_s(x_s) + m_t, x\\rangle + R(x),

    where :math:`\eta_t` is the step size at round :math:`t`, :math:`m_t` is the
    optimism at the beginning of round :math:`t`, serving as a guess of the true
    gradient :math:`\\nabla f_t(x_t)` at round :math:`t`, :math:`R` is the
    regularizer and :math:`D_R(\cdot, \cdot)` is the Bregman divergence with
    respect to the regularizer :math:`R`.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        step_size (float, numpy.ndarray): Step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round :math:`t`.
        optimism_type (str, optional): Type of optimism used for algorithm.
            Valid actions include ``external``, ``last_grad``, ``middle_grad`` and
            ``None``. if ``optimism_type='external'``, the algorithm will accept the
            external specified :math:`m_t` by the environment at each round; if
            ``optimism_type='last_grad'``, the optimism is set as :math:`m_t =
            \\nabla f_{t-1}(x_{t-1})`; if ``optimism_type='middle_grad'``, the
            optimism is set as :math:`m_t = \\nabla f_{t-1}(\hat{x}_t)`, and if
            ``optimism_type=None``, the optimism is set as :math:`m_t = 0`.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.

    References:
        https://proceedings.mlr.press/v23/chiang12.html
    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, list],
                 optimism_type: str = 'external',
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, step_size, prior, seed)
        self.optimism_type = optimism_type
        self.middle_x = self.x
        self.optimism = np.zeros_like(self.x)

[docs]    def compute_internal_optimism(self, env: Environment):
        """Compute the internal optimism.

        Args:
            env (Environment): Environment at the current round.
        """
        if self.optimism_type is None or self.optimism_type == 'external':
            pass
        elif self.optimism_type == 'last_grad':
            self.optimism = self.grad
        elif self.optimism_type == 'middle_grad':
            self.optimism = env.get_grad(self.middle_x)
        else:
            raise TypeError(f'{self.optimism_type} is not defined')

[docs]    def reinit(self):
        self.__init__(self.domain, self.step_size, self.optimism_type,
                      self.prior, self.seed)


[docs]class OptimisticOGD(OptimisticBase):
    """Implementation of Optimistic Online Gradient Descent.

    ``OptimisticOGD`` is an online convex optimization algorithm, which is a
    specialization of ``Optimistic OMD`` with the Euclidean distance as the
    regularizer. ``OptimisticOGD`` updates the decision :math:`x_{t}` by

    .. math::

        x_t = \Pi_{\mathcal{X}}[\hat{x}_t - \eta_t m_t] \n
        \hat{x}_{t+1} = \Pi_{\mathcal{X}}[\hat{x}_t - \eta_t \\nabla f_t(x_t)],

    where :math:`\eta_t > 0` is the step size at round :math:`t`, :math:`m_t` is
    the optimism at the beginning of round :math:`t`, and
    :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection onto the nearest
    point in :math:`\mathcal{X}`.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        step_size (float, numpy.ndarray): Step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round :math:`t`.
        optimism_type (str, optional): Type of optimism used for algorithm.
            Valid actions include ``external``, ``last_grad``, ``middle_grad`` and
            ``None``. if ``optimism_type='external'``, the algorithm will accept the
            external specified :math:`m_t` by the environment at each round; if
            ``optimism_type='last_grad'``, the optimism is set as :math:`m_t =
            \\nabla f_{t-1}(x_{t-1})`; if ``optimism_type='middle_grad'``, the
            optimism is set as :math:`m_t = \\nabla f_{t-1}(\hat{x}_t)`, and if
            ``optimism_type=None``, the optimism is set as :math:`m_t = 0`.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.

    .. note::
        1. ``OGD`` is a special case of ``OptimisticOGD`` with ``optimism_type=None``.
        2. ``OEGD`` is a special case of ``OptimisticOGD`` with ``optimism_type='middle_grad``.

    References:
        https://proceedings.mlr.press/v30/Rakhlin13.html
    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, list],
                 optimism_type: str = 'external',
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, step_size, optimism_type, prior, seed)

[docs]    def opt_by_optimism(self, optimism: Optional[np.ndarray] = None):
        if self.optimism_type is None:
            self.optimism = np.zeros_like(self.middle_x)
        elif self.optimism_type == 'external':
            self.optimism = optimism if optimism is not None else np.zeros_like(
                self.middle_x)
        else:
            pass
        step_size = self.get_step_size()
        self.x = self.middle_x - step_size * self.optimism
        self.x = self.domain.project(self.x)

[docs]    def opt_by_gradient(self, env: Environment):
        loss, surrogate_loss = env.get_loss(self.x)
        self.grad = env.get_grad(self.x)
        step_size = self.get_step_size()
        self.middle_x = self.middle_x - step_size * self.grad
        self.middle_x = self.domain.project(self.middle_x)
        self.compute_internal_optimism(env)
        self.t += 1
        return self.x, loss, surrogate_loss


[docs]class OptimisticHedge(OptimisticBase):
    """Implementation of Optimistic Hedge.

    ``Hedge`` is the most popular algorithm for tracking the best expert
    problem. ``Optimistic Hedge`` is an enhanced version of ``Hedge`` which can
    further incorporate the knowledge of predictable sequences. There are two
    versions of ``Optimistic Hedge``: the greedy version and the lazy version.
    The greedy version is a special case of ``Optimistic OMD`` with the negative
    entropy as the regularizer and the lazy version is a special case of
    ``Optimistic FTRL``. The greedy version ``Optimistic Hedge`` updates the
    decision :math:`x_t` by

    .. math::

        {x}_t =  \Pi_{\mathcal{X}}[\hat{x}_{t} \exp(-\eta_t m_{t})] \n
        \hat{x}_{t+1} = \Pi_{\mathcal{X}}[\hat{x}_t\exp(-\eta_t (\\nabla
        f_t({x}_t) +a_t))],

    and the lazy version ``Optimistic Hedge`` updates the decision :math:`x_t` by

    .. math::

        x_t = \Pi_{\mathcal{X}}[x_1 \exp(-\eta_t (\sum_{s=1}^{t-1} (\\nabla
        f_s(x_s) +a_s) + m_t))],

    where :math:`\eta_t > 0` is the step size at round :math:`t`, :math:`m_t` is
    the optimism at the beginning of round :math:`t`, :math:`a_t` is the
    correction term and :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the projection
    onto the nearest point in :math:`\mathcal{X}`.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        step_size (float, numpy.ndarray): Step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round :math:`t`.
        optimism_type (str, optional): Type of optimism used for algorithm.
            Valid actions include ``external``, ``last_grad``, ``middle_grad`` and
            ``None``. if ``optimism_type='external'``, the algorithm will accept the
            external specified :math:`m_t` by the environment at each round; if
            ``optimism_type='last_grad'``, the optimism is set as :math:`m_t =
            \\nabla f_{t-1}(x_{t-1})`; if ``optimism_type='middle_grad'``, the
            optimism is set as :math:`m_t = \\nabla f_{t-1}(\hat{x}_t)`, and if
            ``optimism_type=None``, the optimism is set as :math:`m_t = 0`.
        is_lazy (bool): Type of the update version: lazy or greedy. The
            default is False.
        correct (bool): Whether to use correction term. :math:`a_t` is
            set as :math:`a_t = \eta_t (\\nabla f_t(x_t) - m_t)^2` if ``correct=True`` and
            :math:`a_t=0` otherwise. The default is False.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.

    References:
        http://proceedings.mlr.press/v30/Rakhlin13.pdf

    .. note::
        1. Greedy version ``Optimistic Hedge`` is a special case of ``OptimisticOMD``
        with the negative entropy as the regularizer.

        2. Lazy version ``Optimistic Hedge`` is a special case of ``OptimisticFTRL``
        with the negative entropy as the regularizer.
    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, list],
                 optimism_type: str = 'external',
                 is_lazy: bool = False,
                 correct: bool = False,
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, step_size, optimism_type, prior, seed)
        self.is_lazy = is_lazy
        if self.is_lazy:
            self.cum_loss = np.zeros_like(self.x)
        self.correct = correct

[docs]    def opt_by_optimism(self, optimism: Optional[np.ndarray] = None):
        if self.optimism_type == 'external' and optimism is not None:
            self.optimism = optimism
        else:
            pass
        step_size = self.get_step_size()
        self.x = self.middle_x * np.exp(-step_size * self.optimism)
        self.x = self.domain.project(self.x)

[docs]    def opt_by_gradient(self, env: Environment):
        loss, surrogate_loss = env.get_loss(self.x)
        self.grad = env.get_grad(self.x)
        step_size = self.get_step_size()
        if self.correct and self.optimism is not None:
            correction = step_size * (self.grad - self.optimism)**2
        elif self.correct:
            correction = step_size * (self.grad)**2
        else:
            correction = np.zeros_like(self.grad)
        if self.is_lazy:
            self.cum_loss += self.grad
            self.middle_x = self.init_x * np.exp(-step_size *
                                                 (self.cum_loss + correction))
        else:
            self.middle_x = self.middle_x * np.exp(-step_size *
                                                   (self.grad + correction))
        self.middle_x = self.domain.project(self.middle_x)
        self.compute_internal_optimism(env)
        self.t += 1
        return self.x, loss, surrogate_loss

[docs]    def reinit(self):
        self.__init__(self.domain, self.step_size, self.optimism_type,
                      self.is_lazy, self.correct, self.prior, self.seed)


[docs]class Hedge(OptimisticHedge):
    """Implementation of Hedge.

    ``Hedge`` is the most popular algorithm for tracking the best expert
    problem. There are two versions of ``Hedge``: the greedy version and the
    lazy version. The greedy version is a special case of ``Optimistic OMD``
    with the negative entropy as the regularizer and ``optimism_type=None`` and
    the lazy version is a special case of ``Optimistic FTRL`` with the negative
    entropy as the regularizer and ``optimism_type=None``. The greedy version
    ``Hedge`` updates the decision :math:`x_{t+1}` by

    .. math::

        x_{t+1} = \Pi_{\mathcal{X}}[x_t\exp(-\eta_t (\\nabla f_t({x}_t) +a_t))],

    and the lazy version ``Hedge`` updates the decision :math:`x_{t+1}` by

    .. math::

        x_{t+1} = \Pi_{\mathcal{X}}[x_1 \exp(-\eta_t (\sum_{s=1}^{t} (\\nabla
        f_s(x_s) +a_s)))],

    where :math:`\eta_t > 0` is the step size, :math:`a_t` is the correction
    term at round :math:`t`, and :math:`\Pi_{\mathcal{X}}[\cdot]` denotes the
    projection onto the nearest point in :math:`\mathcal{X}`.

    Args:
        domain (Domain): Feasible set for the base algorithm.
        step_size (float, numpy.ndarray): the step size :math:`\eta` for the
            base algorithm. Valid types include ``float`` and ``numpy.ndarray``. If
            the type of the step size :math:`\eta` is `float`, the algorithm will
            use the fixed step size all the time, otherwise, the algorithm will use
            the step size :math:`\eta_t` at round :math:`t`.
        is_lazy (bool): Type of the update version: lazy or greedy. The
            default is False.
        correct (bool): Whether to use correction term. :math:`a_t` is
            set as :math:`a_t = \\nabla f_t(x_t)^2` if ``correct=True`` and
            :math:`a_t=0` otherwise. The default is False.
        prior (str, numpy.ndarray, optional): The initial decision of the
            algorithm is set as ``domain.init_x(prior, seed)``.
        seed (int, optional): The initial decision of the algorithm is set as
            ``domain.init_x(prior, seed)``.

    """

    def __init__(self,
                 domain: Domain,
                 step_size: Union[float, list],
                 is_lazy: bool = False,
                 correct: bool = False,
                 prior: Optional[Union[list, np.ndarray]] = None,
                 seed: Optional[int] = None):
        super().__init__(domain, step_size, None, is_lazy, correct, prior,
                         seed)