Source code for pynol.learner.specification.optimism_meta
from abc import ABC, abstractmethod
import numpy as np
from pynol.learner.meta import Hedge
class OptimismMeta(ABC):
    """Abstract base class defining how the meta-algorithm obtains its optimism.

    Args:
        is_external (bool): Whether the optimism of the meta-algorithm depends
            on optimism given by the environment (``True``) or is computed by
            the algorithm itself (``False``). The default is True.

    Attributes:
        is_external (bool): The flag passed to the constructor, stored
            unchanged.
    """

    def __init__(self, is_external: bool = True):
        self.is_external = is_external

    @abstractmethod
    def compute_optimism_meta(self, variables=None):
        """Compute the optimism for meta-algorithm.

        Args:
            variables: Optional mapping holding the runtime state that a
                concrete implementation reads from. It is optional here only
                so that existing zero-argument calls keep working; concrete
                subclasses are expected to accept it.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError()
class InnerSwitchingOptimismMeta(OptimismMeta):
    r"""Optimism for the meta-algorithm built from an inner product plus a
    switching-cost penalty.

    Args:
        penalty (float): Penalty coefficient of switching cost term.
        norm (int): Order of norm :math:`p`.
        order (int): Order of switching cost :math:`q`.
    """

    def __init__(self, penalty: float, norm: int = 2, order: int = 2):
        super().__init__(is_external=True)
        self.penalty = penalty
        self.norm = norm
        self.order = order

    def compute_optimism_meta(self, variables):
        r"""Set the surrogate loss of meta-algorithm as

        .. math::

            M_{t,i} = \langle m_t, x_{t,i} \rangle + \lambda \lVert
            x_{t,i} - x_{t-1, i} \rVert_p^q,

        where :math:`m_t` is the optimism read from
        ``variables['schedule'].optimism``, :math:`x_{t, i}` is the decision of
        base-learner i at round :math:`t` (read from
        ``variables['schedule'].x_active_bases``), :math:`x_{t-1, i}` is taken
        from ``variables['x_bases']`` and :math:`\lambda` is ``self.penalty``.

        Args:
            variables: Mapping that provides ``'schedule'`` and, optionally,
                ``'x_bases'``. When ``'x_bases'`` is missing the switching-cost
                term evaluates to zero.

        Returns:
            The value computed by :meth:`inner_switching`, one entry per row
            of the base-learner decisions.
        """
        schedule = variables['schedule']
        # A missing 'x_bases' entry becomes None, which inner_switching
        # treats as "no movement".
        return self.inner_switching(schedule.x_active_bases,
                                    schedule.optimism,
                                    self.penalty,
                                    variables.get('x_bases'),
                                    self.norm,
                                    self.order)

    @staticmethod
    def inner_switching(x, gradient, penalty, x_last, norm=2, order=2):
        """Evaluate the inner product plus the penalized switching cost.

        Args:
            x: Array of decisions; reduced along ``axis=1``, so one value is
                produced per row.
            gradient: Array multiplied element-wise with ``x``.
            penalty: Coefficient applied to the switching-cost term.
            x_last: Array subtracted from ``x`` inside the norm, or ``None``
                to use ``x`` itself (making the switching cost zero).
            norm: ``ord`` argument passed to ``np.linalg.norm``.
            order: Power the norm is raised to.

        Returns:
            ``(x * gradient).sum(axis=1) + penalty * ||x - x_last||_norm ** order``.
        """
        if x_last is None:
            x_last = x
        inner = (x * gradient).sum(axis=1)
        switching = np.linalg.norm(x - x_last, ord=norm, axis=1)**order
        return inner + penalty * switching
class InnerOptimismMeta(InnerSwitchingOptimismMeta):
    """Inner-product optimism for the meta-algorithm, without switching cost.

    .. Note::
        ``InnerOptimismMeta`` is a special case of
        ``InnerSwitchingOptimismMeta`` with ``penalty = 0``.
    """

    def __init__(self):
        # __init__ must not return a value; just delegate with a zero penalty.
        super().__init__(penalty=0.)
class SwordVariationOptimismMeta(OptimismMeta):
    """The optimism of meta-algorithm used in SwordVariation."""

    def __init__(self):
        super().__init__(is_external=False)

    def compute_optimism_meta(self, variables):
        r"""Set the optimism for meta-algorithm as :math:`M_{t,i} = \langle
        \nabla f_{t-1}(\bar{x}_t), x_{t,i} \rangle` with :math:`\bar{x}_t =
        \sum_{i=1}^N p_{t-1,i}x_{t,i}`, where :math:`x_{t,i}` is the decision of
        the base-learner :math:`i` at round :math:`t`, and :math:`p_{t-1}` is
        the decision of meta-algorithm at round :math:`t-1`.

        Args:
            variables: Mapping that provides ``'schedule'`` (read for
                ``x_active_bases``), ``'meta'`` (read for ``prob``) and
                ``'env'`` (its ``get_grad`` is called on the combined
                decision).

        Returns:
            ``np.dot(x_bases, gradient)``, where ``gradient`` is the value
            returned by ``variables['env'].get_grad``.
        """
        x_bases = variables['schedule'].x_active_bases
        # Combine the base decisions using the meta-algorithm's current weights.
        x_combined = np.dot(variables['meta'].prob, x_bases)
        optimism = variables['env'].get_grad(x_combined)
        return np.dot(x_bases, optimism)
class SwordBestOptimismMeta(OptimismMeta):
    """The optimism of meta-algorithm used in SwordBest, which learns from two
    optimism sequences via another expert-tracking algorithm."""

    def __init__(self) -> None:
        super().__init__(is_external=False)
        # Two-expert Hedge that weighs the variation optimism against zero.
        self._meta = Hedge(prob=np.ones(2)/2, lr=1)
        # Surrogate gradient produced by the previous call; None until the
        # first call that receives a 'grad' entry.
        self._variation_optimism_last = None

    def compute_optimism_meta(self, variables):
        r"""To achieve the best-of-both-worlds results, this method will learn
        the best optimism from two optimism sequences :math:`M_t^v= \nabla
        f_{t-1}(\bar{x}_t), \bar{x}_t = \sum_{i=1}^{N_1}
        p_{t-1,i}x_{t,i}^v + \sum_{i=N_1+1}^{N_2} p_{t-1,i}x_{t,i}^s` and
        :math:`M_t^s = 0`. The final optimism is set as :math:`\langle M_t^b,
        x_{t,i} \rangle` with :math:`M_t^b = \beta_t M_t^v +
        (1-\beta_t)M_t^s`, where :math:`\beta_t` is updated by

        .. math::

            \beta_t = \frac{\exp(-2\sum_{s=1}^{t-1}\lVert\nabla
            f_s(x_s)-M_s^v\rVert_2^2)}{\exp(-2\sum_{s=1}^{t-1}\lVert\nabla
            f_s(x_s)-M_s^v\rVert_2^2) + \exp(-2\sum_{s=1}^{t-1}\lVert\nabla
            f_s(x_s)\rVert_2^2)}.

        .. Note::
            The weights are produced by the internal ``Hedge`` learner
            (constructed with ``lr=1``); the exact scaling inside the
            exponent is determined by that learner.

        Args:
            variables: Mapping that provides ``'schedule'``, ``'meta'``,
                ``'env'`` and, once available, ``'grad'``.

        Returns:
            ``None`` when ``variables`` has no ``'grad'`` entry; otherwise
            ``np.dot(x_bases, beta * surrogate_grad)``, where ``beta`` is the
            first weight returned by the internal Hedge learner.
        """
        if 'grad' not in variables:
            return None
        grad = variables['grad']
        if self._variation_optimism_last is None:
            # No earlier surrogate gradient to compare with: use zeros so
            # both experts receive the same loss on this call.
            self._variation_optimism_last = np.zeros_like(grad)

        # loss[0]: error of the variation optimism; loss[1]: error of the
        # zero optimism.
        loss = np.zeros(2)
        loss[0] = np.linalg.norm(grad - self._variation_optimism_last)**2
        loss[1] = np.linalg.norm(grad)**2
        prob_optimism = self._meta.opt(loss)

        x_bases = variables['schedule'].x_active_bases
        x_combined = np.dot(variables['meta'].prob, x_bases)
        surrogate_grad = variables['env'].get_grad(x_combined)
        # The zero expert contributes nothing, so only the first weight
        # scales the surrogate gradient.
        optimism = prob_optimism[0] * surrogate_grad
        optimism_meta = np.dot(x_bases, optimism)
        # Keep this round's surrogate gradient to score it on the next call.
        self._variation_optimism_last = surrogate_grad
        return optimism_meta