Source code for pynol.learner.specification.optimism_meta

from abc import ABC, abstractmethod

import numpy as np
from pynol.learner.meta import Hedge


class OptimismMeta(ABC):
    """Abstract base class defining the optimism for the meta-algorithm.

    Args:
        is_external (bool): Stored unchanged on the instance. The default is
            ``True``.

    Attributes:
        is_external (bool): Indicates whether the optimism of the
            meta-algorithm depends on the optimism given by the environment
            or is computed by the algorithm itself.
    """

    def __init__(self, is_external: bool = True):
        self.is_external = is_external

    @abstractmethod
    def compute_optimism_meta(self, variables=None):
        """Compute the optimism for the meta-algorithm.

        Args:
            variables: Per-round state handed to the concrete implementation.
                It is optional here only so that zero-argument calls keep
                working; the concrete subclasses in this module all accept
                it as their single positional argument.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError()
class InnerSwitchingOptimismMeta(OptimismMeta):
    r"""Optimism for the meta-algorithm built from an inner product plus a
    switching-cost term.

    Args:
        penalty (float): Penalty coefficient of switching cost term.
        norm (int): Order of norm :math:`p`.
        order (int): Order of switching cost :math:`q`.
    """

    def __init__(self, penalty: float, norm: int = 2, order: int = 2):
        super().__init__(is_external=True)
        self.penalty = penalty
        self.norm = norm
        self.order = order

    def compute_optimism_meta(self, variables):
        r"""Set the surrogate loss of meta-algorithm as

        .. math::

            M_{t,i} = \langle m_t, x_{t,i} \rangle
                + \lambda \lVert x_{t,i} - x_{t-1, i} \rVert_p^q,

        where :math:`x_{t, i}` is the decision of base-learner :math:`i` at
        round :math:`t`, :math:`m_t` is ``variables['schedule'].optimism``
        and :math:`\lambda` is ``self.penalty``.

        Args:
            variables: Mapping that provides ``'schedule'`` (read for
                ``x_active_bases`` and ``optimism``) and, optionally,
                ``'x_bases'`` holding the previous base decisions.

        Returns:
            The value of :meth:`inner_switching` for the current decisions.
        """
        schedule = variables['schedule']
        new_x_bases = schedule.x_active_bases
        # Without stored previous decisions the switching term becomes zero
        # (``inner_switching`` substitutes the current decisions).
        x_bases = variables['x_bases'] if 'x_bases' in variables else None
        optimism = schedule.optimism
        return self.inner_switching(new_x_bases, optimism, self.penalty,
                                    x_bases, self.norm, self.order)

    @staticmethod
    def inner_switching(x, gradient, penalty, x_last, norm=2, order=2):
        r"""Evaluate the inner product plus the penalized switching cost.

        Both terms are reduced over ``axis=1``, so ``x`` needs at least two
        dimensions and the result has one entry per row of ``x``.

        Args:
            x: Current decisions.
            gradient: Multiplied element-wise with ``x`` before the sum.
            penalty: Coefficient of the switching-cost term.
            x_last: Previous decisions; ``None`` is replaced by ``x``, which
                makes the switching cost zero.
            norm: ``ord`` passed to ``np.linalg.norm``.
            order: Exponent applied to the norm.

        Returns:
            ``(x * gradient).sum(axis=1)
            + penalty * ||x - x_last||_norm ** order``.
        """
        if x_last is None:
            x_last = x
        inner = (x * gradient).sum(axis=1)
        switching = np.linalg.norm(x - x_last, ord=norm, axis=1)**order
        return inner + penalty * switching
class InnerOptimismMeta(InnerSwitchingOptimismMeta):
    """Inner-product optimism for the meta-algorithm, without switching cost.

    .. Note::
        ``InnerOptimismMeta`` is a special case of
        ``InnerSwitchingOptimismMeta`` with ``penalty = 0``.
    """

    def __init__(self):
        # Zero penalty removes the switching-cost term of the parent class.
        super().__init__(penalty=0.0)
class SwordVariationOptimismMeta(OptimismMeta):
    """The optimism of meta-algorithm used in SwordVariation."""

    def __init__(self):
        super().__init__(is_external=False)

    def compute_optimism_meta(self, variables):
        r"""Set the optimism for meta-algorithm as
        :math:`M_{t,i} = \langle \nabla f_{t-1}(\bar{x}_t), x_{t,i} \rangle`
        with :math:`\bar{x}_t = \sum_{i=1}^N p_{t-1,i}x_{t,i}`, where
        :math:`x_{t,i}` is the decision of the base-learner :math:`i` at
        round :math:`t`, and :math:`p_{t-1}` is the decision of
        meta-algorithm at round :math:`t-1`.

        Args:
            variables: Mapping that provides ``'schedule'`` (read for
                ``x_active_bases``), ``'meta'`` (read for ``prob``) and
                ``'env'`` (its ``get_grad`` is called on the combined
                decision).

        Returns:
            ``np.dot(x_bases, grad)`` where ``grad`` is the gradient returned
            by the environment at the combined decision.
        """
        x_bases = variables['schedule'].x_active_bases
        # Weight the base decisions by the meta-algorithm's current weights.
        x_combined = np.dot(variables['meta'].prob, x_bases)
        optimism = variables['env'].get_grad(x_combined)
        return np.dot(x_bases, optimism)
class SwordBestOptimismMeta(OptimismMeta):
    """The optimism of meta-algorithm used in SwordBest, which learns from
    two optimism sequences via another expert-tracking algorithm."""

    def __init__(self) -> None:
        super().__init__(is_external=False)
        # Two-expert Hedge that weighs the variation optimism against zero.
        # NOTE(review): the documented update uses a factor 2 in the exponent
        # while ``lr=1`` is passed here -- confirm against ``Hedge``.
        self._meta = Hedge(prob=np.ones(2) / 2, lr=1)
        self._variation_optimism_last = None

    def compute_optimism_meta(self, variables):
        r"""To achieve the best-of-both-worlds results, this method will learn
        the best optimism from two optimism sequences
        :math:`M_t^v= \nabla f_{t-1}(\bar{x}_t), \bar{x}_t =
        \sum_{i=1}^{N_1} p_{t-1,i}x_{t,i}^v +
        \sum_{i=N_1+1}^{N_2} p_{t-1,i}x_{t,i}^s` and :math:`M_t^s = 0`. The
        final optimism is set as :math:`\langle M_t^b, x_{t,i} \rangle` with
        :math:`M_t^b = \beta_t M_t^v + (1-\beta_t)M_t^s`, where
        :math:`\beta_t` is updated by

        .. math::

            \beta_t = \frac{\exp(-2\sum_{s=1}^{t-1}\lVert\nabla f_s(x_s)
            -M_s^v\rVert_2^2)}{\exp(-2\sum_{s=1}^{t-1}\lVert\nabla f_s(x_s)
            -M_s^v\rVert_2^2) + \exp(-2\sum_{s=1}^{t-1}\lVert\nabla
            f_s(x_s)\rVert_2^2)}.

        Args:
            variables: Mapping that provides ``'schedule'`` (read for
                ``x_active_bases``), ``'meta'`` (read for ``prob``),
                ``'env'`` (its ``get_grad`` is called) and, once available,
                ``'grad'``.

        Returns:
            ``None`` while ``'grad'`` is absent from ``variables``; otherwise
            ``np.dot(x_bases, beta * surrogate_grad)`` where ``beta`` is the
            first entry returned by the internal Hedge.
        """
        if 'grad' not in variables:
            return None
        grad = variables['grad']
        if self._variation_optimism_last is None:
            self._variation_optimism_last = np.zeros_like(grad)

        # Squared errors of the two candidate optimisms on the last round:
        # index 0 is the variation optimism, index 1 is the zero optimism.
        loss = np.zeros(2)
        loss[0] = np.linalg.norm(grad - self._variation_optimism_last)**2
        loss[1] = np.linalg.norm(grad)**2
        prob_optimism = self._meta.opt(loss)

        x_bases = variables['schedule'].x_active_bases
        x_combined = np.dot(variables['meta'].prob, x_bases)
        surrogate_grad = variables['env'].get_grad(x_combined)
        # The second candidate is zero, so only the first weight contributes.
        optimism = prob_optimism[0] * surrogate_grad
        optimism_meta = np.dot(x_bases, optimism)
        # Remember this round's variation optimism for the next loss update.
        self._variation_optimism_last = surrogate_grad
        return optimism_meta