Source code for pynol.environment.environment

from typing import Callable, Iterable, Optional
from autograd import grad as grad_solver
import numpy as np


class Environment:
    """Class for the environment, including loss function, optimism, and so on.

    At each round, the environment can set two loss functions, one is the
    origin loss function ``func`` :math:`f_t` and the other is the surrogate
    loss function ``surrogate_func`` :math:`f'_t` (if any). The gradient of
    which function is given to the learner is determined by
    ``use_surrogate_grad``.

    Args:
        func_sequence (Iterable, optional): Loss function sequence for whole
            time horizon. It is indexed by round in ``__getitem__``.
        optimism (numpy.ndarray): Optimism at the beginning of current round.
        func (Callable, optional): Origin loss function at current round.
        grad (numpy.ndarray, optional): Gradient of all decisions for origin
            loss function, only used when the gradient of all decisions are
            the same, namely, the linear function.
        grad_func (Callable, optional): Gradient function for origin loss
            function. It can be given by the environment to accelerate the
            running time. If it is not given, the origin gradient function
            will be computed by ``autograd``.
        surrogate_func (Callable, optional): Surrogate loss function at
            current round.
        surrogate_grad (numpy.ndarray, optional): Gradient of all decisions
            for surrogate loss function, only used when the gradient of all
            decisions are the same, namely, the linear function.
        surrogate_grad_func (Callable, optional): Gradient function for
            surrogate loss function. It can be given by the environment to
            accelerate the running time. If it is not given, the surrogate
            gradient function will be computed by ``autograd``.
        use_surrogate_grad (bool): Gradient of which function is returned by
            the environment.
        full_info (bool): Specify the type of feedback: full-information or
            bandit feedback.
    """

    def __init__(
            self,
            func_sequence: Optional[Iterable] = None,
            optimism: Optional[np.ndarray] = None,
            func: Optional[Callable[[np.ndarray], float]] = None,
            grad: Optional[np.ndarray] = None,
            grad_func: Optional[Callable[[np.ndarray], np.ndarray]] = None,
            surrogate_func: Optional[Callable[[np.ndarray], float]] = None,
            surrogate_grad: Optional[np.ndarray] = None,
            surrogate_grad_func: Optional[Callable[[np.ndarray],
                                                   np.ndarray]] = None,
            use_surrogate_grad: bool = True,
            full_info: bool = True) -> None:
        self.func_sequence = func_sequence
        self.optimism = optimism
        self.func = func
        self.grad = grad
        self.grad_func = grad_func
        self.surrogate_func = surrogate_func
        self.surrogate_grad = surrogate_grad
        self.surrogate_grad_func = surrogate_grad_func
        self.use_surrogate_grad = use_surrogate_grad
        self.full_info = full_info

    def __getitem__(self, t):
        """Switch the environment to round ``t`` and return itself.

        The origin loss function is replaced by ``func_sequence[t]`` and all
        gradient / surrogate information of the previous round is cleared.
        ``optimism``, ``use_surrogate_grad`` and ``full_info`` are left
        untouched.

        Args:
            t: Index of the round inside ``func_sequence``.

        Returns:
            Environment: This very instance (not a copy), updated in place.

        Raises:
            TypeError: If no ``func_sequence`` was given.
        """
        if self.func_sequence is None:
            # Same exception type as indexing ``None`` would raise, but with
            # a message that points at the actual cause.
            raise TypeError(
                'Environment cannot be indexed: `func_sequence` is None.')
        self.func = self.func_sequence[t]
        # Stale per-round information must not leak into the new round.
        self.grad = None
        self.grad_func = None
        self.surrogate_func = None
        self.surrogate_grad = None
        self.surrogate_grad_func = None
        return self

    def get_loss(self, x: np.ndarray) -> tuple:
        """Get the loss value of the decision :math:`x`.

        Args:
            x (numpy.ndarray): Decision of the learner.

        Returns:
            tuple: tuple contains:

            * loss (float): Origin loss value.
            * surrogate_loss (float): Surrogate loss value, or ``None`` when
              no surrogate loss function is set.
        """
        loss = self.func(x)
        # Test against ``None`` explicitly (as ``get_grad`` does) so that a
        # callable object which happens to be falsy is still evaluated.
        if self.surrogate_func is not None:
            surrogate_loss = self.surrogate_func(x)
        else:
            surrogate_loss = None
        return loss, surrogate_loss

    def get_grad(self, x: np.ndarray) -> np.ndarray:
        """Get the gradient of the decision :math:`x`.

        When ``use_surrogate_grad`` is set, the surrogate gradient is looked
        up in the order ``surrogate_grad``, ``surrogate_grad_func``, and
        finally a gradient function built from ``surrogate_func`` by
        ``autograd`` (which is then cached in ``surrogate_grad_func``). If
        none of them is available, or ``use_surrogate_grad`` is not set, the
        origin gradient is returned using the same order: ``grad``,
        ``grad_func``, then ``autograd`` on ``func`` (cached in
        ``grad_func``).

        Args:
            x (numpy.ndarray): Decision of the learner.

        Returns:
            numpy.ndarray: Gradient of the decision :math:`x`.
        """
        if self.use_surrogate_grad:
            if self.surrogate_grad is not None:
                return self.surrogate_grad
            if self.surrogate_grad_func is not None:
                return self.surrogate_grad_func(x)
            if self.surrogate_func is not None:
                self.surrogate_grad_func = grad_solver(self.surrogate_func)
                return self.surrogate_grad_func(x)
            # No surrogate information at all: fall back to the origin loss.
        if self.grad is not None:
            return self.grad
        if self.grad_func is None:
            self.grad_func = grad_solver(self.func)
        return self.grad_func(x)