Source code for pynol.environment.environment

from typing import Callable, Iterable, Optional
from autograd import grad as grad_solver
import numpy as np


class Environment:
    """Class for the environment, including loss function, optimism, and so on.

    At each round, the environment can set two loss functions: the origin
    loss function ``func`` :math:`f_t` and the surrogate loss function
    ``surrogate_func`` :math:`f'_t` (if any). Which function's gradient is
    given to the learner is determined by ``use_surrogate_grad``.

    Args:
        func_sequence (Iterable, optional): Loss function sequence for the whole
            time horizon. It is indexed as ``func_sequence[t]`` by
            ``__getitem__``, so it must support subscripting.
        optimism (numpy.ndarray, optional): Optimism at the beginning of the
            current round.
        func (Callable, optional): Origin loss function at the current round.
        grad (numpy.ndarray, optional): Gradient of all decisions for the origin
            loss function, only used when the gradient is the same for all
            decisions, namely, for a linear function.
        grad_func (Callable, optional): Gradient function for the origin loss
            function. It can be given by the environment to reduce the running
            time. If it is not given, the origin gradient function is computed
            by ``autograd``.
        surrogate_func (Callable, optional): Surrogate loss function at the
            current round.
        surrogate_grad (numpy.ndarray, optional): Gradient of all decisions for
            the surrogate loss function, only used when the gradient is the
            same for all decisions, namely, for a linear function.
        surrogate_grad_func (Callable, optional): Gradient function for the
            surrogate loss function. It can be given by the environment to
            reduce the running time. If it is not given, the surrogate gradient
            function is computed by ``autograd``.
        use_surrogate_grad (bool): Gradient of which function is returned by the
            environment.
        full_info (bool): Specify the type of feedback: full-information or
            bandit feedback. It is only stored here; no method of this class
            reads it.

    """

    def __init__(self,
                 func_sequence: Optional[Iterable] = None,
                 optimism: Optional[np.ndarray] = None,
                 func: Optional[Callable[[np.ndarray], float]] = None,
                 grad: Optional[np.ndarray] = None,
                 grad_func: Optional[Callable[[np.ndarray], np.ndarray]] = None,
                 surrogate_func: Optional[Callable[[np.ndarray], float]] = None,
                 surrogate_grad: Optional[np.ndarray] = None,
                 surrogate_grad_func: Optional[
                     Callable[[np.ndarray], np.ndarray]] = None,
                 use_surrogate_grad: bool = True,
                 full_info: bool = True) -> None:

        self.func_sequence = func_sequence
        self.optimism = optimism
        self.func = func
        self.grad = grad
        self.grad_func = grad_func
        self.surrogate_func = surrogate_func
        self.surrogate_grad = surrogate_grad
        self.surrogate_grad_func = surrogate_grad_func
        self.use_surrogate_grad = use_surrogate_grad
        self.full_info = full_info

    def __getitem__(self, t) -> "Environment":
        """Move the environment to round ``t`` and return the environment itself.

        ``func`` is replaced by ``func_sequence[t]`` and every gradient or
        surrogate attribute is reset to ``None``, so nothing cached for a
        previous round leaks into this one. ``optimism``,
        ``use_surrogate_grad`` and ``full_info`` are left untouched.

        Args:
            t: Index into ``func_sequence``.

        Returns:
            Environment: This same instance (not a copy).

        Raises:
            TypeError: If no ``func_sequence`` was given.
        """
        if self.func_sequence is None:
            # Subscripting ``None`` already raised TypeError; keep the type but
            # say which attribute is missing.
            raise TypeError(
                'cannot index the environment: `func_sequence` is not set')
        self.func = self.func_sequence[t]
        self.grad = None
        self.grad_func = None
        self.surrogate_func = None
        self.surrogate_grad = None
        self.surrogate_grad_func = None
        return self

    def get_loss(self, x: np.ndarray):
        """Get the loss value of the decision :math:`x`.

        Args:
            x (numpy.ndarray): Decision of the learner.

        Returns:
            tuple: tuple contains:

                loss (float): Origin loss value.

                surrogate_loss (float): Surrogate loss value, or ``None`` when
                no surrogate loss function is set.

        Raises:
            TypeError: If the origin loss function ``func`` is not set.
        """
        if self.func is None:
            # Calling ``None`` already raised TypeError; keep the type but make
            # the message actionable.
            raise TypeError(
                'cannot evaluate the loss: the loss function `func` is not set')
        loss = self.func(x)
        # Compare against None explicitly: a callable object may define
        # ``__bool__``/``__len__`` and evaluate as falsy while still being a
        # valid surrogate loss.
        if self.surrogate_func is not None:
            surrogate_loss = self.surrogate_func(x)
        else:
            surrogate_loss = None
        return loss, surrogate_loss

    def get_grad(self, x: np.ndarray):
        """Get the gradient of the decision :math:`x`.

        When ``use_surrogate_grad`` is set, the surrogate gradient is preferred
        and resolved in this order: the constant ``surrogate_grad``, then
        ``surrogate_grad_func``, then a gradient function built from
        ``surrogate_func`` by ``autograd``. If none of them is available, or
        ``use_surrogate_grad`` is not set, the origin gradient is resolved in
        the same order from ``grad``, ``grad_func`` and ``func``.

        A gradient function built by ``autograd`` is stored on the instance
        (``surrogate_grad_func`` / ``grad_func``) and reused by later calls
        until ``__getitem__`` resets it.

        Args:
            x (numpy.ndarray): Decision of the learner.

        Returns:
            numpy.ndarray: Gradient of the decision :math:`x`.

        Raises:
            TypeError: If the origin gradient is needed but none of ``grad``,
                ``grad_func`` and ``func`` is set.
        """
        if self.use_surrogate_grad:
            if self.surrogate_grad is not None:
                return self.surrogate_grad
            if (self.surrogate_grad_func is None
                    and self.surrogate_func is not None):
                self.surrogate_grad_func = grad_solver(self.surrogate_func)
            if self.surrogate_grad_func is not None:
                return self.surrogate_grad_func(x)
            # No surrogate information at all: fall back to the origin loss.

        if self.grad is not None:
            return self.grad
        if self.grad_func is None:
            if self.func is None:
                # Differentiating ``None`` used to fail inside autograd with an
                # unrelated-looking TypeError; fail early with a clear one.
                raise TypeError(
                    'cannot compute the gradient: none of `grad`, `grad_func` '
                    'and `func` is set')
            self.grad_func = grad_solver(self.func)
        return self.grad_func(x)