# Source code for pynol.learner.schedule.schedule

from copy import deepcopy
from typing import Optional, Union

import numpy as np
from pynol.environment.environment import Environment
from pynol.learner.schedule.cover import (Cover, FullCover, PCover)
from pynol.learner.schedule.ssp import SSP


class Schedule:
    """Schedule base-learners with a problem-independent cover.

    The schedule keeps the base-learners taken from ``ssp`` together with a
    ``cover`` that exposes one ``active_state`` entry per base-learner.
    Entries greater than 0 mark a base-learner as active; entries equal to 2
    additionally cause that base-learner to be re-initialized when the round
    counter ``t`` is assigned.

    Args:
        ssp (SSP): A ssp instance containing a bunch of initialized
            base-learners.
        cover (Cover, optional): A cover instance deciding the active state
            of base-learners. When omitted, ``FullCover(len(ssp.bases))`` is
            used.
    """

    def __init__(self, ssp: SSP, cover: Optional[Cover] = None):
        self.bases = ssp.bases
        if cover is None:
            cover = FullCover(len(self.bases))
        self.cover = cover
        self._t = 0
        self._optimism = None
        # Before ``t`` is assigned for the first time the cover is not
        # consulted: every base-learner starts out as active.
        self.active_state = np.ones(len(self.bases))
        self.active_index = np.where(self.active_state > 0)[0]

    def opt_by_optimism(self, optimism: np.ndarray):
        """Optimize all active base-learners by the optimism.

        Args:
            optimism (numpy.ndarray): External optimism, passed unchanged to
                ``opt_by_optimism`` of every active base-learner.
        """
        for base_idx in self.active_index:
            self.bases[base_idx].opt_by_optimism(optimism)

    def opt_by_gradient(self, env: Environment):
        """Optimize all active base-learners by the gradient.

        Args:
            env (Environment): Environment at current round.

        Returns:
            tuple: tuple contains:
                loss (numpy.ndarray): the origin loss of each active
                base-learner, ordered like ``active_index``. \n
                surrogate_loss (numpy.ndarray): the surrogate loss of each
                active base-learner, ordered like ``active_index``.
        """
        loss = np.zeros(len(self.active_index))
        surrogate_loss = np.zeros_like(loss)
        for pos, base_idx in enumerate(self.active_index):
            # The first element returned by the base-learner is not needed.
            _, loss[pos], surrogate_loss[pos] = (
                self.bases[base_idx].opt_by_gradient(env))
        return loss, surrogate_loss

    @property
    def t(self):
        """Number of the current round.

        Reading returns the stored round number. Assigning a value stores
        it, forwards it to ``cover.t``, refreshes ``active_state`` and
        ``active_index`` from the cover and reinitializes the base-learners
        whose active state is 2.
        """
        return self._t

    @t.setter
    def t(self, t):
        self._t = t
        # The cover is given the new round first, then its active state is
        # read back.
        self.cover.t = t
        self.active_state = self.cover.active_state
        self.active_index = np.where(self.active_state > 0)[0]
        self.reinit_bases()

    @property
    def x_active_bases(self):
        """Get the decisions of all active base-learners.

        Returns:
            numpy.ndarray: The ``x`` attribute of every active base-learner,
            stacked in the order of ``active_index``.
        """
        return np.array([self.bases[i].x for i in self.active_index])

    @property
    def optimism(self):
        """Get the optimisms of all active base-learners.

        Returns:
            numpy.ndarray: Array shaped like ``x_active_bases`` whose i-th
            entry is the ``optimism`` of the i-th active base-learner. The
            array is also stored in ``self._optimism``.
        """
        # ``x_active_bases`` only serves as the shape/dtype template here.
        self._optimism = np.zeros_like(self.x_active_bases)
        for pos, base_idx in enumerate(self.active_index):
            self._optimism[pos] = self.bases[base_idx].optimism
        return self._optimism

    def reinit_bases(self):
        """Reinitialize the base-learners whose active state is 2."""
        for base_idx in np.where(self.active_state == 2)[0]:
            self.bases[base_idx].reinit()
class PSchedule(Schedule):
    """Schedule base-learners with a problem-dependent cover.

    On top of the behaviour of :class:`Schedule`, a private deep copy of the
    first base-learner (``self._instance``) is optimized every round. Its
    loss is handed to ``cover.set_instance_loss``; when that call returns a
    truthy value the copy is reinitialized.

    The round counter ``t``, ``x_active_bases``, ``optimism`` and
    ``reinit_bases`` are inherited unchanged from :class:`Schedule`.

    Args:
        ssp (SSP): A ssp instance containing a bunch of initialized
            base-learners.
        cover (PCover, optional): A cover instance deciding the active state
            of base-learners.
    """

    def __init__(self, ssp: SSP, cover: Optional[PCover] = None):
        super().__init__(ssp, cover)
        # NOTE(review): with ``cover=None`` the parent constructor falls back
        # to ``FullCover``, while ``opt_by_gradient`` below calls
        # ``self.cover.set_instance_loss`` -- confirm that ``FullCover``
        # provides it, or always pass a ``PCover``.
        self._instance = deepcopy(self.bases[0])

    def opt_by_optimism(self, optimism: np.ndarray):
        """Optimize all active base-learners and the instance by the optimism.

        Args:
            optimism (numpy.ndarray): External optimism, passed unchanged to
                every active base-learner and then to the private instance.
        """
        super().opt_by_optimism(optimism)
        self._instance.opt_by_optimism(optimism)

    def opt_by_gradient(self, env: Environment):
        """Optimize all active base-learners and the instance by the gradient.

        Args:
            env (Environment): Environment at current round.

        Returns:
            tuple: tuple contains:
                loss (numpy.ndarray): the origin loss of each active
                base-learner, ordered like ``active_index``. \n
                surrogate_loss (numpy.ndarray): the surrogate loss of each
                active base-learner, ordered like ``active_index``.
        """
        # Base-learners are updated first, then the private instance, which
        # keeps the order of the original hand-written loop.
        loss, surrogate_loss = super().opt_by_gradient(env)
        _, instance_loss, _ = self._instance.opt_by_gradient(env)
        # The cover decides from the instance loss whether the instance has
        # to start over.
        if self.cover.set_instance_loss(instance_loss):
            self._instance.reinit()
        return loss, surrogate_loss