Source code for pynol.online_learning

from multiprocessing import Pool
from typing import Union
import time
import numpy as np

from pynol.environment.environment import Environment
from pynol.learner.base import Base
from pynol.learner.models.model import Model


[docs]def online_learning(T, env: Environment, learner: Union[Base, Model]): """Combine the environment and learner, start the online learning process. Args: T (int): Number of total round. env (Environment): Environment. learner (Base, Model): Learner. Returns: tuple: tuple contains: x (numpy.ndarray): Decisions over :math:`T` rounds. \n loss (numpy.ndarray): Losses over :math:`T` rounds. \n surrogate_loss (numpy.ndarray): Surrogate losses over :math:`T` rounds. \n tm (numpy.ndarray): Time cost over :math:`T` rounds. """ if hasattr(learner, 'domain'): dimension = learner.domain.dimension else: dimension = learner.schedule.bases[0].domain.dimension x = np.zeros((T, dimension)) loss, surrogate_loss = np.zeros(T), np.zeros(T) tm = np.zeros(T) for t in range(T): start_time = time.time() x[t], loss[t], surrogate_loss[t] = learner.opt(env[t]) tm[t] = time.time() - start_time return x, loss, surrogate_loss, tm
[docs]def multiple_online_learning(T, env: Environment, learners: list, processes=4): """Combine the environment and multiple learners, start the online learning process with multiprocessing to speed up. Args: T (int): Number of total round. env (Environment): Environment. learners (list): Learners. Returns: tuple: tuple contains: x (numpy.ndarray): Decisions of all learners over :math:`T` rounds. \n loss (numpy.ndarray): Losses of all learners over :math:`T` rounds. \n surrogate_loss (numpy.ndarray): Surrogate losses of all learners over :math:`T` rounds. \n tm (numpy.ndarray): Time cost of all learners over :math:`T` rounds. """ if hasattr(learners[0][0], 'domain'): dimension = learners[0][0].domain.dimension else: dimension = learners[0][0].schedule.bases[0].domain.dimension num_learners, num_repeat = len(learners), len(learners[0]) x = np.zeros((num_learners, num_repeat, T, dimension)) loss, surrogate_loss = np.zeros((num_learners, num_repeat, T)), np.zeros((num_learners, num_repeat, T)) tm = np.zeros_like(loss) p = Pool(processes=processes) results = [] for i in range(num_learners): for j in range(num_repeat): results.append( (i, j, p.apply_async(online_learning, (T, env, learners[i][j])))) p.close() p.join() for i, j, result in results: x[i][j], loss[i][j], surrogate_loss[i][j], tm[i][j] = result.get() return x, loss, surrogate_loss, tm