Source code for citylearn.wrappers

import itertools
from typing import Any, List, Mapping, Tuple
from gymnasium import ActionWrapper, Env, ObservationWrapper, RewardWrapper, spaces, Wrapper
import numpy as np
import pandas as pd

try:
    from ray.rllib.env import MultiAgentEnv
except (ModuleNotFoundError, ImportError) as e:
    from gymnasium import Env as MultiAgentEnv

from citylearn.citylearn import CityLearnEnv
from citylearn.building import Building

class ClippedObservationWrapper(ObservationWrapper):
    """Wrapper for clipping observations so that they are always within the observation space limits.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv) -> None:
        super().__init__(env)
        self.env: CityLearnEnv

    def observation(self, observations: List[List[float]]) -> List[List[float]]:
        """Returns observations clipped to the observation space limits."""

        for i, (o, s) in enumerate(zip(observations, self.observation_space)):
            for j, (o_, l, u) in enumerate(zip(o, s.low, s.high)):
                observations[i][j] = min(max(o_, l), u)

        return observations
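A minimal usage sketch for this wrapper; the schema name below is a placeholder, not a specific dataset shipped with CityLearn.

# Usage sketch (schema name is a placeholder; substitute a valid CityLearn schema or dataset name).
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import ClippedObservationWrapper

env = CityLearnEnv('example_schema', central_agent=True)
env = ClippedObservationWrapper(env)
observations, _ = env.reset()  # observations now lie within env.observation_space limits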
class NormalizedObservationWrapper(ObservationWrapper):
    """Wrapper for observations min-max and periodic normalization.

    Temporal observations including `hour`, `day_type` and `month` are periodically normalized using sine/cosine transformations and then all observations are min-max normalized between 0 and 1.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv) -> None:
        super().__init__(env)
        self.env: CityLearnEnv

    @property
    def shared_observations(self) -> List[str]:
        """Names of common observations across all buildings i.e. observations that have the same value irrespective of the building.

        Includes the three extra observations added during cyclic transformation of :code:`hour`, :code:`day_type` and :code:`month`.
        """

        shared_observations = []
        periodic_observation_names = list(Building.get_periodic_observation_metadata().keys())

        for o in self.env.shared_observations:
            if o in periodic_observation_names:
                shared_observations += [f'{o}_cos', f'{o}_sin']
            else:
                shared_observations.append(o)

        return shared_observations

    @property
    def observation_names(self) -> List[List[str]]:
        """Names of returned observations.

        Includes the three extra observations added during cyclic transformation of :code:`hour`, :code:`day_type` and :code:`month`.

        Notes
        -----
        If `central_agent` is True, a list of 1 sublist containing all building observation names is returned in the same order as `buildings`. The `shared_observations` names are only included in the first building's observation names. If `central_agent` is False, a list of sublists is returned where each sublist is a list of 1 building's observation names and the sublists are in the same order as `buildings`.
        """

        if self.env.unwrapped.central_agent:
            observation_names = []

            for i, b in enumerate(self.env.buildings):
                for k, _ in b.observations(normalize=True, periodic_normalization=True).items():
                    if i == 0 or k not in self.shared_observations or k not in observation_names:
                        observation_names.append(k)
                    else:
                        pass

            observation_names = [observation_names]

        else:
            observation_names = [list(b.observations(normalize=True, periodic_normalization=True).keys()) for b in self.env.buildings]

        return observation_names

    @property
    def observation_space(self) -> List[spaces.Box]:
        """Returns observation space for normalized observations."""

        low_limit = []
        high_limit = []

        if self.env.unwrapped.central_agent:
            shared_observations = []

            for i, b in enumerate(self.env.buildings):
                s = b.estimate_observation_space(normalize=True)
                o = b.observations(normalize=True, periodic_normalization=True)

                for k, lv, hv in zip(o, s.low, s.high):
                    if i == 0 or k not in self.shared_observations or k not in shared_observations:
                        low_limit.append(lv)
                        high_limit.append(hv)
                    else:
                        pass

                    if k in self.shared_observations and k not in shared_observations:
                        shared_observations.append(k)
                    else:
                        pass

            observation_space = [spaces.Box(low=np.array(low_limit), high=np.array(high_limit), dtype=np.float32)]

        else:
            observation_space = [b.estimate_observation_space(normalize=True) for b in self.env.buildings]

        return observation_space

    def observation(self, observations: List[List[float]]) -> List[List[float]]:
        """Returns normalized observations."""

        if self.env.unwrapped.central_agent:
            norm_observations = []
            shared_observations = []

            for i, b in enumerate(self.env.buildings):
                for k, v in b.observations(normalize=True, periodic_normalization=True).items():
                    if i == 0 or k not in self.shared_observations or k not in shared_observations:
                        norm_observations.append(v)
                    else:
                        pass

                    if k in self.shared_observations and k not in shared_observations:
                        shared_observations.append(k)
                    else:
                        pass

            norm_observations = [norm_observations]

        else:
            norm_observations = [list(b.observations(normalize=True, periodic_normalization=True).values()) for b in self.env.buildings]

        return norm_observations
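A short sketch of how the wrapper changes the reported observation names; the schema name is a placeholder. Periodic observations are split into cosine/sine pairs, so for example `hour` appears as `hour_cos` and `hour_sin`.

# Usage sketch (schema name is a placeholder).
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper

env = CityLearnEnv('example_schema', central_agent=True)
env = NormalizedObservationWrapper(env)
observations, _ = env.reset()
print(env.observation_names)  # periodic observations appear as e.g. 'hour_cos' and 'hour_sin'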
class NormalizedActionWrapper(ActionWrapper):
    """Wrapper for action min-max normalization.

    All actions are min-max normalized between 0 and 1.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv) -> None:
        super().__init__(env)
        self.env: CityLearnEnv

    @property
    def action_space(self) -> List[spaces.Box]:
        """Returns action space for normalized actions."""

        low_limit = []
        high_limit = []

        if self.env.unwrapped.central_agent:
            for b in self.env.buildings:
                low_limit += [0.0]*b.action_space.low.size
                high_limit += [1.0]*b.action_space.high.size

            action_space = [spaces.Box(low=np.array(low_limit), high=np.array(high_limit), dtype=np.float32)]

        else:
            action_space = [spaces.Box(
                low=np.array([0.0]*b.action_space.low.size),
                high=np.array([1.0]*b.action_space.high.size),
                dtype=np.float32
            ) for b in self.env.buildings]

        return action_space

    def action(self, actions: List[float]) -> List[List[float]]:
        """Returns denormalized actions."""

        transformed_actions = []

        for i, s in enumerate(self.env.unwrapped.action_space):
            transformed_actions_ = []

            for j, (l, h) in enumerate(zip(s.low, s.high)):
                a = actions[i][j]*(h - l) + l
                transformed_actions_.append(a)

            transformed_actions.append(transformed_actions_)

        return transformed_actions
class NormalizedSpaceWrapper(Wrapper):
    """Wrapper for normalized observation and action spaces.

    Wraps `env` in :py:class:`citylearn.wrappers.NormalizedObservationWrapper` and :py:class:`citylearn.wrappers.NormalizedActionWrapper`.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv):
        env = NormalizedObservationWrapper(env)
        env = NormalizedActionWrapper(env)
        super().__init__(env)
        self.env: CityLearnEnv
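A usage sketch combining both normalizations; the schema name is a placeholder. Actions are sampled from the wrapper's normalized [0, 1] space and denormalized internally before reaching the environment.

# Usage sketch (schema name is a placeholder).
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedSpaceWrapper

env = CityLearnEnv('example_schema', central_agent=True)
env = NormalizedSpaceWrapper(env)
observations, _ = env.reset()
actions = [list(s.sample()) for s in env.action_space]  # sampled in the normalized [0, 1] action space
observations, reward, terminated, truncated, info = env.step(actions)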
class DiscreteObservationWrapper(ObservationWrapper):
    """Wrapper for observation space discretization.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active observation in each building.
    default_bin_size: int, default = 10
        The default number of bins if `bin_sizes` is unspecified for any active building observation.
    """

    def __init__(self, env: CityLearnEnv, bin_sizes: List[Mapping[str, int]] = None, default_bin_size: int = None):
        super().__init__(env)
        self.env: CityLearnEnv
        assert bin_sizes is None or len(bin_sizes) == len(self.env.buildings), 'length of bin_sizes must equal number of buildings.'
        self.bin_sizes = [{} for _ in self.env.buildings] if bin_sizes is None else bin_sizes
        self.default_bin_size = 10 if default_bin_size is None else default_bin_size
        self.bin_sizes = [
            {o: s.get(o, self.default_bin_size) for o in b.active_observations}
            for b, s in zip(self.env.buildings, self.bin_sizes)
        ]

    @property
    def observation_space(self) -> List[spaces.MultiDiscrete]:
        """Returns observation space for discretized observations."""

        if self.env.unwrapped.central_agent:
            bin_sizes = []
            shared_observations = []

            for i, b in enumerate(self.bin_sizes):
                for k, v in b.items():
                    if i == 0 or k not in self.env.shared_observations or k not in shared_observations:
                        bin_sizes.append(v)
                    else:
                        pass

                    if k in self.env.shared_observations and k not in shared_observations:
                        shared_observations.append(k)
                    else:
                        pass

            observation_space = [spaces.MultiDiscrete(bin_sizes)]

        else:
            observation_space = [spaces.MultiDiscrete(list(b.values())) for b in self.bin_sizes]

        return observation_space

    def observation(self, observations: List[List[float]]) -> np.ndarray:
        """Returns discretized observations."""

        transformed_observations = []

        for i, (cs, ds) in enumerate(zip(self.env.unwrapped.observation_space, self.observation_space)):
            transformed_observations_ = []

            for j, (ll, hl, b) in enumerate(zip(cs.low, cs.high, ds)):
                o = np.digitize(observations[i][j], np.linspace(ll, hl, b.n), right=True)
                transformed_observations_.append(o)

            transformed_observations.append(transformed_observations_)

        return transformed_observations
class DiscreteActionWrapper(ActionWrapper):
    """Wrapper for action space discretization.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active action in each building.
    default_bin_size: int, default = 10
        The default number of bins if `bin_sizes` is unspecified for any active building action.
    """

    def __init__(self, env: CityLearnEnv, bin_sizes: List[Mapping[str, int]] = None, default_bin_size: int = None):
        super().__init__(env)
        self.env: CityLearnEnv
        assert bin_sizes is None or len(bin_sizes) == len(self.env.buildings), 'length of bin_sizes must equal number of buildings.'
        self.bin_sizes = [{} for _ in self.env.buildings] if bin_sizes is None else bin_sizes
        self.default_bin_size = 10 if default_bin_size is None else default_bin_size
        self.bin_sizes = [
            {a: s.get(a, self.default_bin_size) for a in b.active_actions}
            for b, s in zip(self.env.buildings, self.bin_sizes)
        ]

    @property
    def action_space(self) -> List[spaces.MultiDiscrete]:
        """Returns action space for discretized actions."""

        if self.env.unwrapped.central_agent:
            bin_sizes = []

            for b in self.bin_sizes:
                for _, v in b.items():
                    bin_sizes.append(v)

            action_space = [spaces.MultiDiscrete(bin_sizes)]

        else:
            action_space = [spaces.MultiDiscrete(list(b.values())) for b in self.bin_sizes]

        return action_space

    def action(self, actions: List[float]) -> List[List[float]]:
        """Returns undiscretized actions."""

        transformed_actions = []

        for i, (cs, ds) in enumerate(zip(self.env.unwrapped.action_space, self.action_space)):
            transformed_actions_ = []

            for j, (ll, hl, b) in enumerate(zip(cs.low, cs.high, ds)):
                a = np.linspace(ll, hl, b.n)[actions[i][j]]
                transformed_actions_.append(a)

            transformed_actions.append(transformed_actions_)

        return transformed_actions
class DiscreteSpaceWrapper(Wrapper):
    """Wrapper for observation and action space discretization.

    Wraps `env` in :py:class:`citylearn.wrappers.DiscreteObservationWrapper` and :py:class:`citylearn.wrappers.DiscreteActionWrapper`.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    observation_bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active observation in each building.
    action_bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active action in each building.
    default_observation_bin_size: int, default = 10
        The default number of bins if `observation_bin_sizes` is unspecified for any active building observation.
    default_action_bin_size: int, default = 10
        The default number of bins if `action_bin_sizes` is unspecified for any active building action.
    """

    def __init__(
        self, env: CityLearnEnv, observation_bin_sizes: List[Mapping[str, int]] = None,
        action_bin_sizes: List[Mapping[str, int]] = None, default_observation_bin_size: int = None,
        default_action_bin_size: int = None
    ):
        env = DiscreteObservationWrapper(env, bin_sizes=observation_bin_sizes, default_bin_size=default_observation_bin_size)
        env = DiscreteActionWrapper(env, bin_sizes=action_bin_sizes, default_bin_size=default_action_bin_size)
        super().__init__(env)
        self.env: CityLearnEnv
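A sketch of passing per-building bin sizes; the schema name, the two-building assumption, and the observation/action names used below are placeholders for illustration. Unlisted active observations and actions fall back to the default bin size.

# Usage sketch (schema name and the 'hour'/'electrical_storage' names are illustrative placeholders; assumes a 2-building schema).
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import DiscreteSpaceWrapper

env = CityLearnEnv('example_schema', central_agent=True)
env = DiscreteSpaceWrapper(
    env,
    observation_bin_sizes=[{'hour': 24}, {'hour': 24}],
    action_bin_sizes=[{'electrical_storage': 12}, {'electrical_storage': 12}],
)
print(env.observation_space)  # a single MultiDiscrete space per agent when central_agent = True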
class TabularQLearningObservationWrapper(ObservationWrapper):
    """Observation wrapper for :py:class:`citylearn.agents.q_learning.TabularQLearning` agent.

    Wraps `env` in :py:class:`citylearn.wrappers.DiscreteObservationWrapper`.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active observation in each building.
    default_bin_size: int, default = 10
        The default number of bins if `bin_sizes` is unspecified for any active building observation.
    """

    def __init__(self, env: CityLearnEnv, bin_sizes: List[Mapping[str, int]] = None, default_bin_size: int = None) -> None:
        env = DiscreteObservationWrapper(env, bin_sizes=bin_sizes, default_bin_size=default_bin_size)
        super().__init__(env)
        self.env: CityLearnEnv
        self.combinations = self.set_combinations()

    @property
    def observation_space(self) -> List[spaces.Discrete]:
        """Returns observation space for discretized observations."""

        observation_space = []

        for c in self.combinations:
            observation_space.append(spaces.Discrete(len(c) - 1))

        return observation_space

    def observation(self, observations: List[List[int]]) -> List[List[int]]:
        """Returns discretized observations."""

        return [[c.index(tuple(o))] for o, c in zip(observations, self.combinations)]

    def set_combinations(self) -> List[List[int]]:
        """Returns all combinations of discrete observations."""

        combs_list = []

        for s in self.env.observation_space:
            options = [list(range(d.n + 1)) for d in s]
            combs = list(itertools.product(*options))
            combs_list.append(combs)

        return combs_list
class TabularQLearningActionWrapper(ActionWrapper):
    """Action wrapper for :py:class:`citylearn.agents.q_learning.TabularQLearning` agent.

    Wraps `env` in :py:class:`citylearn.wrappers.DiscreteActionWrapper`.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active action in each building.
    default_bin_size: int, default = 10
        The default number of bins if `bin_sizes` is unspecified for any active building action.
    """

    def __init__(self, env: CityLearnEnv, bin_sizes: List[Mapping[str, int]] = None, default_bin_size: int = None) -> None:
        env = DiscreteActionWrapper(env, bin_sizes=bin_sizes, default_bin_size=default_bin_size)
        super().__init__(env)
        self.env: CityLearnEnv
        self.combinations = self.set_combinations()

    @property
    def action_space(self) -> List[spaces.Discrete]:
        """Returns action space for discretized actions."""

        action_space = []

        for c in self.combinations:
            action_space.append(spaces.Discrete(len(c)))

        return action_space

    def action(self, actions: List[float]) -> List[List[int]]:
        """Returns discretized actions."""

        return [list(c[a[0]]) for a, c in zip(actions, self.combinations)]

    def set_combinations(self) -> List[List[int]]:
        """Returns all combinations of discrete actions."""

        combs_list = []

        for s in self.env.action_space:
            options = [list(range(d.n)) for d in s]
            combs = list(itertools.product(*options))
            combs_list.append(combs)

        return combs_list
class TabularQLearningWrapper(Wrapper):
    """Wrapper for :py:class:`citylearn.agents.q_learning.TabularQLearning` agent.

    Discretizes observation and action spaces.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    observation_bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active observation in each building.
    action_bin_sizes: List[Mapping[str, int]], optional
        The number of bins for each active action in each building.
    default_observation_bin_size: int, default = 10
        The default number of bins if `observation_bin_sizes` is unspecified for any active building observation.
    default_action_bin_size: int, default = 10
        The default number of bins if `action_bin_sizes` is unspecified for any active building action.
    """

    def __init__(
        self, env: CityLearnEnv, observation_bin_sizes: List[Mapping[str, int]] = None,
        action_bin_sizes: List[Mapping[str, int]] = None, default_observation_bin_size: int = None,
        default_action_bin_size: int = None
    ):
        env = TabularQLearningObservationWrapper(env, bin_sizes=observation_bin_sizes, default_bin_size=default_observation_bin_size)
        env = TabularQLearningActionWrapper(env, bin_sizes=action_bin_sizes, default_bin_size=default_action_bin_size)
        super().__init__(env)
        self.env: CityLearnEnv
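A sketch of pairing this wrapper with the TabularQLearning agent it is designed for; the schema name is a placeholder and the agent's learn(episodes=...) call is assumed from the CityLearn agent interface rather than verified here.

# Usage sketch (schema name is a placeholder; agent learn() signature is an assumption).
from citylearn.agents.q_learning import TabularQLearning
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import TabularQLearningWrapper

env = CityLearnEnv('example_schema', central_agent=True)
env = TabularQLearningWrapper(env)  # observation and action spaces are now single Discrete spaces per agent
agent = TabularQLearning(env)
agent.learn(episodes=10)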
class StableBaselines3ObservationWrapper(ObservationWrapper):
    """Observation wrapper for :code:`stable-baselines3` algorithms.

    Wraps observations so that they are returned in a 1-dimensional numpy array. This wrapper is only compatible when the environment is controlled by a central agent i.e., :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` = True.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv):
        assert env.unwrapped.central_agent, 'StableBaselines3ObservationWrapper is compatible only when env.central_agent = True.'\
            ' First set env.central_agent = True to use this wrapper.'

        super().__init__(env)
        self.env: CityLearnEnv

    @property
    def observation_space(self) -> spaces.Box:
        """Returns single spaces Box object."""

        return self.env.observation_space[0]

    def observation(self, observations: List[List[float]]) -> np.ndarray:
        """Returns observations as 1-dimensional numpy array."""

        return np.array(observations[0], dtype='float32')
class StableBaselines3ActionWrapper(ActionWrapper):
    """Action wrapper for :code:`stable-baselines3` algorithms.

    Wraps agent actions so that they are passed to the environment in the expected nested list structure. This wrapper is only compatible when the environment is controlled by a central agent i.e., :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` = True.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv):
        assert env.unwrapped.central_agent, 'StableBaselines3ActionWrapper is compatible only when env.central_agent = True.'\
            ' First set env.central_agent = True to use this wrapper.'

        super().__init__(env)
        self.env: CityLearnEnv

    @property
    def action_space(self) -> spaces.Box:
        """Returns single spaces Box object."""

        return self.env.action_space[0]

    def action(self, actions: List[float]) -> List[List[float]]:
        """Returns agent actions wrapped in a list as expected by the environment."""

        return [actions]
class StableBaselines3RewardWrapper(RewardWrapper):
    """Reward wrapper for :code:`stable-baselines3` algorithms.

    Wraps rewards so that they are returned as a float value. This wrapper is only compatible when the environment is controlled by a central agent i.e., :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` = True.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv):
        assert env.unwrapped.central_agent, 'StableBaselines3RewardWrapper is compatible only when env.central_agent = True.'\
            ' First set env.central_agent = True to use this wrapper.'

        super().__init__(env)
        self.env: CityLearnEnv

    def reward(self, reward: List[float]) -> float:
        """Returns reward as float value."""

        return reward[0]
class StableBaselines3Wrapper(Wrapper):
    """Wrapper for :code:`stable-baselines3` algorithms.

    Wraps observations so that they are returned in a 1-dimensional numpy array, wraps actions so that they are passed to the environment in the expected nested list structure, and wraps rewards so that they are returned as a float value.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv):
        env = StableBaselines3ActionWrapper(env)
        env = StableBaselines3RewardWrapper(env)
        env = StableBaselines3ObservationWrapper(env)
        super().__init__(env)
        self.env: CityLearnEnv
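A sketch of training a stable-baselines3 algorithm on the wrapped environment; the schema name and the timestep budget are placeholders, and stable-baselines3 must be installed separately.

# Usage sketch (schema name is a placeholder; requires stable-baselines3).
from stable_baselines3 import SAC
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper

env = CityLearnEnv('example_schema', central_agent=True)
env = NormalizedObservationWrapper(env)
env = StableBaselines3Wrapper(env)
model = SAC('MlpPolicy', env)
model.learn(total_timesteps=1000)  # small budget just to illustrate the API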
class RLlibSingleAgentWrapper(StableBaselines3Wrapper):
    """Wrapper for :code:`RLlib` single-agent algorithms.

    Uses the same wrapper as :code:`stable-baselines3` by wrapping `env` in :py:class:`citylearn.wrappers.StableBaselines3Wrapper`.

    Parameters
    ----------
    env_config: Mapping[str, Any]
        Dictionary providing initialization parameters for the environment. Must contain `env_kwargs` as a key where `env_kwargs` is a `dict` used to initialize :py:class:`citylearn.citylearn.CityLearnEnv`. Thus, it must contain all positional arguments needed for initialization and may optionally contain optional initialization arguments. `env_config` can also contain a `wrappers` key that is a list of :py:mod:`citylearn.wrappers` classes to wrap :py:class:`citylearn.citylearn.CityLearnEnv` with. Wrapping with :py:class:`citylearn.wrappers.ClippedObservationWrapper` is recommended to avoid the simulation terminating prematurely with an error due to out-of-bound observations relative to the observation space.

    Notes
    -----
    This wrapper is only compatible with an environment where :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` = True and will initialize the environment as such, overriding any value for `central_agent` in `env_kwargs`.
    """

    def __init__(self, env_config: Mapping[str, Any]):
        env_kwargs = env_config['env_kwargs']
        env_kwargs['central_agent'] = True
        assert 'schema' in env_kwargs, 'missing schema key in env_kwargs.'
        env = CityLearnEnv(**env_kwargs)
        wrappers = env_config.get('wrappers')
        wrappers = [] if wrappers is None else wrappers

        for w in wrappers:
            env = w(env)

        super().__init__(env)
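A sketch of the `env_config` dictionary this wrapper expects; the schema name is a placeholder. The same dictionary can be handed to an RLlib algorithm configuration as the environment config.

# Usage sketch (schema name is a placeholder): env_config carries the CityLearnEnv kwargs and optional wrappers.
from citylearn.wrappers import ClippedObservationWrapper, NormalizedObservationWrapper, RLlibSingleAgentWrapper

env_config = {
    'env_kwargs': {'schema': 'example_schema'},
    'wrappers': [ClippedObservationWrapper, NormalizedObservationWrapper],
}
env = RLlibSingleAgentWrapper(env_config)  # central_agent is forced to True internally
observations, _ = env.reset()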
class RLlibMultiAgentObservationWrapper(ObservationWrapper):
    """Observation wrapper for :code:`RLlib` multi-agent algorithms.

    Wraps the observation space and observations so that they are returned as :py:class:`gymnasium.spaces.Dict` and `dict` objects respectively. The keys in these objects correspond to the agent IDs i.e., policy IDs in the multi-agent environment.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv):
        assert not env.central_agent, 'RLlibMultiAgentObservationWrapper is'\
            ' compatible only when env.central_agent = False.'\
            ' First set env.central_agent = False to use this wrapper.'

        super().__init__(env)
        self.env: CityLearnEnv

    @property
    def observation_space(self) -> spaces.Dict:
        """Parses observation space into a :py:class:`gymnasium.spaces.Dict`."""

        return spaces.Dict({f'agent_{i}': s for i, s in enumerate(self.env.observation_space)})

    def observation(self, observations: List[List[float]]) -> Mapping[str, np.ndarray]:
        """Parses observations into a dictionary."""

        return {f'agent_{i}': np.array(o, dtype='float32') for i, o in enumerate(observations)}
class RLlibMultiAgentActionWrapper(ActionWrapper):
    """Action wrapper for :code:`RLlib` multi-agent algorithms.

    Wraps the action space so that it is returned as a :py:class:`gymnasium.spaces.Dict`. The keys correspond to the agent IDs i.e., policy IDs in the multi-agent environment. Also converts agent actions from a `dict` to the data structure needed by :py:meth:`citylearn.citylearn.CityLearnEnv.step`.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv):
        assert not env.central_agent, 'RLlibMultiAgentActionWrapper is'\
            ' compatible only when env.central_agent = False.'\
            ' First set env.central_agent = False to use this wrapper.'

        super().__init__(env)
        self.env: CityLearnEnv

    @property
    def action_space(self) -> spaces.Dict:
        """Parses action space into a :py:class:`gymnasium.spaces.Dict`."""

        return spaces.Dict({f'agent_{i}': s for i, s in enumerate(self.env.action_space)})

    def action(self, actions: Mapping[str, np.ndarray]) -> List[List[float]]:
        """Parses actions into the data structure expected by :py:meth:`citylearn.citylearn.CityLearnEnv.step`."""

        return [list(v) for v in actions.values()]
class RLlibMultiAgentRewardWrapper(RewardWrapper):
    """Reward wrapper for :code:`RLlib` multi-agent algorithms.

    Wraps rewards so that they are returned as a `dict` mapping agent IDs to reward values.

    Parameters
    ----------
    env: CityLearnEnv
        CityLearn environment.
    """

    def __init__(self, env: CityLearnEnv):
        assert not env.central_agent, 'RLlibMultiAgentRewardWrapper is'\
            ' compatible only when env.central_agent = False.'\
            ' First set env.central_agent = False to use this wrapper.'

        super().__init__(env)
        self.env: CityLearnEnv

    def reward(self, reward: List[float]) -> Mapping[str, float]:
        """Parses reward into a `dict`."""

        return {f'agent_{i}': r for i, r in enumerate(reward)}
class RLlibMultiAgentEnv(MultiAgentEnv):
    """Wrapper for :code:`RLlib` multi-agent algorithms.

    Converts observation and action spaces to :py:class:`gymnasium.spaces.Dict`. Also converts `observations`, `actions`, `rewards`, `terminated`, and `truncated` to dictionaries where necessary. The dictionary keys correspond to the agent IDs i.e., policy IDs in the multi-agent environment. Agent IDs are accessible through the `_agent_ids` property. The initialized environment is a :py:class:`ray.rllib.env.MultiAgentEnv` object and has an `env` attribute that is a :py:class:`citylearn.citylearn.CityLearnEnv` object.

    Parameters
    ----------
    env_config: Mapping[str, Any]
        Dictionary providing initialization parameters for the environment. Must contain `env_kwargs` as a key where `env_kwargs` is a `dict` used to initialize :py:class:`citylearn.citylearn.CityLearnEnv`. Thus, it must contain all positional arguments needed for initialization and may optionally contain optional initialization arguments. `env_config` can also contain a `wrappers` key that is a list of :py:mod:`citylearn.wrappers` classes to wrap :py:class:`citylearn.citylearn.CityLearnEnv` with. Wrapping with :py:class:`citylearn.wrappers.ClippedObservationWrapper` is recommended to avoid the simulation terminating prematurely with an error due to out-of-bound observations relative to the observation space.

    Notes
    -----
    This wrapper is only compatible with an environment where :py:attr:`citylearn.citylearn.CityLearnEnv.central_agent` = False and will initialize the environment as such, overriding any value for `central_agent` in `env_kwargs`.
    """

    def __init__(self, env_config: Mapping[str, Any]):
        if MultiAgentEnv == Env:
            raise Exception(
                'This functionality requires you to install RLlib. '
                'You can install RLlib from pip: pip install "ray[rllib]", '
                'or for more detailed instructions please visit https://docs.ray.io/en/latest/rllib/index.html.'
            )
        else:
            pass

        super().__init__()
        env_kwargs = env_config['env_kwargs']
        env_kwargs['central_agent'] = False
        assert 'schema' in env_kwargs, 'missing schema key in env_kwargs.'
        env = CityLearnEnv(**env_kwargs)
        wrappers = env_config.get('wrappers')
        wrappers = [] if wrappers is None else wrappers

        for w in wrappers:
            env = w(env)

        env = RLlibMultiAgentActionWrapper(env)
        env = RLlibMultiAgentObservationWrapper(env)
        env = RLlibMultiAgentRewardWrapper(env)
        self.env: CityLearnEnv = env
        self._agent_ids = [f'agent_{i}' for i in range(len(self.buildings))]
        self.observation_space: spaces.Dict = self.env.observation_space
        self.action_space: spaces.Dict = self.env.action_space

    @property
    def time_step(self) -> int:
        """Convenience property for :py:attr:`citylearn.citylearn.CityLearnEnv.time_step`."""

        return self.env.time_step

    @property
    def buildings(self) -> List[Building]:
        """Convenience property for :py:attr:`citylearn.citylearn.CityLearnEnv.buildings`."""

        return self.env.buildings

    @property
    def terminated(self) -> bool:
        """Convenience property for :py:attr:`citylearn.citylearn.CityLearnEnv.terminated`."""

        return self.env.terminated

    def step(
        self, action_dict: Mapping[str, np.ndarray]
    ) -> Tuple[Mapping[str, np.ndarray], Mapping[str, float], Mapping[str, bool], Mapping[str, bool], Mapping[str, dict]]:
        """Calls :py:meth:`citylearn.citylearn.CityLearnEnv.step` and parses the returned values into dictionaries."""

        observations, reward, terminated, truncated, info = self.env.step(action_dict)
        terminated = {'__all__': terminated, **{a: terminated for a in self._agent_ids}}
        truncated = {'__all__': truncated, **{a: truncated for a in self._agent_ids}}
        info = {a: info for a in self._agent_ids}

        return observations, reward, terminated, truncated, info

    def evaluate(self, **kwargs) -> pd.DataFrame:
        """Convenience method for :py:meth:`citylearn.citylearn.CityLearnEnv.evaluate`."""

        return self.env.evaluate(**kwargs)

    def reset(self, *, seed: int = None, options: Mapping[str, Any] = None) -> Tuple[Mapping[str, np.ndarray], Mapping[str, dict]]:
        """Calls :py:meth:`citylearn.citylearn.CityLearnEnv.reset` and parses the returned values into dictionaries."""

        observations, info = self.env.reset(seed=seed, options=options)
        info = {a: info for a in self._agent_ids}

        return observations, info
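A sketch of stepping the multi-agent environment directly with a dictionary of per-agent actions; the schema name is a placeholder and RLlib (ray[rllib]) must be installed for the class to initialize.

# Usage sketch (schema name is a placeholder; requires ray[rllib]).
from citylearn.wrappers import ClippedObservationWrapper, RLlibMultiAgentEnv

env_config = {
    'env_kwargs': {'schema': 'example_schema'},
    'wrappers': [ClippedObservationWrapper],
}
env = RLlibMultiAgentEnv(env_config)  # central_agent is forced to False internally
observations, info = env.reset()
actions = {agent_id: env.action_space[agent_id].sample() for agent_id in env._agent_ids}
observations, rewards, terminated, truncated, info = env.step(actions)  # all returned values are keyed by agent ID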