
QuickStart

Install the latest CityLearn version from PyPI with the pip command:

[ ]:
!pip install CityLearn
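
You can confirm the installation by reading the installed distribution's version. This minimal check uses only the Python standard library:

[ ]:
# query the installed CityLearn version via the standard library
from importlib.metadata import version

print(version('CityLearn'))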

CityLearn Control Agents

No Control (Baseline)

Run the following to simulate an environment where the storage systems and heat pumps are not controlled (baseline). The prescribed storage actions will be 0.0 and the heat pump will receive no action, i.e. None, so that it delivers the ideal load defined in the building time series files:

[1]:
from citylearn.agents.base import BaselineAgent as Agent
from citylearn.citylearn import CityLearnEnv

# initialize
env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=True)
model = Agent(env)

# train
model.learn(episodes=1)

# test
kpis = model.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)
cost_function Building_1 Building_2 Building_3 District
all_time_peak_average NaN NaN NaN 1.000
annual_normalized_unserved_energy_total 0.019 0.018 0.018 0.018
carbon_emissions_total 1.000 1.000 1.000 1.000
cost_total 1.000 1.000 1.000 1.000
daily_one_minus_load_factor_average NaN NaN NaN 1.000
daily_peak_average NaN NaN NaN 1.000
discomfort_cold_delta_average 1.611 0.043 0.643 0.766
discomfort_cold_delta_maximum 4.741 1.772 3.466 3.326
discomfort_cold_delta_minimum 0.000 0.000 0.000 0.000
discomfort_cold_proportion 0.360 0.000 0.082 0.147
discomfort_hot_delta_average 0.067 0.580 0.100 0.249
discomfort_hot_delta_maximum 5.384 5.847 3.835 5.022
discomfort_hot_delta_minimum 0.000 0.000 0.000 0.000
discomfort_hot_proportion 0.010 0.034 0.003 0.016
discomfort_proportion 0.370 0.034 0.085 0.163
electricity_consumption_total 1.000 1.000 1.000 1.000
monthly_one_minus_load_factor_average NaN NaN NaN 1.000
one_minus_thermal_resilience_proportion 0.333 0.571 0.133 0.346
power_outage_normalized_unserved_energy_total 0.723 0.692 0.637 0.684
ramping_average NaN NaN NaN 1.000
zero_net_energy 1.000 1.000 1.000 1.000
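
The cost and consumption KPIs reported by evaluate are normalized against this no-control scenario, which is why they evaluate to 1.000 here; values below 1.000 in later sections indicate an improvement over the baseline. Since the result is a pandas DataFrame, the district-level values are easy to slice out for comparison across agents; a minimal sketch using the pivoted kpis from the cell above:

[ ]:
# select the district-level column of the pivoted KPI table
# for a compact, agent-to-agent comparable summary
district_kpis = kpis['District']
print(district_kpis)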

Centralized RBC

Run the following to simulate an environment controlled by a centralized RBC agent for a single episode:

[2]:
from citylearn.agents.rbc import BasicRBC as Agent
from citylearn.citylearn import CityLearnEnv

# initialize
env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=True)
model = Agent(env)

# train
model.learn(episodes=1)

# test
kpis = model.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)
cost_function Building_1 Building_2 Building_3 District
all_time_peak_average NaN NaN NaN 1.179
annual_normalized_unserved_energy_total 0.017 0.016 0.016 0.016
carbon_emissions_total 1.997 1.936 1.737 1.890
cost_total 1.930 1.877 1.709 1.839
daily_one_minus_load_factor_average NaN NaN NaN 0.721
daily_peak_average NaN NaN NaN 1.352
discomfort_cold_delta_average 9.731 3.446 3.163 5.446
discomfort_cold_delta_maximum 13.562 9.930 5.399 9.630
discomfort_cold_delta_minimum 0.000 0.000 0.000 0.000
discomfort_cold_proportion 0.975 0.892 0.953 0.940
discomfort_hot_delta_average 0.007 0.021 0.010 0.012
discomfort_hot_delta_maximum 1.693 4.220 3.269 3.061
discomfort_hot_delta_minimum 0.000 0.000 0.000 0.000
discomfort_hot_proportion 0.000 0.007 0.003 0.004
discomfort_proportion 0.975 0.899 0.957 0.944
electricity_consumption_total 1.993 1.962 1.751 1.902
monthly_one_minus_load_factor_average NaN NaN NaN 0.863
one_minus_thermal_resilience_proportion 0.667 0.429 0.267 0.454
power_outage_normalized_unserved_energy_total 0.781 0.759 0.711 0.750
ramping_average NaN NaN NaN 0.961
zero_net_energy 2.058 1.993 1.769 1.940
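
The BasicRBC agent applies fixed hour-of-day rules to charge and discharge storage; the exact rules live in citylearn.agents.rbc. The sketch below is only a conceptual illustration of such a rule, with made-up hours and rates that are not the actual BasicRBC thresholds:

[ ]:
# conceptual hour-of-day rule (illustrative hours and rates only,
# NOT the actual BasicRBC thresholds)
def hour_of_day_rule(hour: int) -> float:
    """Return a storage action: positive charges, negative discharges."""
    if hour >= 22 or hour <= 6:
        return 0.1  # charge overnight when demand is typically low
    else:
        return -0.1  # discharge during the day

print([hour_of_day_rule(h) for h in range(1, 25)])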

Decentralized-Independent SAC

Run the following to simulate an environment controlled by decentralized-independent SAC agents for one training episode. Note that episodes=2 with deterministic_finish=True runs a second, deterministic episode after training:

[3]:
from citylearn.agents.sac import SAC as Agent
from citylearn.citylearn import CityLearnEnv

# initialize
env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=False)
model = Agent(env)

# train
model.learn(episodes=2, deterministic_finish=True)

# test
kpis = model.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)
cost_function Building_1 Building_2 Building_3 District
all_time_peak_average NaN NaN NaN 0.948
annual_normalized_unserved_energy_total 0.013 0.013 0.012 0.013
carbon_emissions_total 0.929 0.969 0.944 0.947
cost_total 0.892 0.931 0.912 0.912
daily_one_minus_load_factor_average NaN NaN NaN 0.946
daily_peak_average NaN NaN NaN 0.927
discomfort_cold_delta_average 1.819 0.911 0.875 1.201
discomfort_cold_delta_maximum 6.324 4.555 2.977 4.619
discomfort_cold_delta_minimum 0.000 0.000 0.000 0.000
discomfort_cold_proportion 0.412 0.321 0.238 0.324
discomfort_hot_delta_average 0.270 0.550 0.302 0.374
discomfort_hot_delta_maximum 4.846 6.216 3.977 5.013
discomfort_hot_delta_minimum 0.000 0.000 0.000 0.000
discomfort_hot_proportion 0.046 0.153 0.030 0.076
discomfort_proportion 0.459 0.474 0.268 0.400
electricity_consumption_total 0.939 0.987 0.958 0.961
monthly_one_minus_load_factor_average NaN NaN NaN 0.996
one_minus_thermal_resilience_proportion 0.733 0.500 0.133 0.456
power_outage_normalized_unserved_energy_total 0.626 0.649 0.595 0.624
ramping_average NaN NaN NaN 0.883
zero_net_energy 0.953 0.989 0.959 0.967

Decentralized-Cooperative MARLISA

Run the following to simulate an environment controlled by decentralized-cooperative MARLISA agents for one training episode, again with a deterministic final episode:

[4]:
from citylearn.agents.marlisa import MARLISA as Agent
from citylearn.citylearn import CityLearnEnv

# initialize
env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=False)
model = Agent(env)

# train
model.learn(episodes=2, deterministic_finish=True)

# test
kpis = model.env.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)
cost_function Building_1 Building_2 Building_3 District
all_time_peak_average NaN NaN NaN 0.948
annual_normalized_unserved_energy_total 0.013 0.013 0.012 0.013
carbon_emissions_total 0.938 0.964 0.941 0.948
cost_total 0.900 0.925 0.909 0.912
daily_one_minus_load_factor_average NaN NaN NaN 0.945
daily_peak_average NaN NaN NaN 0.927
discomfort_cold_delta_average 1.875 0.912 0.866 1.218
discomfort_cold_delta_maximum 6.396 4.549 2.959 4.635
discomfort_cold_delta_minimum 0.000 0.000 0.000 0.000
discomfort_cold_proportion 0.431 0.317 0.235 0.327
discomfort_hot_delta_average 0.256 0.553 0.309 0.373
discomfort_hot_delta_maximum 4.776 6.274 3.994 5.015
discomfort_hot_delta_minimum 0.000 0.000 0.000 0.000
discomfort_hot_proportion 0.046 0.153 0.030 0.076
discomfort_proportion 0.477 0.470 0.265 0.404
electricity_consumption_total 0.948 0.982 0.956 0.962
monthly_one_minus_load_factor_average NaN NaN NaN 0.996
one_minus_thermal_resilience_proportion 0.733 0.500 0.133 0.456
power_outage_normalized_unserved_energy_total 0.628 0.638 0.595 0.620
ramping_average NaN NaN NaN 0.884
zero_net_energy 0.962 0.985 0.956 0.968

Other Standard Reinforcement Learning Libraries

Stable Baselines3 Reinforcement Learning Algorithms

Install the latest version of Stable Baselines3:

[ ]:
!pip install stable-baselines3

Before the environment is ready for use in Stable Baselines3, it needs to be wrapped. First, wrap the environment with the NormalizedObservationWrapper (see docs) to ensure that observations served to the agent are min-max normalized to [0, 1] and that cyclical observations, e.g. hour, are encoded using the cosine transformation.

Next, wrap with the StableBaselines3Wrapper (see docs), which ensures that observations, actions and rewards are served in a manner compatible with the Stable Baselines3 interface.

⚠️ NOTE: central_agent in the env must be True when using Stable Baselines3, as it does not support multi-agent control.
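
A quick sanity check is possible after constructing the environment; this assumes, as used throughout this notebook, that CityLearnEnv exposes its central_agent setting as an attribute:

[ ]:
# sanity check: Stable Baselines3 requires a single (central) agent
# (assumes CityLearnEnv exposes the central_agent attribute)
from citylearn.citylearn import CityLearnEnv

env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=True)
assert env.central_agent, 'set central_agent=True when using Stable Baselines3'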

[5]:
from stable_baselines3.sac import SAC as Agent
from citylearn.citylearn import CityLearnEnv
from citylearn.wrappers import NormalizedObservationWrapper, StableBaselines3Wrapper

# initialize
env = CityLearnEnv('citylearn_challenge_2023_phase_2_local_evaluation', central_agent=True)
env = NormalizedObservationWrapper(env)
env = StableBaselines3Wrapper(env)
model = Agent('MlpPolicy', env)

# train
episodes = 2
model.learn(total_timesteps=env.unwrapped.time_steps*episodes)

# test
observations, _ = env.reset()

while not env.unwrapped.terminated:
    actions, _ = model.predict(observations, deterministic=True)
    observations, _, _, _, _ = env.step(actions)

kpis = env.unwrapped.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)
cost_function Building_1 Building_2 Building_3 District
all_time_peak_average NaN NaN NaN 0.834
annual_normalized_unserved_energy_total 0.015 0.012 0.014 0.014
carbon_emissions_total 0.396 0.449 0.501 0.449
cost_total 0.375 0.413 0.471 0.419
daily_one_minus_load_factor_average NaN NaN NaN 1.316
daily_peak_average NaN NaN NaN 0.709
discomfort_cold_delta_average 0.000 0.004 0.001 0.002
discomfort_cold_delta_maximum 0.124 0.581 0.372 0.359
discomfort_cold_delta_minimum 0.000 0.000 0.000 0.000
discomfort_cold_proportion 0.000 0.000 0.000 0.000
discomfort_hot_delta_average 9.465 7.146 8.046 8.219
discomfort_hot_delta_maximum 16.783 14.850 14.003 15.212
discomfort_hot_delta_minimum 0.000 0.000 0.000 0.000
discomfort_hot_proportion 0.982 0.978 0.978 0.979
discomfort_proportion 0.982 0.978 0.978 0.979
electricity_consumption_total 0.401 0.453 0.508 0.454
monthly_one_minus_load_factor_average NaN NaN NaN 1.126
one_minus_thermal_resilience_proportion 1.000 1.000 1.000 1.000
power_outage_normalized_unserved_energy_total 0.570 0.509 0.555 0.545
ramping_average NaN NaN NaN 0.974
zero_net_energy 0.275 0.362 0.453 0.363
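
To reuse the trained policy later without retraining, Stable Baselines3 models can be serialized to disk and reloaded; the file name below is arbitrary:

[ ]:
# save the trained policy to a zip archive (file name is arbitrary)
model.save('sac_citylearn')

# reload it later against a (wrapped) environment
model = Agent.load('sac_citylearn', env=env)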

RLlib

Install the latest version of RLlib:

[ ]:
!pip install "ray[rllib]"

We advise that you include the ClippedObservationWrapper (see docs) when working with RLlib so that observations are always clipped within the observation space before they are sent to the agent; otherwise, out-of-bound observations will raise a ValueError and terminate the training.

We also wrap the environment with the NormalizedObservationWrapper (see docs) to ensure that observations served to the agent are min-max normalized to [0, 1] and that cyclical observations, e.g. hour, are encoded using the cosine transformation.

RLlib supports both single-agent and multi-agent algorithms. See below for an example of each case.

Single-Agent

The single-agent interface for RLlib is the RLlibSingleAgentWrapper.

[10]:
import warnings
from citylearn.wrappers import ClippedObservationWrapper, NormalizedObservationWrapper, RLlibSingleAgentWrapper
from ray.rllib.algorithms.sac import SACConfig as Config

warnings.filterwarnings('ignore', category=DeprecationWarning)

# initialize
env_config = {
    'env_kwargs': {
        'schema': 'citylearn_challenge_2023_phase_2_local_evaluation',
    },
    'wrappers': [
        NormalizedObservationWrapper,
        ClippedObservationWrapper
    ]
}
config = (
    Config()
    .environment(RLlibSingleAgentWrapper, env_config=env_config)
)
model = config.build()

# train
for i in range(2):
    _ = model.train()

# test
env = RLlibSingleAgentWrapper(env_config)
observations, _ = env.reset()

while not env.unwrapped.terminated:
    actions = model.compute_single_action(observations, explore=False)
    observations, _, _, _, _ = env.step(actions)

kpis = env.unwrapped.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)
cost_function Building_1 Building_2 Building_3 District
all_time_peak_average NaN NaN NaN 1.063
annual_normalized_unserved_energy_total 0.015 0.016 0.014 0.015
carbon_emissions_total 1.632 1.598 1.459 1.563
cost_total 1.572 1.548 1.427 1.516
daily_one_minus_load_factor_average NaN NaN NaN 0.740
daily_peak_average NaN NaN NaN 1.166
discomfort_cold_delta_average 7.743 2.896 2.563 4.401
discomfort_cold_delta_maximum 12.215 8.302 5.262 8.593
discomfort_cold_delta_minimum 0.000 0.000 0.000 0.000
discomfort_cold_proportion 0.968 0.733 0.757 0.819
discomfort_hot_delta_average 0.013 0.093 0.013 0.039
discomfort_hot_delta_maximum 2.547 5.028 3.506 3.694
discomfort_hot_delta_minimum 0.000 0.000 0.000 0.000
discomfort_hot_proportion 0.003 0.017 0.003 0.008
discomfort_proportion 0.971 0.750 0.760 0.827
electricity_consumption_total 1.643 1.626 1.479 1.583
monthly_one_minus_load_factor_average NaN NaN NaN 0.891
one_minus_thermal_resilience_proportion 0.800 0.571 0.267 0.546
power_outage_normalized_unserved_energy_total 0.781 0.796 0.708 0.762
ramping_average NaN NaN NaN 0.885
zero_net_energy 1.687 1.643 1.488 1.606
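
RLlib algorithms can be checkpointed in a similar fashion. The sketch below assumes a Ray 2.x API, where Algorithm.save writes a checkpoint and Algorithm.from_checkpoint restores it; the exact return type of save varies between Ray releases:

[ ]:
from ray.rllib.algorithms.algorithm import Algorithm

# write a checkpoint of the trained algorithm (Ray 2.x sketch)
checkpoint = model.save()

# restore it later, e.g. in a new session
model = Algorithm.from_checkpoint(checkpoint)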

Multi-Agent

The multi-agent interface for RLlib is the RLlibMultiAgentEnv wrapper.

[12]:
import warnings
from citylearn.wrappers import ClippedObservationWrapper, NormalizedObservationWrapper, RLlibMultiAgentEnv
from ray.rllib.algorithms.sac import SACConfig as Config
from ray.rllib.policy.policy import PolicySpec

warnings.filterwarnings('ignore', category=DeprecationWarning)

# initialize
env_config = {
    'env_kwargs': {
        'schema': 'citylearn_challenge_2023_phase_2_local_evaluation',
    },
    'wrappers': [
        NormalizedObservationWrapper,
        ClippedObservationWrapper
    ]
}
config = (
    Config()
    .environment(RLlibMultiAgentEnv, env_config=env_config)
    .multi_agent(
        policies={a: PolicySpec() for a in RLlibMultiAgentEnv(env_config)._agent_ids},
        policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: agent_id,
    )
)
model = config.build()

# train
for i in range(2):
    _ = model.train()

# test
env = RLlibMultiAgentEnv(env_config)
observations, _ = env.reset()

while not env.terminated:
    actions = {p: model.compute_single_action(o, policy_id=p, explore=False) for p, o in observations.items()}
    observations, _, _, _, _ = env.step(actions)

kpis = env.unwrapped.evaluate()
kpis = kpis.pivot(index='cost_function', columns='name', values='value').round(3)
kpis = kpis.dropna(how='all')
display(kpis)
cost_function Building_1 Building_2 Building_3 District
all_time_peak_average NaN NaN NaN 1.063
annual_normalized_unserved_energy_total 0.015 0.016 0.014 0.015
carbon_emissions_total 1.639 1.599 1.462 1.566
cost_total 1.579 1.548 1.431 1.519
daily_one_minus_load_factor_average NaN NaN NaN 0.739
daily_peak_average NaN NaN NaN 1.168
discomfort_cold_delta_average 7.789 2.896 2.566 4.417
discomfort_cold_delta_maximum 12.260 8.308 5.261 8.610
discomfort_cold_delta_minimum 0.000 0.000 0.000 0.000
discomfort_cold_proportion 0.968 0.731 0.759 0.819
discomfort_hot_delta_average 0.013 0.093 0.013 0.039
discomfort_hot_delta_maximum 2.521 5.046 3.506 3.691
discomfort_hot_delta_minimum 0.000 0.000 0.000 0.000
discomfort_hot_proportion 0.003 0.017 0.003 0.008
discomfort_proportion 0.971 0.748 0.762 0.827
electricity_consumption_total 1.650 1.626 1.482 1.586
monthly_one_minus_load_factor_average NaN NaN NaN 0.890
one_minus_thermal_resilience_proportion 0.800 0.571 0.267 0.546
power_outage_normalized_unserved_energy_total 0.782 0.797 0.708 0.762
ramping_average NaN NaN NaN 0.885
zero_net_energy 1.695 1.643 1.491 1.609
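
When you are done with the RLlib examples, release the resources that Ray holds; both calls below are part of the public Ray API:

[ ]:
import ray

# stop the algorithm's rollout workers and shut down the Ray runtime
model.stop()
ray.shutdown()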