citylearn.agents.rlc module

class citylearn.agents.rlc.RLC(*args, hidden_dimension: Optional[List[float]] = None, discount: Optional[float] = None, tau: Optional[float] = None, alpha: Optional[float] = None, lr: Optional[float] = None, batch_size: Optional[int] = None, replay_buffer_capacity: Optional[int] = None, start_training_time_step: Optional[int] = None, end_exploration_time_step: Optional[int] = None, deterministic_start_time_step: Optional[int] = None, action_scaling_coefficient: Optional[float] = None, reward_scaling: Optional[float] = None, update_per_time_step: Optional[int] = None, seed: Optional[int] = None, **kwargs)[source]

Bases: citylearn.agents.base.Agent

property action_scaling_coefficient: float

Action scaling coefficient.

property alpha: float

Temperature; exploration-exploitation balance term.

property batch_size: int

Batch size.

property deterministic_start_time_step: int

Time step to begin taking deterministic actions.

property discount: float

Discount factor.

property end_exploration_time_step: int

Time step to stop exploration.

property hidden_dimension: List[float]

Hidden dimension.

property lr: float

Learning rate.

property observation_dimension: int

Number of observations after applying encoders.

property replay_buffer_capacity: int

Replay buffer capacity.

property reward_scaling: float

Reward scaling.

property seed: int

Pseudorandom number generator seed for repeatable results.

property start_training_time_step: int

Time step to begin training.

property tau: float

Soft update coefficient (decay rate) used when updating target network parameters.

property update_per_time_step: int

Number of updates per time step.