double_pendulum.controller.DQN

Submodules

double_pendulum.controller.DQN.DQN_controller

class double_pendulum.controller.DQN.DQN_controller.DQNController(experiment_path, actions, dynamics_func, dt)

Bases: AbstractController

get_control_output_(x, t=None)

The function that computes the control input for the double pendulum's actuator(s). It is meant to be overridden by concrete controllers; the API of this method should not be changed. Unused inputs/outputs can be set to None.

Parameters:
x : array_like, shape=(4,), dtype=float

state of the double pendulum, order=[angle1, angle2, velocity1, velocity2], units=[rad, rad, rad/s, rad/s]

t : float, optional

time, unit=[s] (Default value=None)

Returns:
array_like, shape=(2,), dtype=float

actuation input/motor torque, order=[u1, u2], units=[Nm]

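In use, the controller is constructed from a trained experiment directory, the discrete action set and the dynamics wrapper used during training, and is then queried once per control step. The sketch below is illustrative only; the experiment path, action set and dynamics function are placeholders that must match the training setup:

    import numpy as np
    from double_pendulum.controller.DQN.DQN_controller import DQNController

    actions = np.linspace(-5.0, 5.0, 9)   # placeholder discrete torque set (format depends on training)
    dynamics_func = None                  # placeholder: dynamics wrapper used during training

    controller = DQNController(
        experiment_path="path/to/trained/experiment",  # placeholder directory with saved parameters
        actions=actions,
        dynamics_func=dynamics_func,
        dt=0.01,                                       # control time step in seconds
    )

    x = np.array([np.pi, 0.0, 0.0, 0.0])  # [angle1, angle2, velocity1, velocity2]
    u = controller.get_control_output_(x, t=0.0)  # shape (2,) motor torques [u1, u2] in Nm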
double_pendulum.controller.DQN.environment

double_pendulum.controller.DQN.exploration

class double_pendulum.controller.DQN.exploration.EpsilonGreedySchedule(starting_eps: float, ending_eps: float, duration_eps: int, key: PRNGKeyArray, current_exploration_step: int)

Bases: object

explore() → bool
explore_(key, exploration_step)

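The constructor arguments suggest an exploration probability that is interpolated from starting_eps to ending_eps over duration_eps steps, with the stored JAX PRNG key deciding, per call to explore(), whether the agent should act randomly. A self-contained sketch of that idea, not the library's exact implementation:

    import jax

    class LinearEpsilonGreedy:
        # Hedged sketch of a linear epsilon-greedy schedule using a JAX PRNG key.
        def __init__(self, starting_eps, ending_eps, duration_eps, key, current_exploration_step=0):
            self.starting_eps = starting_eps
            self.ending_eps = ending_eps
            self.duration_eps = duration_eps
            self.key = key
            self.step = current_exploration_step

        def current_eps(self):
            # Linear interpolation, clipped at ending_eps once duration_eps is reached.
            frac = min(self.step / self.duration_eps, 1.0)
            return self.starting_eps + frac * (self.ending_eps - self.starting_eps)

        def explore(self) -> bool:
            # Draw a fresh subkey and decide whether to act randomly this step.
            self.key, subkey = jax.random.split(self.key)
            decision = jax.random.uniform(subkey) < self.current_eps()
            self.step += 1
            return bool(decision)

    # Usage: explore 100% of the time at first, 1% after 10_000 steps.
    schedule = LinearEpsilonGreedy(1.0, 0.01, 10_000, jax.random.PRNGKey(0))
    if schedule.explore():
        pass  # pick a random action, e.g. via random_action(key)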
double_pendulum.controller.DQN.networks

class double_pendulum.controller.DQN.networks.BaseQ(q_inputs: dict, n_actions: int, gamma: float, network: Module, network_key: PRNGKeyArray, learning_rate: float, n_training_steps_per_online_update: int)

Bases: object

best_action(params: FrozenDict, state: Array, key: PRNGKey) → int8
compute_target(params: FrozenDict, samples: FrozenDict) → Array
learn_on_batch(params: FrozenDict, params_target: FrozenDict, optimizer_state: Tuple, batch_samples: Array) → Tuple[FrozenDict, FrozenDict, float32]
loss(params: FrozenDict, params_target: FrozenDict, samples: FrozenDict, ord: int = 2) → float32
static metric(error: Array, ord: str) → float32
random_action(key: PRNGKeyArray) → int8
save(path: str) → None
update_online_params(step: int, replay_buffer: ReplayBuffer, key: PRNGKeyArray) → float32
update_target_params(step: int) → None
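The loss is a temporal-difference objective: compute_target builds the regression targets from the target parameters, and metric reduces the Bellman error (ord selecting e.g. an L1 or L2 norm). A self-contained sketch of that computation, with assumed array shapes and field names since the sample layout is not documented here:

    import jax.numpy as jnp

    def td_loss(q_online, q_target_next, actions, rewards, absorbings, gamma, ord=2):
        # Hedged sketch of a DQN-style TD loss; not the library's exact code.
        # Bellman target: r + gamma * max_a' Q_target(s', a'), zeroed at absorbing states.
        targets = rewards + gamma * (1.0 - absorbings) * jnp.max(q_target_next, axis=-1)
        # Q-value of the action actually taken in each sample.
        chosen = jnp.take_along_axis(q_online, actions[:, None], axis=-1).squeeze(-1)
        error = chosen - targets
        return jnp.mean(jnp.abs(error)) if ord == 1 else jnp.mean(jnp.square(error))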
class double_pendulum.controller.DQN.networks.DQN(state_shape: list, n_actions: int, gamma: float, layers: Sequence[int], network_key: PRNGKeyArray, learning_rate: float, n_training_steps_per_online_update: int, n_training_steps_per_target_update: int)

Bases: BaseQ

apply(params: FrozenDict, states: Array) → Array
best_action(params: FrozenDict, state: Array, key: PRNGKey) → int8
compute_target(params: FrozenDict, samples: FrozenDict) → Array
loss(params: FrozenDict, params_target: FrozenDict, samples: FrozenDict) → float32
update_target_params(step: int) → None
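DQN specializes BaseQ with a feed-forward network over the flat state; its best_action is presumably the greedy argmax over the predicted Q-values, and n_training_steps_per_target_update suggests a periodic hard copy of the online parameters into the target parameters. A hedged sketch of those two pieces:

    import jax.numpy as jnp

    def greedy_action(apply_fn, params, state):
        # Sketch of a greedy best_action: index of the largest Q-value for one state.
        q_values = apply_fn(params, state)  # shape (n_actions,)
        return int(jnp.argmax(q_values))

    def hard_target_update(params, params_target, step, n_training_steps_per_target_update):
        # Sketch of a periodic hard target update: copy the online parameters every N steps.
        if step % n_training_steps_per_target_update == 0:
            return params
        return params_target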
class double_pendulum.controller.DQN.networks.MLP(features: Sequence[int], parent: Type[Module] | Type[Scope] | Type[_Sentinel] | None = <flax.linen.module._Sentinel object>, name: str | None = None)

Bases: Module

features: Sequence[int]
name: str | None = None
parent: Type[Module] | Type[Scope] | Type[_Sentinel] | None = None
scope: Scope | None = None

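MLP is a flax Module parameterised only by features, the sequence of layer widths. A typical flax MLP of this shape looks like the sketch below; the activation function and the linear output layer are assumptions, not necessarily what the real module uses:

    from typing import Sequence
    import flax.linen as nn

    class SimpleMLP(nn.Module):
        # Hedged sketch of a features-parameterised flax MLP.
        features: Sequence[int]

        @nn.compact
        def __call__(self, x):
            # Hidden layers with a nonlinearity; the final layer stays linear (Q-values).
            for width in self.features[:-1]:
                x = nn.relu(nn.Dense(width)(x))
            return nn.Dense(self.features[-1])(x)

With features=[256, 256, n_actions], for example, such a network maps the 4-dimensional pendulum state to one Q-value per discrete action.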
double_pendulum.controller.DQN.replay_buffer

class double_pendulum.controller.DQN.replay_buffer.ReplayBuffer(max_size: int, batch_size: int, state_shape: list, state_dtype: Type, clipping)

Bases: object

add(state: ndarray, action: ndarray, reward: ndarray, next_state: ndarray, absorbing: ndarray) → None
static create_batch(states: ndarray, actions: ndarray, rewards: ndarray, next_states: ndarray, absorbings: ndarray) → Dict[str, Array]
get_sample_indexes(key: PRNGKeyArray, maxval: int) → Array
load(path: str) → None
sample_random_batch(sample_key: PRNGKeyArray) → Dict[str, Array]
save(path) → None

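The buffer stores fixed-size transition arrays and hands out random batches selected with a JAX PRNG key. A hedged usage sketch; the constructor values are placeholders and the meaning of clipping is not documented here:

    import numpy as np
    import jax
    from double_pendulum.controller.DQN.replay_buffer import ReplayBuffer

    buffer = ReplayBuffer(
        max_size=100_000,
        batch_size=256,
        state_shape=[4],          # double pendulum state dimension
        state_dtype=np.float32,
        clipping=None,            # placeholder; semantics not documented here
    )

    # Store one transition (s, a, r, s', absorbing flag); repeat until the buffer
    # holds at least batch_size transitions before sampling.
    buffer.add(
        state=np.zeros(4, dtype=np.float32),
        action=np.array(3),
        reward=np.array(1.0),
        next_state=np.zeros(4, dtype=np.float32),
        absorbing=np.array(False),
    )

    batch = buffer.sample_random_batch(jax.random.PRNGKey(0))
    # batch is a Dict[str, Array]; the exact key names come from create_batch
    # and are not documented here.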
double_pendulum.controller.DQN.simulate

double_pendulum.controller.DQN.simulate.simulate(experiment_path, actions)

double_pendulum.controller.DQN.utils

double_pendulum.controller.DQN.utils.load_pickled_data(path: str, device_put: bool = False)
double_pendulum.controller.DQN.utils.save_pickled_data(path: str, object)
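
These helpers wrap pickle (de)serialization; device_put=True presumably places the loaded object on the default JAX device. A minimal sketch of what such helpers typically look like, not the library's exact implementation:

    import pickle
    import jax

    def save_pickled_data_sketch(path: str, obj) -> None:
        # Serialize an object to disk with pickle.
        with open(path, "wb") as f:
            pickle.dump(obj, f)

    def load_pickled_data_sketch(path: str, device_put: bool = False):
        # Load a pickled object, optionally moving it onto a JAX device.
        with open(path, "rb") as f:
            obj = pickle.load(f)
        return jax.device_put(obj) if device_put else obj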