13#ifndef MLPACK_METHODS_RL_SAC_HPP
14#define MLPACK_METHODS_RL_SAC_HPP
57 typename EnvironmentType,
58 typename QNetworkType,
59 typename PolicyNetworkType,
61 typename ReplayType = RandomReplay<EnvironmentType>
89 QNetworkType& learningQ1Network,
90 PolicyNetworkType& policyNetwork,
91 ReplayType& replayMethod,
92 UpdaterType qNetworkUpdater = UpdaterType(),
93 UpdaterType policyNetworkUpdater = UpdaterType(),
94 EnvironmentType environment = EnvironmentType());
149 QNetworkType& learningQ1Network;
150 QNetworkType learningQ2Network;
153 QNetworkType targetQ1Network;
154 QNetworkType targetQ2Network;
157 PolicyNetworkType& policyNetwork;
160 ReplayType& replayMethod;
163 UpdaterType qNetworkUpdater;
164 #if ENS_VERSION_MAJOR >= 2
165 typename UpdaterType::template Policy<arma::mat, arma::mat>*
166 qNetworkUpdatePolicy;
170 UpdaterType policyNetworkUpdater;
171 #if ENS_VERSION_MAJOR >= 2
172 typename UpdaterType::template Policy<arma::mat, arma::mat>*
173 policyNetworkUpdatePolicy;
177 EnvironmentType environment;
199#include "sac_impl.hpp"
The mean squared error performance function measures the network's performance according to the mean of squared errors.
Implementation of Soft Actor-Critic, a model-free off-policy actor-critic based deep reinforcement learning algorithm.
StateType & State()
Modify the state of the agent.
void SoftUpdate(double rho)
Softly update the target Q network parameters toward the learning Q network parameters.
double Episode()
Execute an episode.
SAC(TrainingConfig &config, QNetworkType &learningQ1Network, PolicyNetworkType &policyNetwork, ReplayType &replayMethod, UpdaterType qNetworkUpdater=UpdaterType(), UpdaterType policyNetworkUpdater=UpdaterType(), EnvironmentType environment=EnvironmentType())
Create the SAC object with given settings.
size_t & TotalSteps()
Modify total steps from beginning.
const bool & Deterministic() const
Get the indicator of training mode / test mode.
const size_t & TotalSteps() const
Get total steps from beginning.
bool & Deterministic()
Modify the training mode / test mode indicator.
typename EnvironmentType::Action ActionType
Convenient typedef for action.
const StateType & State() const
Get the state of the agent.
const ActionType & Action() const
Get the action of the agent.
void SelectAction()
Select an action, given an agent.
typename EnvironmentType::State StateType
Convenient typedef for state.
void Update()
Update the Q and policy networks.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.