13#ifndef MLPACK_METHODS_RL_TRAINING_CONFIG_HPP
14#define MLPACK_METHODS_RL_TRAINING_CONFIG_HPP
25 targetNetworkSyncInterval(100),
31 doubleQLearning(false),
32 noisyQLearning(false),
42 size_t updateInterval,
43 size_t targetNetworkSyncInterval,
45 size_t explorationSteps,
56 numWorkers(numWorkers),
57 updateInterval(updateInterval),
58 targetNetworkSyncInterval(targetNetworkSyncInterval),
60 explorationSteps(explorationSteps),
63 gradientLimit(gradientLimit),
64 doubleQLearning(doubleQLearning),
65 noisyQLearning(noisyQLearning),
66 isCategorical(isCategorical),
85 {
return targetNetworkSyncInterval; }
138 double VMin()
const {
return vMin; }
140 double&
VMin() {
return vMin; }
143 double VMax()
const {
return vMax; }
145 double&
VMax() {
return vMax; }
148 double Rho()
const {
return rho; }
150 double&
Rho() {
return rho; }
165 size_t updateInterval;
171 size_t targetNetworkSyncInterval;
184 size_t explorationSteps;
202 double gradientLimit;
208 bool doubleQLearning;
double & GradientLimit()
Modify the limit of update gradient.
bool & NoisyQLearning()
Modify the indicator of noisy q-learning.
double StepSize() const
Get the step size of the optimizer.
size_t ExplorationSteps() const
Get the exploration steps.
bool & IsCategorical()
Modify the indicator of categorical q-learning.
bool IsCategorical() const
Get the indicator of categorical q-learning.
double Rho() const
Get the rho value for sac.
size_t & StepLimit()
Modify the maximum steps of each episode.
TrainingConfig(size_t numWorkers, size_t updateInterval, size_t targetNetworkSyncInterval, size_t stepLimit, size_t explorationSteps, double stepSize, double discount, double gradientLimit, bool doubleQLearning, bool noisyQLearning, bool isCategorical, size_t atomSize, double vMin, double vMax, double rho)
double & Rho()
Modify the rho value for sac.
size_t UpdateInterval() const
Get the update interval.
double GradientLimit() const
Get the limit of update gradient.
double & StepSize()
Modify the step size of the optimizer.
bool DoubleQLearning() const
Get the indicator of double q-learning.
double & Discount()
Modify the discount rate for future reward.
double & VMax()
Modify the maximum value for support.
size_t & NumWorkers()
Modify the amount of workers.
bool NoisyQLearning() const
Get the indicator of noisy q-learning.
size_t NumWorkers() const
Get the amount of workers.
double & VMin()
Modify the minimum value for support.
size_t & AtomSize()
Modify the number of atoms.
double Discount() const
Get the discount rate for future reward.
size_t & ExplorationSteps()
Modify the exploration steps.
size_t StepLimit() const
Get the maximum steps of each episode.
size_t TargetNetworkSyncInterval() const
Get the interval for syncing target network.
size_t AtomSize() const
Get the number of atoms.
size_t & TargetNetworkSyncInterval()
Modify the interval for syncing target network.
double VMin() const
Get the minimum value for support.
bool & DoubleQLearning()
Modify the indicator of double q-learning.
double VMax() const
Get the maximum value for support.
size_t & UpdateInterval()
Modify the update interval.
Linear algebra utility functions, generally performed on matrices or vectors.