12#ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
13#define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
29template <
typename EnvironmentType>
34 using State =
typename EnvironmentType::State;
37 using Action =
typename EnvironmentType::Action;
48 const double minReward = -1.0,
49 const double maxReward = 1.0) :
50 environment(environment),
64 return environment.InitialSample();
76 return environment.IsTerminal(state);
93 double unclippedReward = environment.Sample(state, action, nextState);
109 return Sample(state, action, nextState);
129 EnvironmentType environment;
Miscellaneous math clamping routines.
Interface for clipping the reward to a value between the specified minimum and maximum values.
double MaxReward() const
Get the maximum reward value.
double & MaxReward()
Modify the maximum reward value.
typename EnvironmentType::Action Action
Convenient typedef for action.
typename EnvironmentType::State State
Convenient typedef for state.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Environment.
double & MinReward()
Modify the minimum reward value.
EnvironmentType & Environment() const
Get the environment.
bool IsTerminal(const State &state) const
Checks whether given state is a terminal state.
RewardClipping(EnvironmentType &environment, const double minReward=-1.0, const double maxReward=1.0)
Constructor for creating a RewardClipping instance.
State InitialSample()
The InitialSample method returns the starting state obtained by calling InitialSample() on the wrapped environment.
double MinReward() const
Get the minimum reward value.
EnvironmentType & Environment()
Modify the environment.
double Sample(const State &state, const Action &action)
Dynamics of Environment.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Linear algebra utility functions, generally performed on matrices or vectors.
The core includes that mlpack expects; standard C++ includes and Armadillo.