mlpack 3.4.2
reward_clipping.hpp
/**
 * @file reward_clipping.hpp
 *
 * Reward clipping wrapper for reinforcement learning environments.
 */
#ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
#define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/core/math/clamp.hpp>

namespace mlpack {
namespace rl {

/**
 * Interface for clipping the reward to some value between the specified
 * maximum and minimum value.
 */
template <typename EnvironmentType>
class RewardClipping
{
 public:
  //! Convenient typedef for state.
  using State = typename EnvironmentType::State;

  //! Convenient typedef for action.
  using Action = typename EnvironmentType::Action;

  /**
   * Constructor for creating a RewardClipping instance.
   *
   * @param environment An instance of the environment to be wrapped.
   * @param minReward Minimum possible value of the clipped reward.
   * @param maxReward Maximum possible value of the clipped reward.
   */
  RewardClipping(EnvironmentType& environment,
                 const double minReward = -1.0,
                 const double maxReward = 1.0) :
      environment(environment),
      minReward(minReward),
      maxReward(maxReward)
  {
    // Nothing to do here.
  }

  /**
   * The InitialSample method initializes the starting state by deferring
   * to the wrapped environment.
   */
  State InitialSample()
  {
    return environment.InitialSample();
  }

  /**
   * Checks whether the given state is a terminal state, by deferring to
   * the wrapped environment.
   */
  bool IsTerminal(const State& state) const
  {
    return environment.IsTerminal(state);
  }

  /**
   * Dynamics of the environment. Samples a reward from the wrapped
   * environment and clips it to the range [minReward, maxReward].
   *
   * @param state The current state.
   * @param action The action to be applied.
   * @param nextState The next state.
   * @return Reward clipped between minReward and maxReward.
   */
  double Sample(const State& state,
                const Action& action,
                State& nextState)
  {
    // Get the original, unclipped reward from the base environment.
    double unclippedReward = environment.Sample(state, action, nextState);
    // Clip the reward to the minimum and maximum limits and return it.
    return math::ClampRange(unclippedReward, minReward, maxReward);
  }

  /**
   * Dynamics of the environment. Convenience overload that samples a
   * clipped reward and discards the next state.
   */
  double Sample(const State& state, const Action& action)
  {
    State nextState;
    return Sample(state, action, nextState);
  }

  //! Get the environment.
  EnvironmentType& Environment() const { return environment; }
  //! Modify the environment.
  EnvironmentType& Environment() { return environment; }

  //! Get the minimum reward value.
  double MinReward() const { return minReward; }
  //! Modify the minimum reward value.
  double& MinReward() { return minReward; }

  //! Get the maximum reward value.
  double MaxReward() const { return maxReward; }
  //! Modify the maximum reward value.
  double& MaxReward() { return maxReward; }

 private:
  //! An instance of the wrapped environment.
  EnvironmentType environment;

  //! Minimum possible value of the clipped reward.
  double minReward;

  //! Maximum possible value of the clipped reward.
  double maxReward;
};

} // namespace rl
} // namespace mlpack

#endif
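
The clipping applied in Sample() is a plain clamp: math::ClampRange(x,
minReward, maxReward) returns x limited to the closed interval [minReward,
maxReward]. A minimal stand-alone sketch of the equivalent computation,
using only the C++ standard library (the helper name ClipReward is
illustrative, not part of mlpack):

#include <algorithm>

// Clamp a reward into [minReward, maxReward], mirroring what
// RewardClipping::Sample() does via math::ClampRange().
double ClipReward(const double reward,
                  const double minReward,
                  const double maxReward)
{
  return std::max(std::min(reward, maxReward), minReward);
}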
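
A minimal usage sketch: RewardClipping wraps any environment type that
exposes State, Action, InitialSample(), Sample(), and IsTerminal(). The
CartPole environment used here is one of the environments bundled with
mlpack's reinforcement learning code; the include paths and the concrete
action value are illustrative.

#include <mlpack/methods/reinforcement_learning/environment/cart_pole.hpp>
#include <mlpack/methods/reinforcement_learning/environment/reward_clipping.hpp>

using namespace mlpack::rl;

int main()
{
  // Wrap a CartPole environment so that every sampled reward is clipped
  // to the range [-1, 1].
  CartPole base;
  RewardClipping<CartPole> clipped(base, -1.0, 1.0);

  // Interact with the wrapper exactly as with the underlying environment.
  CartPole::State state = clipped.InitialSample();

  CartPole::Action action;
  action.action = CartPole::Action::backward;

  CartPole::State nextState;
  const double reward = clipped.Sample(state, action, nextState);

  // reward now lies within [clipped.MinReward(), clipped.MaxReward()].
  return 0;
}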