mlpack 3.4.2
continuous_mountain_car.hpp
Go to the documentation of this file.
1
17#ifndef MLPACK_METHODS_RL_ENVIRONMENT_CONTINUOUS_MOUNTAIN_CAR_HPP
18#define MLPACK_METHODS_RL_ENVIRONMENT_CONTINUOUS_MOUNTAIN_CAR_HPP
19
20#include <mlpack/prereqs.hpp>
22
23namespace mlpack {
24namespace rl {
25
30{
31 public:
36 class State
37 {
38 public:
42 State() : data(dimension, arma::fill::zeros)
43 { /* Nothing to do here. */ }
44
50 State(const arma::colvec& data): data(data)
51 { /* Nothing to do here. */ }
52
54 arma::colvec& Data() { return data; }
55
57 double Velocity() const { return data[0]; }
59 double& Velocity() { return data[0]; }
60
62 double Position() const { return data[1]; }
64 double& Position() { return data[1]; }
65
67 const arma::colvec& Encode() const { return data; }
68
70 static constexpr size_t dimension = 2;
71
72 private:
74 arma::colvec data;
75 };
76
85 struct Action
86 {
87 double action[1];
88 // Storing degree of freedom
89 const int size = 1;
90 };
91
105 ContinuousMountainCar(const double positionMin = -1.2,
106 const double positionMax = 0.6,
107 const double positionGoal = 0.45,
108 const double velocityMin = -0.07,
109 const double velocityMax = 0.07,
110 const double duration = 0.0015,
111 const double doneReward = 100,
112 const size_t maxSteps = 0) :
113 positionMin(positionMin),
114 positionMax(positionMax),
115 positionGoal(positionGoal),
116 velocityMin(velocityMin),
117 velocityMax(velocityMax),
118 duration(duration),
119 doneReward(doneReward),
120 maxSteps(maxSteps),
121 stepsPerformed(0)
122 { /* Nothing to do here */ }
123
132 double Sample(const State& state,
133 const Action& action,
134 State& nextState)
135 {
136 // Update the number of steps performed.
137 stepsPerformed++;
138
139 // Calculate acceleration.
140 double force = math::ClampRange(action.action[0], -1.0, 1.0);
141
142 // Update states.
143 nextState.Velocity() = state.Velocity() + force * duration - 0.0025 *
144 std::cos(3 * state.Position());
145 nextState.Velocity() = math::ClampRange(nextState.Velocity(),
146 velocityMin, velocityMax);
147 nextState.Position() = state.Position() + nextState.Velocity();
148 nextState.Position() = math::ClampRange(nextState.Position(),
149 positionMin, positionMax);
150 if (nextState.Position() == positionMin && nextState.Velocity() < 0)
151 nextState.Velocity() = 0.0;
152
153 // Check if the episode has terminated.
154 bool done = IsTerminal(nextState);
155
156 // Do not reward the agent if time ran out.
157 if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
158 return 0;
159 else if (done)
160 return doneReward;
161
162 return std::pow(action.action[0], 2) * 0.1;
163 }
164
173 double Sample(const State& state, const Action& action)
174 {
175 State nextState;
176 return Sample(state, action, nextState);
177 }
178
186 {
187 State state;
188 stepsPerformed = 0;
189 state.Velocity() = 0.0;
190 state.Position() = math::Random(-0.6, -0.4);
191 return state;
192 }
193
200 bool IsTerminal(const State& state) const
201 {
202 if (maxSteps != 0 && stepsPerformed >= maxSteps)
203 {
204 Log::Info << "Episode terminated due to the maximum number of steps"
205 "being taken.";
206 return true;
207 }
208 else if (state.Position() >= positionGoal)
209 {
210 Log::Info << "Episode terminated due to agent succeeding.";
211 return true;
212 }
213 return false;
214 }
215
217 size_t StepsPerformed() const { return stepsPerformed; }
218
220 size_t MaxSteps() const { return maxSteps; }
222 size_t& MaxSteps() { return maxSteps; }
223
224 private:
226 double positionMin;
227
229 double positionMax;
230
232 double positionGoal;
233
235 double velocityMin;
236
238 double velocityMax;
239
241 double duration;
242
244 double doneReward;
245
247 size_t maxSteps;
248
250 size_t stepsPerformed;
251};
252
253} // namespace rl
254} // namespace mlpack
255
256#endif
Miscellaneous math clamping routines.
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
Implementation of state of Continuous Mountain Car.
const arma::colvec & Encode() const
Encode the state to a column vector.
State(const arma::colvec &data)
Construct a state based on the given data.
static constexpr size_t dimension
Dimension of the encoded state.
arma::colvec & Data()
Modify the internal representation of the state.
Implementation of Continuous Mountain Car task.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Continuous Mountain Car.
size_t & MaxSteps()
Set the maximum number of steps allowed.
ContinuousMountainCar(const double positionMin=-1.2, const double positionMax=0.6, const double positionGoal=0.45, const double velocityMin=-0.07, const double velocityMax=0.07, const double duration=0.0015, const double doneReward=100, const size_t maxSteps=0)
Construct a Continuous Mountain Car instance using the given constant.
size_t StepsPerformed() const
Get the number of steps performed.
bool IsTerminal(const State &state) const
Whether given state is a terminal state.
State InitialSample()
Initial position is randomly generated within [-0.6, -0.4].
size_t MaxSteps() const
Get the maximum number of steps allowed.
double Sample(const State &state, const Action &action)
Dynamics of Continuous Mountain Car.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.
Implementation of action of Continuous Mountain Car.