mlpack 3.4.2
mountain_car.hpp
Go to the documentation of this file.
1
16#ifndef MLPACK_METHODS_RL_ENVIRONMENT_MOUNTAIN_CAR_HPP
17#define MLPACK_METHODS_RL_ENVIRONMENT_MOUNTAIN_CAR_HPP
18
19#include <mlpack/prereqs.hpp>
21
22namespace mlpack {
23namespace rl {
24
29{
30 public:
35 class State
36 {
37 public:
41 State(): data(dimension, arma::fill::zeros)
42 { /* Nothing to do here. */ }
43
49 State(const arma::colvec& data): data(data)
50 { /* Nothing to do here. */ }
51
53 arma::colvec& Data() { return data; }
54
56 double Velocity() const { return data[0]; }
58 double& Velocity() { return data[0]; }
59
61 double Position() const { return data[1]; }
63 double& Position() { return data[1]; }
64
66 const arma::colvec& Encode() const { return data; }
67
69 static constexpr size_t dimension = 2;
70
71 private:
73 arma::colvec data;
74 };
75
79 class Action
80 {
81 public:
83 {
87 };
88 // To store the action.
90
91 // Track the size of the action space.
92 static const size_t size = 3;
93 };
94
107 MountainCar(const size_t maxSteps = 200,
108 const double positionMin = -1.2,
109 const double positionMax = 0.6,
110 const double positionGoal = 0.5,
111 const double velocityMin = -0.07,
112 const double velocityMax = 0.07,
113 const double doneReward = 0) :
114 maxSteps(maxSteps),
115 positionMin(positionMin),
116 positionMax(positionMax),
117 positionGoal(positionGoal),
118 velocityMin(velocityMin),
119 velocityMax(velocityMax),
120 doneReward(doneReward),
121 stepsPerformed(0)
122 { /* Nothing to do here */ }
123
133 double Sample(const State& state,
134 const Action& action,
135 State& nextState)
136 {
137 // Update the number of steps performed.
138 stepsPerformed++;
139
140 // Calculate acceleration.
141 int direction = action.action - 1;
142 nextState.Velocity() = state.Velocity() + 0.001 * direction - 0.0025 *
143 std::cos(3 * state.Position());
144 nextState.Velocity() = math::ClampRange(nextState.Velocity(),
145 velocityMin, velocityMax);
146
147 // Update states.
148 nextState.Position() = state.Position() + nextState.Velocity();
149 nextState.Position() = math::ClampRange(nextState.Position(),
150 positionMin, positionMax);
151
152 if (nextState.Position() == positionMin && nextState.Velocity() < 0)
153 nextState.Velocity() = 0.0;
154
155 // Check if the episode has terminated.
156 bool done = IsTerminal(nextState);
157
158 // Do not reward the agent if time ran out.
159 if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
160 return 0;
161 else if (done)
162 return doneReward;
163
164 return -1;
165 }
166
175 double Sample(const State& state, const Action& action)
176 {
177 State nextState;
178 return Sample(state, action, nextState);
179 }
180
188 {
189 State state;
190 stepsPerformed = 0;
191 state.Velocity() = 0.0;
192 state.Position() = arma::as_scalar(arma::randu(1)) * 0.2 - 0.6;
193 return state;
194 }
195
202 bool IsTerminal(const State& state) const
203 {
204 if (maxSteps != 0 && stepsPerformed >= maxSteps)
205 {
206 Log::Info << "Episode terminated due to the maximum number of steps"
207 "being taken.";
208 return true;
209 }
210 else if (state.Position() >= positionGoal)
211 {
212 Log::Info << "Episode terminated due to agent succeeding.";
213 return true;
214 }
215 return false;
216 }
217
219 size_t StepsPerformed() const { return stepsPerformed; }
220
222 size_t MaxSteps() const { return maxSteps; }
224 size_t& MaxSteps() { return maxSteps; }
225
226 private:
228 size_t maxSteps;
229
231 double positionMin;
232
234 double positionMax;
235
237 double positionGoal;
238
240 double velocityMin;
241
243 double velocityMax;
244
246 double doneReward;
247
249 size_t stepsPerformed;
250};
251
252} // namespace rl
253} // namespace mlpack
254
255#endif
Miscellaneous math clamping routines.
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
Implementation of action of Mountain Car.
Implementation of state of Mountain Car.
const arma::colvec & Encode() const
Encode the state to a column vector.
double & Velocity()
Modify the velocity.
double Velocity() const
Get the velocity.
State()
Construct a state instance.
State(const arma::colvec &data)
Construct a state based on the given data.
double & Position()
Modify the position.
double Position() const
Get the position.
static constexpr size_t dimension
Dimension of the encoded state.
arma::colvec & Data()
Modify the internal representation of the state.
Implementation of Mountain Car task.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Mountain Car.
MountainCar(const size_t maxSteps=200, const double positionMin=-1.2, const double positionMax=0.6, const double positionGoal=0.5, const double velocityMin=-0.07, const double velocityMax=0.07, const double doneReward=0)
Construct a Mountain Car instance using the given constant.
size_t & MaxSteps()
Set the maximum number of steps allowed.
size_t StepsPerformed() const
Get the number of steps performed.
bool IsTerminal(const State &state) const
This function checks if the car has reached the terminal state.
State InitialSample()
Initial position is randomly generated within [-0.6, -0.4].
size_t MaxSteps() const
Get the maximum number of steps allowed.
double Sample(const State &state, const Action &action)
Dynamics of Mountain Car.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.