mlpack 3.4.2
acrobot.hpp
Go to the documentation of this file.
1
13#ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP
14#define MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP
15
16#include <mlpack/core.hpp>
17
18namespace mlpack{
19namespace rl{
20
29{
30 public:
31 /*
32 * Implementation of Acrobot State. Each State is a tuple vector
33 * (theta1, thetha2, angular velocity 1, angular velocity 2).
34 */
35 class State
36 {
37 public:
41 State(): data(dimension) { /* nothing to do here */ }
42
48 State(const arma::colvec& data) : data(data)
49 { /* nothing to do here */ }
50
52 arma::colvec& Data() { return data; }
53
55 double Theta1() const { return data[0]; }
57 double& Theta1() { return data[0]; }
58
60 double Theta2() const { return data[1]; }
62 double& Theta2() { return data[1]; }
63
65 double AngularVelocity1() const { return data[2]; }
67 double& AngularVelocity1() { return data[2]; }
68
70 double AngularVelocity2() const { return data[3]; }
72 double& AngularVelocity2() { return data[3]; }
73
75 const arma::colvec& Encode() const { return data; }
76
78 static constexpr size_t dimension = 4;
79
80 private:
82 arma::colvec data;
83 };
84
85 /*
86 * Implementation of action for Acrobot
87 */
88 class Action
89 {
90 public:
92 {
96 };
97 // To store the action.
99
100 // Track the size of the action space.
101 static const size_t size = 3;
102 };
103
122 Acrobot(const size_t maxSteps = 500,
123 const double gravity = 9.81,
124 const double linkLength1 = 1.0,
125 const double linkLength2 = 1.0,
126 const double linkMass1 = 1.0,
127 const double linkMass2 = 1.0,
128 const double linkCom1 = 0.5,
129 const double linkCom2 = 0.5,
130 const double linkMoi = 1.0,
131 const double maxVel1 = 4 * M_PI,
132 const double maxVel2 = 9 * M_PI,
133 const double dt = 0.2,
134 const double doneReward = 0) :
135 maxSteps(maxSteps),
136 gravity(gravity),
137 linkLength1(linkLength1),
138 linkLength2(linkLength2),
139 linkMass1(linkMass1),
140 linkMass2(linkMass2),
141 linkCom1(linkCom1),
142 linkCom2(linkCom2),
143 linkMoi(linkMoi),
144 maxVel1(maxVel1),
145 maxVel2(maxVel2),
146 dt(dt),
147 doneReward(doneReward),
148 stepsPerformed(0)
149 { /* Nothing to do here */ }
150
160 double Sample(const State& state,
161 const Action& action,
162 State& nextState)
163 {
164 // Update the number of steps performed.
165 stepsPerformed++;
166
167 // Make a vector to estimate nextstate.
168 arma::colvec currentState = {state.Theta1(), state.Theta2(),
169 state.AngularVelocity1(), state.AngularVelocity2()};
170
171 arma::colvec currentNextState = Rk4(currentState, Torque(action));
172
173 nextState.Theta1() = Wrap(currentNextState[0], -M_PI, M_PI);
174
175 nextState.Theta2() = Wrap(currentNextState[1], -M_PI, M_PI);
176
178 nextState.AngularVelocity1() = math::ClampRange(currentNextState[2],
179 -maxVel1, maxVel1);
180 nextState.AngularVelocity2() = math::ClampRange(currentNextState[3],
181 -maxVel2, maxVel2);
182
183 // Check if the episode has terminated.
184 bool done = IsTerminal(nextState);
185
186 // Do not reward the agent if time ran out.
187 if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
188 return 0;
189 else if (done)
190 return doneReward;
191
192 return -1;
193 };
194
204 double Sample(const State& state, const Action& action)
205 {
206 State nextState;
207 return Sample(state, action, nextState);
208 }
209
214 {
215 stepsPerformed = 0;
216 return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
217 }
218
225 bool IsTerminal(const State& state) const
226 {
227 if (maxSteps != 0 && stepsPerformed >= maxSteps)
228 {
229 Log::Info << "Episode terminated due to the maximum number of steps"
230 "being taken.";
231 return true;
232 }
233 else if (-std::cos(state.Theta1()) - std::cos(state.Theta1() +
234 state.Theta2()) > 1.0)
235 {
236 Log::Info << "Episode terminated due to agent succeeding.";
237 return true;
238 }
239 return false;
240 }
241
249 arma::colvec Dsdt(arma::colvec state, const double torque) const
250 {
251 const double m1 = linkMass1;
252 const double m2 = linkMass2;
253 const double l1 = linkLength1;
254 const double lc1 = linkCom1;
255 const double lc2 = linkCom2;
256 const double I1 = linkMoi;
257 const double I2 = linkMoi;
258 const double g = gravity;
259 const double a = torque;
260 const double theta1 = state[0];
261 const double theta2 = state[1];
262
263 arma::colvec values(4);
264 values[0] = state[2];
265 values[1] = state[3];
266
267 const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
268 std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
269
270 const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
271 I2;
272
273 const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 - M_PI / 2.);
274
275 const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
276 std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
277 std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
278 std::cos(theta1 - M_PI / 2) + phi2;
279
280 values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
281 std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
282 std::pow(d2, 2) / d1);
283
284 values[2] = -(d2 * values[3] + phi1) / d1;
285
286 return values;
287 };
288
298 double Wrap(double value,
299 const double minimum,
300 const double maximum) const
301 {
302 const double diff = maximum - minimum;
303
304 if (value > maximum)
305 {
306 value = value - diff;
307 }
308 else if (value < minimum)
309 {
310 value = value + diff;
311 }
312
313 return value;
314 };
315
322 double Torque(const Action& action) const
323 {
324 // Add noise to the Torque Torque is action number - 1. {0,1,2} -> {-1,0,1}.
325 return double(action.action - 1) + mlpack::math::Random(-0.1, 0.1);
326 }
327
335 arma::colvec Rk4(const arma::colvec state, const double torque) const
336 {
337 arma::colvec k1 = Dsdt(state, torque);
338 arma::colvec k2 = Dsdt(state + dt * k1 / 2, torque);
339 arma::colvec k3 = Dsdt(state + dt * k2 / 2, torque);
340 arma::colvec k4 = Dsdt(state + dt * k3, torque);
341 arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
342
343 return nextState;
344 };
345
347 size_t StepsPerformed() const { return stepsPerformed; }
348
350 size_t MaxSteps() const { return maxSteps; }
352 size_t& MaxSteps() { return maxSteps; }
353
354 private:
356 size_t maxSteps;
357
359 double gravity;
360
362 double linkLength1;
363
365 double linkLength2;
366
368 double linkMass1;
369
371 double linkMass2;
372
374 double linkCom1;
375
377 double linkCom2;
378
380 double linkMoi;
381
383 double maxVel1;
384
386 double maxVel2;
387
389 double dt;
390
392 double doneReward;
393
395 size_t stepsPerformed;
396};
397
398} // namespace rl
399} // namespace mlpack
400
401#endif
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
static const size_t size
Definition: acrobot.hpp:101
Action::actions action
Definition: acrobot.hpp:98
double Theta2() const
Get value of theta (two).
Definition: acrobot.hpp:60
double & Theta1()
Modify value of theta (one).
Definition: acrobot.hpp:57
double & Theta2()
Modify value of theta (two).
Definition: acrobot.hpp:62
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: acrobot.hpp:75
State()
Construct a state instance.
Definition: acrobot.hpp:41
State(const arma::colvec &data)
Construct a state instance from given data.
Definition: acrobot.hpp:48
double & AngularVelocity1()
Modify the angular velocity (one).
Definition: acrobot.hpp:67
double AngularVelocity1() const
Get value of Angular velocity (one).
Definition: acrobot.hpp:65
double & AngularVelocity2()
Modify the angular velocity (two).
Definition: acrobot.hpp:72
double AngularVelocity2() const
Get value of Angular velocity (two).
Definition: acrobot.hpp:70
static constexpr size_t dimension
Dimension of the encoded state.
Definition: acrobot.hpp:78
arma::colvec & Data()
Modify the state representation.
Definition: acrobot.hpp:52
double Theta1() const
Get value of theta (one).
Definition: acrobot.hpp:55
Implementation of Acrobot game.
Definition: acrobot.hpp:29
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of the Acrobot System.
Definition: acrobot.hpp:160
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: acrobot.hpp:352
size_t StepsPerformed() const
Get the number of steps performed.
Definition: acrobot.hpp:347
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method.
Definition: acrobot.hpp:249
bool IsTerminal(const State &state) const
This function checks if the acrobot has reached the terminal state.
Definition: acrobot.hpp:225
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
Definition: acrobot.hpp:298
State InitialSample()
This function does random initialization of state space.
Definition: acrobot.hpp:213
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: acrobot.hpp:350
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
Definition: acrobot.hpp:335
double Torque(const Action &action) const
This function calculates the torque for a particular action.
Definition: acrobot.hpp:322
Acrobot(const size_t maxSteps=500, const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0)
Construct a Acrobot instance using the given constants.
Definition: acrobot.hpp:122
double Sample(const State &state, const Action &action)
Dynamics of the Acrobot System.
Definition: acrobot.hpp:204
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
#define M_PI
Definition: prereqs.hpp:39