mlpack 3.4.2
pendulum.hpp
Go to the documentation of this file.
1
16#ifndef MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
17#define MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
18
19#include <mlpack/prereqs.hpp>
21
22namespace mlpack {
23namespace rl {
24
32{
33 public:
38 class State
39 {
40 public:
44 State() : theta(0), data(dimension, arma::fill::zeros)
45 { /* Nothing to do here. */ }
46
53 State(const arma::colvec& data): theta(0), data(data)
54 { /* Nothing to do here. */ }
55
57 arma::colvec& Data() { return data; }
58
60 double Theta() const { return theta; }
62 double& Theta() { return theta; }
63
65 double AngularVelocity() const { return data[2]; }
67 double& AngularVelocity() { return data[2]; }
68
70 const arma::colvec& Encode() { return data; }
71
73 void SetState()
74 {
75 data[0] = std::sin(theta);
76 data[1] = std::cos(theta);
77 }
78
80 static constexpr size_t dimension = 3;
81
82 private:
84 double theta;
85
87 arma::colvec data;
88 };
89
/**
 * Implementation of action of Pendulum.
 *
 * The action is stored as a vector of doubles; the pendulum dynamics read
 * only its first entry, action[0], as the applied torque.
 */
class Action
{
 public:
  /**
   * Construct an action instance holding `size` zero-initialized entries.
   *
   * NOTE(review): the constructor's declaration line was missing from this
   * listing; the initializer is reconstructed so that action[0] is always a
   * valid element -- confirm against the upstream header.
   */
  Action() : action(size)
  { /* Nothing to do here */ }

  //! Action values; entry 0 is the torque.
  std::vector<double> action;
  // Storing degree of freedom.
  static const size_t size = 1;
};
107
118 Pendulum(const size_t maxSteps = 200,
119 const double maxAngularVelocity = 8,
120 const double maxTorque = 2.0,
121 const double dt = 0.05,
122 const double doneReward = 0.0) :
123 maxSteps(maxSteps),
124 maxAngularVelocity(maxAngularVelocity),
125 maxTorque(maxTorque),
126 dt(dt),
127 doneReward(doneReward),
128 stepsPerformed(0)
129 { /* Nothing to do here */ }
130
140 double Sample(const State& state,
141 const Action& action,
142 State& nextState)
143 {
144 // Update the number of steps performed.
145 stepsPerformed++;
146
147 // Get current state.
148 double theta = state.Theta();
149 double angularVelocity = state.AngularVelocity();
150
151 // Define constants which specify our pendulum.
152 const double gravity = 10.0;
153 const double mass = 1.0;
154 const double length = 1.0;
155
156 // Get action and clip the values between max and min limits.
157 double torque = math::ClampRange(action.action[0], -maxTorque, maxTorque);
158
159 // Calculate costs of taking this action in the current state.
160 double costs = std::pow(AngleNormalize(theta), 2) + 0.1 *
161 std::pow(angularVelocity, 2) + 0.001 * std::pow(torque, 2);
162
163 // Calculate new state values and assign to the next state.
164 double newAngularVelocity = angularVelocity + (-3.0 * gravity / (2 *
165 length) * std::sin(theta + M_PI) + 3.0 / (mass * std::pow(length, 2)) *
166 torque) * dt;
167 nextState.Theta() = theta + newAngularVelocity * dt;
168 nextState.AngularVelocity() = math::ClampRange(newAngularVelocity,
169 -maxAngularVelocity, maxAngularVelocity);
170
171 nextState.SetState();
172
173 // Return the reward of taking the action in current state.
174 // The reward is simply the negative of cost incurred for the action.
175 return -costs;
176 }
177
185 double Sample(const State& state, const Action& action)
186 {
187 State nextState;
188 return Sample(state, action, nextState);
189 }
190
198 {
199 State state;
200 state.Theta() = math::Random(-M_PI, M_PI);
201 state.AngularVelocity() = math::Random(-1.0, 1.0);
202 stepsPerformed = 0;
203 state.SetState();
204 return state;
205 }
206
212 double AngleNormalize(double theta) const
213 {
214 // Scale angle within [-pi, pi).
215 double x = fmod(theta + M_PI, 2 * M_PI);
216 if (x < 0)
217 x += 2 * M_PI;
218 return x - M_PI;
219 }
220
227 bool IsTerminal(const State& /* state */) const
228 {
229 if (maxSteps != 0 && stepsPerformed >= maxSteps)
230 {
231 Log::Info << "Episode terminated due to the maximum number of steps"
232 "being taken.";
233 return true;
234 }
235 return false;
236 }
237
239 size_t StepsPerformed() const { return stepsPerformed; }
240
242 size_t MaxSteps() const { return maxSteps; }
244 size_t& MaxSteps() { return maxSteps; }
245
246 private:
248 size_t maxSteps;
249
251 double maxAngularVelocity;
252
254 double maxTorque;
255
257 double dt;
258
260 double doneReward;
261
263 size_t stepsPerformed;
264};
265
266} // namespace rl
267} // namespace mlpack
268
269#endif
Miscellaneous math clamping routines.
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
Implementation of action of Pendulum.
Definition: pendulum.hpp:96
static const size_t size
Definition: pendulum.hpp:105
Action()
Construct an action instance.
Definition: pendulum.hpp:101
std::vector< double > action
Definition: pendulum.hpp:103
Implementation of state of Pendulum.
Definition: pendulum.hpp:39
double Theta() const
Get the theta.
Definition: pendulum.hpp:60
State()
Construct a state instance.
Definition: pendulum.hpp:44
double AngularVelocity() const
Get the angular velocity.
Definition: pendulum.hpp:65
State(const arma::colvec &data)
Construct a state based on the given data.
Definition: pendulum.hpp:53
double & Theta()
Modify the value of theta.
Definition: pendulum.hpp:62
double & AngularVelocity()
Modify the value of angular velocity.
Definition: pendulum.hpp:67
const arma::colvec & Encode()
Encode the state to a column vector.
Definition: pendulum.hpp:70
static constexpr size_t dimension
Dimension of the encoded state.
Definition: pendulum.hpp:80
void SetState()
Updates the theta transformations in data.
Definition: pendulum.hpp:73
arma::colvec & Data()
Modify the internal representation of the state.
Definition: pendulum.hpp:57
Implementation of Pendulum task.
Definition: pendulum.hpp:32
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Pendulum.
Definition: pendulum.hpp:140
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: pendulum.hpp:244
Pendulum(const size_t maxSteps=200, const double maxAngularVelocity=8, const double maxTorque=2.0, const double dt=0.05, const double doneReward=0.0)
Construct a Pendulum instance using the given values.
Definition: pendulum.hpp:118
size_t StepsPerformed() const
Get the number of steps performed.
Definition: pendulum.hpp:239
bool IsTerminal(const State &) const
This function checks if the pendulum has reached a terminal state.
Definition: pendulum.hpp:227
State InitialSample()
Initial theta is randomly generated within [-pi, pi].
Definition: pendulum.hpp:197
double AngleNormalize(double theta) const
This function calculates the normalized angle for a particular theta.
Definition: pendulum.hpp:212
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: pendulum.hpp:242
double Sample(const State &state, const Action &action)
Dynamics of Pendulum.
Definition: pendulum.hpp:185
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:83
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.
#define M_PI
Definition: prereqs.hpp:39