mlpack 3.4.2
lstm.hpp
Go to the documentation of this file.
1
12#ifndef MLPACK_METHODS_ANN_LAYER_LSTM_HPP
13#define MLPACK_METHODS_ANN_LAYER_LSTM_HPP
14
15#include <mlpack/prereqs.hpp>
16#include <limits>
17
18namespace mlpack {
19namespace ann {
20
//! Implementation of the LSTM module class.
//!
//! @tparam InputDataType Type of the input data (defaults to arma::mat).
//! @tparam OutputDataType Type of the output data (defaults to arma::mat); it
//!     is also the type used for every matrix-valued member stored below.
//!
//! Only declarations and trivial accessors live in this header; the member
//! function bodies come from lstm_impl.hpp, included at the end of the file.
58template <
59 typename InputDataType = arma::mat,
60 typename OutputDataType = arma::mat
61>
62class LSTM
63{
64 public:
67
//! Create the LSTM layer object using the specified parameters.
//!
//! @param inSize The number of input units.
//! @param outSize The number of output units.
//! @param rho Maximum number of steps to backpropagate through time (BPTT);
//!     defaults to the largest value representable by size_t.
//!
//! NOTE(review): the documentation index for this file also lists a default
//! constructor LSTM(), but no such declaration is visible in this listing.
75 LSTM(const size_t inSize,
76 const size_t outSize,
77 const size_t rho = std::numeric_limits<size_t>::max());
78
//! Ordinary feed-forward pass of the layer, evaluating the function f(x).
//!
//! @param input Input data used for evaluating the function.
//! @param output Non-const reference; presumably receives the resulting
//!     output activation -- confirm in lstm_impl.hpp.
86 template<typename InputType, typename OutputType>
87 void Forward(const InputType& input, OutputType& output);
88
//! Ordinary feed-forward pass of the layer, evaluating the function f(x),
//! with an additional cell-state argument.
//!
//! @param input Input data used for evaluating the function.
//! @param output Non-const reference; presumably receives the resulting
//!     output activation -- confirm in lstm_impl.hpp.
//! @param cellState Cell state passed by non-const reference.
//! @param useCellState Defaults to false.
//!     NOTE(review): presumably selects whether cellState is used as the
//!     starting cell state -- confirm in lstm_impl.hpp.
98 template<typename InputType, typename OutputType>
99 void Forward(const InputType& input,
100 OutputType& output,
101 OutputType& cellState,
102 bool useCellState = false);
103
//! Ordinary feed-backward pass of the layer.
//!
//! @param input The input activation handed in by the caller.
//! @param gy The backpropagated error (assumed from the parameter's role;
//!     TODO confirm).
//! @param g Non-const reference; presumably receives the calculated
//!     gradient -- confirm in lstm_impl.hpp.
113 template<typename InputType, typename ErrorType, typename GradientType>
114 void Backward(const InputType& input,
115 const ErrorType& gy,
116 GradientType& g);
117
118 /*
119 * Reset the layer parameter.
120 */
121 void Reset();
122
123 /*
124 * Resets the cell to accept a new input. This breaks the BPTT chain and
125 * starts a new one.
126 *
127 * @param size The current maximum number of steps through time.
128 */
129 void ResetCell(const size_t size);
130
131 /*
132 * Calculate the gradient using the output delta and the input activation.
133 *
134 * @param input The input parameter used for calculating the gradient.
135 * @param error The calculated error.
136 * @param gradient The calculated gradient.
137 */
138 template<typename InputType, typename ErrorType, typename GradientType>
139 void Gradient(const InputType& input,
140 const ErrorType& error,
141 GradientType& gradient);
142
//! Get the maximum number of steps to backpropagate through time (BPTT).
144 size_t Rho() const { return rho; }
//! Modify the maximum number of steps to backpropagate through time (BPTT).
146 size_t& Rho() { return rho; }
147
//! Get the parameters.
149 OutputDataType const& Parameters() const { return weights; }
//! Modify the parameters.
151 OutputDataType& Parameters() { return weights; }
152
//! Get the output parameter.
154 OutputDataType const& OutputParameter() const { return outputParameter; }
//! Modify the output parameter.
156 OutputDataType& OutputParameter() { return outputParameter; }
157
//! Get the delta.
159 OutputDataType const& Delta() const { return delta; }
//! Modify the delta.
161 OutputDataType& Delta() { return delta; }
162
//! Get the gradient.
164 OutputDataType const& Gradient() const { return grad; }
//! Modify the gradient.
166 OutputDataType& Gradient() { return grad; }
167
//! Get the number of input units.
169 size_t InSize() const { return inSize; }
170
//! Get the number of output units.
172 size_t OutSize() const { return outSize; }
173
//! Serialize the layer.
177 template<typename Archive>
178 void serialize(Archive& ar, const unsigned int /* version */);
179
180 private:
// NOTE(review): this listing carries no descriptions for the private members.
// Comments are given only where a public accessor above pins down the
// member's meaning; the roles of the remaining members (step counters, gate
// weights/biases, activations, error terms) are presumably what their names
// suggest -- confirm against lstm_impl.hpp before relying on them.

//! Number of input units (returned by InSize()).
182 size_t inSize;
183
//! Number of output units (returned by OutSize()).
185 size_t outSize;
186
//! Maximum number of steps to backpropagate through time (see Rho()).
188 size_t rho;
189
191 size_t forwardStep;
192
194 size_t backwardStep;
195
197 size_t gradientStep;
198
//! The layer parameters (exposed through Parameters()).
200 OutputDataType weights;
201
203 OutputDataType prevOutput;
204
206 size_t batchSize;
207
209 size_t batchStep;
210
213 size_t gradientStepIdx;
214
216 OutputDataType cellActivationError;
217
//! The delta (exposed through Delta()).
219 OutputDataType delta;
220
//! The gradient (exposed through the Gradient() accessors).
222 OutputDataType grad;
223
//! The output parameter (exposed through OutputParameter()).
225 OutputDataType outputParameter;
226
228 OutputDataType output2GateInputWeight;
229
231 OutputDataType input2GateInputWeight;
232
234 OutputDataType input2GateInputBias;
235
237 OutputDataType cell2GateInputWeight;
238
240 OutputDataType output2GateForgetWeight;
241
243 OutputDataType input2GateForgetWeight;
244
246 OutputDataType input2GateForgetBias;
247
249 OutputDataType cell2GateForgetWeight;
250
252 OutputDataType output2GateOutputWeight;
253
255 OutputDataType input2GateOutputWeight;
256
258 OutputDataType input2GateOutputBias;
259
261 OutputDataType cell2GateOutputWeight;
262
264 OutputDataType inputGate;
265
267 OutputDataType forgetGate;
268
270 OutputDataType hiddenLayer;
271
273 OutputDataType outputGate;
274
276 OutputDataType inputGateActivation;
277
279 OutputDataType forgetGateActivation;
280
282 OutputDataType outputGateActivation;
283
285 OutputDataType hiddenLayerActivation;
286
288 OutputDataType input2HiddenWeight;
289
291 OutputDataType input2HiddenBias;
292
294 OutputDataType output2HiddenWeight;
295
297 OutputDataType cell;
298
300 OutputDataType cellActivation;
301
303 OutputDataType forgetGateError;
304
306 OutputDataType outputGateError;
307
309 OutputDataType prevError;
310
312 OutputDataType outParameter;
313
315 OutputDataType inputCellError;
316
318 OutputDataType inputGateError;
319
321 OutputDataType hiddenError;
322
324 size_t rhoSize;
325
327 size_t bpttSteps;
328}; // class LSTM
329
330} // namespace ann
331} // namespace mlpack
332
333// Include implementation.
334#include "lstm_impl.hpp"
335
336#endif
Implementation of the LSTM module class.
Definition: lstm.hpp:63
void Forward(const InputType &input, OutputType &output)
Ordinary feed-forward pass of a neural network, evaluating the function f(x) by propagating the activity forward through f.
OutputDataType const & Delta() const
Get the delta.
Definition: lstm.hpp:159
size_t & Rho()
Modify the maximum number of steps to backpropagate through time (BPTT).
Definition: lstm.hpp:146
OutputDataType const & Parameters() const
Get the parameters.
Definition: lstm.hpp:149
size_t OutSize() const
Get the number of output units.
Definition: lstm.hpp:172
LSTM()
Create the LSTM object.
void Gradient(const InputType &input, const ErrorType &error, GradientType &gradient)
size_t Rho() const
Get the maximum number of steps to backpropagate through time (BPTT).
Definition: lstm.hpp:144
OutputDataType const & OutputParameter() const
Get the output parameter.
Definition: lstm.hpp:154
LSTM(const size_t inSize, const size_t outSize, const size_t rho=std::numeric_limits< size_t >::max())
Create the LSTM layer object using the specified parameters.
void Backward(const InputType &input, const ErrorType &gy, GradientType &g)
Ordinary feed-backward pass of a neural network, calculating the function f(x) by propagating x backwards through f, using the results from the feed-forward pass.
OutputDataType const & Gradient() const
Get the gradient.
Definition: lstm.hpp:164
void ResetCell(const size_t size)
OutputDataType & Gradient()
Modify the gradient.
Definition: lstm.hpp:166
OutputDataType & OutputParameter()
Modify the output parameter.
Definition: lstm.hpp:156
size_t InSize() const
Get the number of input units.
Definition: lstm.hpp:169
void Forward(const InputType &input, OutputType &output, OutputType &cellState, bool useCellState=false)
Ordinary feed-forward pass of a neural network, evaluating the function f(x) by propagating the activity forward through f.
void serialize(Archive &ar, const unsigned int)
Serialize the layer.
OutputDataType & Parameters()
Modify the parameters.
Definition: lstm.hpp:151
OutputDataType & Delta()
Modify the delta.
Definition: lstm.hpp:161
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.