/**
 * @file multihead_attention.hpp
 *
 * Definition of the MultiheadAttention layer (mlpack 3.4.2).
 */
#ifndef MLPACK_METHODS_ANN_LAYER_MULTIHEAD_ATTENTION_HPP
#define MLPACK_METHODS_ANN_LAYER_MULTIHEAD_ATTENTION_HPP

#include <mlpack/prereqs.hpp>
#include <mlpack/methods/ann/layer/softmax.hpp>
#include <mlpack/methods/ann/init_rules/glorot_init.hpp>
#include <mlpack/methods/ann/regularizer/no_regularizer.hpp>

33namespace mlpack {
34namespace ann {
35
57template <
58 typename InputDataType = arma::mat,
59 typename OutputDataType = arma::mat,
60 typename RegularizerType = NoRegularizer
61>
63{
64 public:
69
78 MultiheadAttention(const size_t tgtSeqLen,
79 const size_t srcSeqLen,
80 const size_t embedDim,
81 const size_t numHeads);
82
86 void Reset();
87
95 template<typename eT>
96 void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output);
97
106 template<typename eT>
107 void Backward(const arma::Mat<eT>& /* input */,
108 const arma::Mat<eT>& gy,
109 arma::Mat<eT>& g);
110
118 template<typename eT>
119 void Gradient(const arma::Mat<eT>& input,
120 const arma::Mat<eT>& error,
121 arma::Mat<eT>& gradient);
122
126 template<typename Archive>
127 void serialize(Archive& ar, const unsigned int /* version */);
128
130 size_t TgtSeqLen() const { return tgtSeqLen; }
132 size_t& TgtSeqLen() { return tgtSeqLen; }
133
135 size_t SrcSeqLen() const { return srcSeqLen; }
137 size_t& SrcSeqLen() { return srcSeqLen; }
138
140 size_t EmbedDim() const { return embedDim; }
142 size_t& EmbedDim() { return embedDim; }
143
145 size_t NumHeads() const { return numHeads; }
147 size_t& NumHeads() { return numHeads; }
148
150 OutputDataType const& AttentionMask() const { return attnMask; }
152 OutputDataType& AttentionMask() { return attnMask; }
153
155 OutputDataType const& KeyPaddingMask() const { return keyPaddingMask; }
157 OutputDataType& KeyPaddingMask() { return keyPaddingMask; }
158
160 OutputDataType const& OutputParameter() const { return outputParameter; }
162 OutputDataType& OutputParameter() { return outputParameter; }
163
165 OutputDataType const& Delta() const { return delta; }
167 OutputDataType& Delta() { return delta; }
168
170 OutputDataType const& Gradient() const { return grad; }
172 OutputDataType& Gradient() { return grad; }
173
175 OutputDataType const& Parameters() const { return weights; }
177 OutputDataType& Parameters() { return weights; }
178
179 private:
181 typedef typename OutputDataType::elem_type ElemType;
182
184 size_t tgtSeqLen;
185
187 size_t srcSeqLen;
188
190 size_t embedDim;
191
193 size_t numHeads;
194
196 size_t headDim;
197
199 OutputDataType attnMask;
200
202 OutputDataType keyPaddingMask;
203
205 OutputDataType queryWt;
206
208 OutputDataType keyWt;
209
211 OutputDataType valueWt;
212
214 OutputDataType outWt;
215
217 OutputDataType qBias;
218
220 OutputDataType kBias;
221
223 OutputDataType vBias;
224
226 OutputDataType outBias;
227
229 OutputDataType weights;
230
232 arma::Cube<ElemType> qProj;
233
235 arma::Cube<ElemType> kProj;
236
238 arma::Cube<ElemType> vProj;
239
241 arma::Cube<ElemType> scores;
242
244 arma::Cube<ElemType> attnOut;
245
248
250 OutputDataType delta;
251
253 OutputDataType grad;
254
256 OutputDataType outputParameter;
257
259 RegularizerType regularizer;
260}; // class MultiheadAttention
261} // namespace ann
262} // namespace mlpack
263
264// Include implementation.
265#include "multihead_attention_impl.hpp"
266
267#endif
Multihead Attention allows the model to jointly attend to information from different representation subspaces at different positions.
size_t & SrcSeqLen()
Modify the source sequence length.
OutputDataType const & Delta() const
Get the delta.
size_t & NumHeads()
Modify the number of attention heads.
size_t & EmbedDim()
Modify the embedding dimension.
OutputDataType const & Parameters() const
Get the parameters.
size_t NumHeads() const
Get the number of attention heads.
size_t TgtSeqLen() const
Get the target sequence length.
void Reset()
Reset the layer parameters.
size_t & TgtSeqLen()
Modify the target sequence length.
void Forward(const arma::Mat< eT > &input, arma::Mat< eT > &output)
Ordinary feed forward pass of a neural network, evaluating the function f(x) by propagating the activity forward through f.
OutputDataType const & OutputParameter() const
Get the output parameter.
MultiheadAttention()
Default constructor.
OutputDataType const & KeyPaddingMask() const
Get Key Padding Mask.
void Gradient(const arma::Mat< eT > &input, const arma::Mat< eT > &error, arma::Mat< eT > &gradient)
Calculate the gradient using the output delta and the input activation.
MultiheadAttention(const size_t tgtSeqLen, const size_t srcSeqLen, const size_t embedDim, const size_t numHeads)
Create the MultiheadAttention object using the specified modules.
OutputDataType const & Gradient() const
Get the gradient.
OutputDataType & AttentionMask()
Modify the two dimensional Attention Mask.
OutputDataType & Gradient()
Modify the gradient.
size_t EmbedDim() const
Get the embedding dimension.
OutputDataType const & AttentionMask() const
Get the two dimensional Attention Mask.
void Backward(const arma::Mat< eT > &, const arma::Mat< eT > &gy, arma::Mat< eT > &g)
Ordinary feed backward pass of a neural network, calculating the function f(x) by propagating x backwards through f, using the results from the feed forward pass.
OutputDataType & KeyPaddingMask()
Modify the Key Padding Mask.
OutputDataType & OutputParameter()
Modify the output parameter.
size_t SrcSeqLen() const
Get the source sequence length.
void serialize(Archive &ar, const unsigned int)
Serialize the layer.
OutputDataType & Parameters()
Modify the parameters.
OutputDataType & Delta()
Modify the delta.
Implementation of the Softmax layer.
Definition: softmax.hpp:39
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: cv.hpp:1
The core includes that mlpack expects; standard C++ includes and Armadillo.