MueLu  Version of the Day
MueLu_FilteredAFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_FILTEREDAFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_FILTEREDAFACTORY_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 
52 
53 #include <Xpetra_Matrix.hpp>
54 #include <Xpetra_MatrixFactory.hpp>
55 
56 #include "MueLu_FactoryManager.hpp"
57 #include "MueLu_Level.hpp"
58 #include "MueLu_MasterList.hpp"
59 #include "MueLu_Monitor.hpp"
60 
61 namespace MueLu {
62 
63  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
64  RCP<const ParameterList> FilteredAFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
65  GetValidParameterList() const {
66  RCP<ParameterList> validParamList = rcp(new ParameterList());
67 
68 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
69  SET_VALID_ENTRY("filtered matrix: use lumping");
70  SET_VALID_ENTRY("filtered matrix: reuse graph");
71  SET_VALID_ENTRY("filtered matrix: reuse eigenvalue");
72 #undef SET_VALID_ENTRY
73 
74  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A used for filtering");
75  validParamList->set< RCP<const FactoryBase> >("Graph", Teuchos::null, "Generating factory for coalesced filtered graph");
76  validParamList->set< RCP<const FactoryBase> >("Filtering", Teuchos::null, "Generating factory for filtering boolean");
77 
78  return validParamList;
79  }
80 
81  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
82  void FilteredAFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
83  DeclareInput(Level& currentLevel) const {
84  Input(currentLevel, "A");
85  Input(currentLevel, "Filtering");
86  Input(currentLevel, "Graph");
87  }
88 
89  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
90  void FilteredAFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
91  Build(Level& currentLevel) const {
92  FactoryMonitor m(*this, "Matrix filtering", currentLevel);
93 
94  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
95  if (Get<bool>(currentLevel, "Filtering") == false) {
96  GetOStream(Runtime0) << "Filtered matrix is not being constructed as no filtering is being done" << std::endl;
97  Set(currentLevel, "A", A);
98  return;
99  }
100 
101  const ParameterList& pL = GetParameterList();
102  bool lumping = pL.get<bool>("filtered matrix: use lumping");
103  if (lumping)
104  GetOStream(Runtime0) << "Lumping dropped entries" << std::endl;
105 
106  RCP<LWGraph_kokkos> graph = Get< RCP<LWGraph_kokkos> >(currentLevel, "Graph");
107 
108  RCP<ParameterList> fillCompleteParams(new ParameterList);
109  fillCompleteParams->set("No Nonlocal Changes", true);
110 
111  RCP<Matrix> filteredA;
112  if (pL.get<bool>("filtered matrix: reuse graph")) {
113  filteredA = MatrixFactory::Build(A->getCrsGraph());
114  filteredA->fillComplete(fillCompleteParams);
115 
116  BuildReuse(*A, *graph, lumping, *filteredA);
117 
118  } else {
119  filteredA = MatrixFactory::Build(A->getRowMap(), A->getColMap(), A->getNodeMaxNumRowEntries(), Xpetra::StaticProfile);
120 
121  BuildNew(*A, *graph, lumping, *filteredA);
122 
123  filteredA->fillComplete(A->getDomainMap(), A->getRangeMap(), fillCompleteParams);
124  }
125 
126  filteredA->SetFixedBlockSize(A->GetFixedBlockSize());
127 
128  if (pL.get<bool>("filtered matrix: reuse eigenvalue")) {
129  // Reuse max eigenvalue from A
130  // It is unclear what eigenvalue is the best for the smoothing, but we already may have
131  // the D^{-1}A estimate in A, may as well use it.
132  // NOTE: ML does that too
133  filteredA->SetMaxEigenvalueEstimate(A->GetMaxEigenvalueEstimate());
134  }
135 
136  Set(currentLevel, "A", filteredA);
137  }
138 
139  // Both Epetra and Tpetra matrix-matrix multiply use the following trick:
140  // if an entry of the left matrix is zero, it does not compute or store the
141  // zero value.
142  //
143  // This trick allows us to bypass constructing a new matrix. Instead, we
144  // make a deep copy of the original one, and fill it in with zeros, which
145  // are ignored during the prolongator smoothing.
146  template<class MatrixType, class GraphType, class FilterType>
147  class BuildReuseFunctor {
148  private:
149  MatrixType localA, localFA;
150  size_t blkSize;
151  GraphType graph;
152  bool lumping;
153  FilterType filter;
154 
155  typedef typename MatrixType::ordinal_type LO;
156  typedef typename MatrixType::value_type SC;
157  typedef Kokkos::ArithTraits<SC> ATS;
158 
159  public:
160  BuildReuseFunctor(MatrixType localA_, MatrixType localFA_, size_t blkSize_, GraphType graph_, bool lumping_, FilterType filter_) :
161  localA(localA_),
162  localFA(localFA_),
163  blkSize(blkSize_),
164  graph(graph_),
165  lumping(lumping_),
166  filter(filter_)
167  { }
168 
169  KOKKOS_INLINE_FUNCTION
170  void operator()(const size_t i) const {
171  // Set up filtering array
172  typename GraphType::row_type indsG = graph.getNeighborVertices(i);
173  for (size_t j = 0; j < indsG.size(); j++)
174  for (size_t k = 0; k < blkSize; k++)
175  filter(indsG(j)*blkSize + k) = 1;
176 
177  SC zero = ATS::zero();
178  for (size_t k = 0; k < blkSize; k++) {
179  LO row = i*blkSize + k;
180 
181  auto rowA = localA.row (row);
182  auto nnz = rowA.length;
183 
184  if (nnz == 0)
185  continue;
186 
187  auto rowFA = localFA.row (row);
188 
189  for (decltype(nnz) j = 0; j < nnz; j++)
190  rowFA.value(j) = rowA.value(j);
191 
192  if (lumping == false) {
193  for (decltype(nnz) j = 0; j < nnz; j++)
194  if (!filter(rowA.colidx(j)))
195  rowFA.value(j) = zero;
196 
197  } else {
198  LO diagIndex = -1;
199  SC diagExtra = zero;
200 
201  for (decltype(nnz) j = 0; j < nnz; j++) {
202  if (filter(rowA.colidx(j))) {
203  if (rowA.colidx(j) == row) {
204  // Remember diagonal position
205  diagIndex = j;
206  }
207  continue;
208  }
209 
210  diagExtra += rowFA.value(j);
211 
212  rowFA.value(j) = zero;
213  }
214 
215  // Lump dropped entries
216  // NOTE
217  // * Does it make sense to lump for elasticity?
218  // * Is it different for diffusion and elasticity?
219  if (diagIndex != -1)
220  rowFA.value(diagIndex) += diagExtra;
221  }
222  }
223 
224  // Reset filtering array
225  for (size_t j = 0; j < indsG.size(); j++)
226  for (size_t k = 0; k < blkSize; k++)
227  filter(indsG(j)*blkSize + k) = 0;
228  }
229  };
230 
231  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
232  void FilteredAFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
233  BuildReuse(const Matrix& A, const LWGraph_kokkos& graph, const bool lumping, Matrix& filteredA) const {
234  SC zero = Teuchos::ScalarTraits<SC>::zero();
235 
236  size_t blkSize = A.GetFixedBlockSize();
237 
238  auto localA = A .getLocalMatrix();
239  auto localFA = filteredA.getLocalMatrix();
240 
241  Kokkos::View<char*> filter("filter", blkSize * graph.GetImportMap()->getNodeNumElements(), 0);
242 
243  size_t numGRows = graph.GetNodeNumVertices();
244 
245  BuildReuseFunctor<decltype(localA), LWGraph_kokkos, decltype(filter)> functor(localA, localFA, blkSize, graph, lumping, filter);
246  Kokkos::parallel_for("MueLu:FilteredAF:BuildReuse:for", numGRows, functor);
247  }
248 
249  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
250  void FilteredAFactory_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
251  BuildNew(const Matrix& A, const LWGraph_kokkos& graph, const bool lumping, Matrix& filteredA) const {
252  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Not implemented");
253  }
254 
255 } //namespace MueLu
256 
257 #endif // HAVE_MUELU_KOKKOS_REFACTOR
258 #endif // MUELU_FILTEREDAFACTORY_KOKKOS_DEF_HPP
LocalOrdinal LO
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Scalar SC
#define SET_VALID_ENTRY(name)