MueLu  Version of the Day
MueLu_RefMaxwell_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_REFMAXWELL_DEF_HPP
47 #define MUELU_REFMAXWELL_DEF_HPP
48 
49 #include <sstream>
50 
51 #include "MueLu_ConfigDefs.hpp"
52 
53 #include "Xpetra_Map.hpp"
54 #include "Xpetra_MatrixMatrix.hpp"
55 #include "Xpetra_TripleMatrixMultiply.hpp"
56 #include "Xpetra_CrsMatrixUtils.hpp"
57 #include "Xpetra_MatrixUtils.hpp"
58 
60 
61 #include "MueLu_AmalgamationFactory.hpp"
62 #include "MueLu_RAPFactory.hpp"
63 #include "MueLu_ThresholdAFilterFactory.hpp"
64 #include "MueLu_TransPFactory.hpp"
65 #include "MueLu_SmootherFactory.hpp"
66 
67 #include "MueLu_CoalesceDropFactory.hpp"
68 #include "MueLu_CoarseMapFactory.hpp"
69 #include "MueLu_CoordinatesTransferFactory.hpp"
70 #include "MueLu_UncoupledAggregationFactory.hpp"
71 #include "MueLu_TentativePFactory.hpp"
72 #include "MueLu_SaPFactory.hpp"
73 #include "MueLu_AggregationExportFactory.hpp"
74 #include "MueLu_Utilities.hpp"
75 #include "MueLu_Maxwell_Utils.hpp"
76 
77 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
78 #include "MueLu_AmalgamationFactory_kokkos.hpp"
79 #include "MueLu_CoalesceDropFactory_kokkos.hpp"
80 #include "MueLu_CoarseMapFactory_kokkos.hpp"
81 #include "MueLu_CoordinatesTransferFactory_kokkos.hpp"
82 #include "MueLu_UncoupledAggregationFactory_kokkos.hpp"
83 #include "MueLu_TentativePFactory_kokkos.hpp"
84 #include "MueLu_SaPFactory_kokkos.hpp"
85 #include "MueLu_Utilities_kokkos.hpp"
86 #include <Kokkos_Core.hpp>
87 #include <KokkosSparse_CrsMatrix.hpp>
88 #endif
89 
90 #include "MueLu_ZoltanInterface.hpp"
91 #include "MueLu_Zoltan2Interface.hpp"
92 #include "MueLu_RepartitionHeuristicFactory.hpp"
93 #include "MueLu_RepartitionFactory.hpp"
94 #include "MueLu_RebalanceAcFactory.hpp"
95 #include "MueLu_RebalanceTransferFactory.hpp"
96 
97 #include "MueLu_VerbosityLevel.hpp"
98 
101 
102 #ifdef HAVE_MUELU_CUDA
103 #include "cuda_profiler_api.h"
104 #endif
105 
106 // Stratimikos
107 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
108 // Thyra includes
109 #include <Thyra_VectorBase.hpp>
110 #include <Thyra_SolveSupportTypes.hpp>
111 // Stratimikos includes
112 #include <Stratimikos_DefaultLinearSolverBuilder.hpp>
114 #include "Teuchos_AbstractFactoryStd.hpp"
115 // Ifpack2 includes
116 #ifdef HAVE_MUELU_IFPACK2
117 #include <Thyra_Ifpack2PreconditionerFactory.hpp>
118 #endif
119 #endif
120 
121 
122 namespace MueLu {
123 
124  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
125  Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getDomainMap() const {
126  return SM_Matrix_->getDomainMap();
127  }
128 
129 
130  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
131  Teuchos::RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getRangeMap() const {
132  return SM_Matrix_->getRangeMap();
133  }
134 
135 
136  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
138 
139  if (list.isType<std::string>("parameterlist: syntax") && list.get<std::string>("parameterlist: syntax") == "ml") {
140  Teuchos::ParameterList newList = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list,"refmaxwell"));
141  if(list.isSublist("refmaxwell: 11list") && list.sublist("refmaxwell: 11list").isSublist("edge matrix free: coarse"))
142  newList.sublist("refmaxwell: 11list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 11list").sublist("edge matrix free: coarse"),"SA"));
143  if(list.isSublist("refmaxwell: 22list"))
144  newList.sublist("refmaxwell: 22list") = *Teuchos::getParametersFromXmlString(MueLu::ML2MueLuParameterTranslator::translate(list.sublist("refmaxwell: 22list"),"SA"));
145  list = newList;
146  }
147 
148  parameterList_ = list;
149  std::string verbosityLevel = parameterList_.get<std::string>("verbosity", MasterList::getDefault<std::string>("verbosity"));
151  std::string outputFilename = parameterList_.get<std::string>("output filename", MasterList::getDefault<std::string>("output filename"));
152  if (outputFilename != "")
153  VerboseObject::SetMueLuOFileStream(outputFilename);
154  if (parameterList_.isType<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"))
155  VerboseObject::SetMueLuOStream(parameterList_.get<Teuchos::RCP<Teuchos::FancyOStream> >("output stream"));
156 
157  if (parameterList_.get("print initial parameters",MasterList::getDefault<bool>("print initial parameters")))
158  GetOStream(static_cast<MsgType>(Runtime1), 0) << parameterList_ << std::endl;
159  disable_addon_ = list.get("refmaxwell: disable addon", MasterList::getDefault<bool>("refmaxwell: disable addon"));
160  mode_ = list.get("refmaxwell: mode", MasterList::getDefault<std::string>("refmaxwell: mode"));
161  use_as_preconditioner_ = list.get("refmaxwell: use as preconditioner", MasterList::getDefault<bool>("refmaxwell: use as preconditioner"));
162  dump_matrices_ = list.get("refmaxwell: dump matrices", MasterList::getDefault<bool>("refmaxwell: dump matrices"));
163  enable_reuse_ = list.get("refmaxwell: enable reuse", MasterList::getDefault<bool>("refmaxwell: enable reuse"));
164  implicitTranspose_ = list.get("transpose: use implicit", MasterList::getDefault<bool>("transpose: use implicit"));
165  fuseProlongationAndUpdate_ = list.get("fuse prolongation and update", MasterList::getDefault<bool>("fuse prolongation and update"));
166  skipFirstLevel_ = list.get("refmaxwell: skip first (1,1) level", MasterList::getDefault<bool>("refmaxwell: skip first (1,1) level"));
167  syncTimers_ = list.get("sync timers", false);
168  numItersH_ = list.get("refmaxwell: num iters H", 1);
169  numIters22_ = list.get("refmaxwell: num iters 22", 1);
170  applyBCsToAnodal_ = list.get("refmaxwell: apply BCs to Anodal", false);
171  applyBCsToH_ = list.get("refmaxwell: apply BCs to H", true);
172  applyBCsTo22_ = list.get("refmaxwell: apply BCs to 22", true);
173 
174  if(list.isSublist("refmaxwell: 11list"))
175  precList11_ = list.sublist("refmaxwell: 11list");
176  if(!precList11_.isType<std::string>("Preconditioner Type") &&
177  !precList11_.isType<std::string>("smoother: type") &&
178  !precList11_.isType<std::string>("smoother: pre type") &&
179  !precList11_.isType<std::string>("smoother: post type")) {
180  precList11_.set("smoother: type", "CHEBYSHEV");
181  precList11_.sublist("smoother: params").set("chebyshev: degree",2);
182  precList11_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",5.4);
183  precList11_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
184  }
185 
186  if(list.isSublist("refmaxwell: 22list"))
187  precList22_ = list.sublist("refmaxwell: 22list");
188  if(!precList22_.isType<std::string>("Preconditioner Type") &&
189  !precList22_.isType<std::string>("smoother: type") &&
190  !precList22_.isType<std::string>("smoother: pre type") &&
191  !precList22_.isType<std::string>("smoother: post type")) {
192  precList22_.set("smoother: type", "CHEBYSHEV");
193  precList22_.sublist("smoother: params").set("chebyshev: degree",2);
194  precList22_.sublist("smoother: params").set("chebyshev: ratio eigenvalue",7.0);
195  precList22_.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
196  }
197 
198  if(!list.isType<std::string>("smoother: type") && !list.isType<std::string>("smoother: pre type") && !list.isType<std::string>("smoother: post type")) {
199  list.set("smoother: type", "CHEBYSHEV");
200  list.sublist("smoother: params").set("chebyshev: degree",2);
201  list.sublist("smoother: params").set("chebyshev: ratio eigenvalue",20.0);
202  list.sublist("smoother: params").set("chebyshev: eigenvalue max iterations",30);
203  }
204 
205  if(list.isSublist("smoother: params")) {
206  smootherList_ = list.sublist("smoother: params");
207  }
208 
209  if (enable_reuse_ &&
210  !precList11_.isType<std::string>("Preconditioner Type") &&
211  !precList11_.isParameter("reuse: type"))
212  precList11_.set("reuse: type", "full");
213  if (enable_reuse_ &&
214  !precList22_.isType<std::string>("Preconditioner Type") &&
215  !precList22_.isParameter("reuse: type"))
216  precList22_.set("reuse: type", "full");
217 
218 #if !defined(HAVE_MUELU_KOKKOS_REFACTOR)
219  useKokkos_ = false;
220 #else
221 # ifdef HAVE_MUELU_SERIAL
222  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosSerialWrapperNode).name())
223  useKokkos_ = false;
224 # endif
225 # ifdef HAVE_MUELU_OPENMP
226  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosOpenMPWrapperNode).name())
227  useKokkos_ = true;
228 # endif
229 # ifdef HAVE_MUELU_CUDA
230  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosCudaWrapperNode).name())
231  useKokkos_ = true;
232 # endif
233 # ifdef HAVE_MUELU_HIP
234  if (typeid(Node).name() == typeid(Kokkos::Compat::KokkosHIPWrapperNode).name())
235  useKokkos_ = true;
236 # endif
237  useKokkos_ = list.get("use kokkos refactor",useKokkos_);
238 #endif
239  }
240 
241 
242 
243  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
245 
246 #ifdef HAVE_MUELU_CUDA
247  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStart();
248 #endif
249 
250  std::string timerLabel;
251  if (reuse)
252  timerLabel = "MueLu RefMaxwell: compute (reuse)";
253  else
254  timerLabel = "MueLu RefMaxwell: compute";
255  RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
256 
258  // Remove explicit zeros from matrices
259  Maxwell_Utils<SC,LO,GO,NO>::removeExplicitZeros(parameterList_,D0_Matrix_,SM_Matrix_,M1_Matrix_,Ms_Matrix_);
260 
261  if (IsPrint(Statistics2)) {
262  RCP<ParameterList> params = rcp(new ParameterList());;
263  params->set("printLoadBalancingInfo", true);
264  params->set("printCommInfo", true);
265  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*SM_Matrix_, "SM_Matrix", params);
266  }
267 
269  // Detect Dirichlet boundary conditions
270  if (!reuse) {
271  magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
272  Maxwell_Utils<SC,LO,GO,NO>::detectBoundaryConditionsSM(SM_Matrix_,D0_Matrix_,rowSumTol,
274  useKokkos_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_,
275 #endif
276  BCedges_,BCnodes_,BCrows_,BCcols_,BCdomain_,
277  allEdgesBoundary_,allNodesBoundary_);
278  if (IsPrint(Statistics2)) {
279  GetOStream(Statistics2) << "MueLu::RefMaxwell::compute(): Detected " << BCedges_ << " BC rows and " << BCnodes_ << " BC columns." << std::endl;
280  }
281  }
282 
283  if (allEdgesBoundary_) {
284  // All edges have been detected as boundary edges.
285  // Do not attempt to construct sub-hierarchies, but just set up a single level preconditioner.
286  GetOStream(Warnings0) << "All edges are detected as boundary edges!" << std::endl;
287  mode_ = "none";
288  setFineLevelSmoother();
289  return;
290  }
291 
293  // build nullspace if necessary
294 
295  if(Nullspace_ != null) {
296  // no need to do anything - nullspace is built
297  TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
298  }
299  else if(Nullspace_ == null && Coords_ != null) {
300 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
301  RCP<MultiVector> CoordsSC;
302  if (useKokkos_)
303  CoordsSC = Utilities_kokkos::RealValuedToScalarMultiVector(Coords_);
304  else
305  CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
306 #else
307  RCP<MultiVector> CoordsSC = Utilities::RealValuedToScalarMultiVector(Coords_);
308 #endif
309  Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
310  D0_Matrix_->apply(*CoordsSC,*Nullspace_);
311 
312  bool normalize = parameterList_.get<bool>("refmaxwell: normalize nullspace", MasterList::getDefault<bool>("refmaxwell: normalize nullspace"));
313 
314  coordinateType minLen, maxLen, meanLen;
315  if (IsPrint(Statistics2) || normalize){
316  // compute edge lengths
317  ArrayRCP<ArrayRCP<const Scalar> > localNullspace(Nullspace_->getNumVectors());
318  for (size_t i = 0; i < Nullspace_->getNumVectors(); i++)
319  localNullspace[i] = Nullspace_->getData(i);
320  coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
321  coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
322  coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
323  for (size_t j=0; j < Nullspace_->getMap()->getNodeNumElements(); j++) {
324  Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
325  for (size_t i=0; i < Nullspace_->getNumVectors(); i++)
326  lenSC += localNullspace[i][j]*localNullspace[i][j];
327  coordinateType len = sqrt(Teuchos::ScalarTraits<Scalar>::real(lenSC));
328  localMinLen = std::min(localMinLen, len);
329  localMaxLen = std::max(localMaxLen, len);
330  localMeanLen += len;
331  }
332 
333  RCP<const Teuchos::Comm<int> > comm = Nullspace_->getMap()->getComm();
334  MueLu_minAll(comm, localMinLen, minLen);
335  MueLu_sumAll(comm, localMeanLen, meanLen);
336  MueLu_maxAll(comm, localMaxLen, maxLen);
337  meanLen /= Nullspace_->getMap()->getGlobalNumElements();
338  }
339 
340  if (IsPrint(Statistics2)) {
341  GetOStream(Statistics0) << "Edge length (min/mean/max): " << minLen << " / " << meanLen << " / " << maxLen << std::endl;
342  }
343 
344  if (normalize) {
345  // normalize the nullspace
346  GetOStream(Runtime0) << "RefMaxwell::compute(): normalizing nullspace" << std::endl;
347 
348  const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
349 
350  Array<Scalar> normsSC(Coords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
351  Nullspace_->scale(normsSC());
352  }
353  }
354  else {
355  GetOStream(Errors) << "MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
356  }
357 
358  if (!reuse && skipFirstLevel_) {
359  // Nuke the BC edges in nullspace
360 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
361  if (useKokkos_)
362  Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
363  else
364  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
365 #else
366  Utilities::ZeroDirichletRows(Nullspace_,BCrows_);
367 #endif
368  dump(*Nullspace_, "nullspace.m");
369  }
370 
372  // build special prolongator for (1,1)-block
373 
374  if(P11_.is_null()) {
375  if (skipFirstLevel_) {
376  // Form A_nodal = D0* Ms D0 (aka TMT_agg)
377  Level fineLevel, coarseLevel;
378  fineLevel.SetFactoryManager(null);
379  coarseLevel.SetFactoryManager(null);
380  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
381  fineLevel.SetLevelID(0);
382  coarseLevel.SetLevelID(1);
383  fineLevel.Set("A",Ms_Matrix_);
384  coarseLevel.Set("P",D0_Matrix_);
385  coarseLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
386  fineLevel.setlib(Ms_Matrix_->getDomainMap()->lib());
387  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
388  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
389 
390  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
391  ParameterList rapList = *(rapFact->GetValidParameterList());
392  rapList.set("transpose: use implicit", true);
393  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
394  rapList.set("rap: fix zero diagonals threshold", parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
395  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
396  rapFact->SetParameterList(rapList);
397 
398 
399  coarseLevel.Request("A", rapFact.get());
400 
401  A_nodal_Matrix_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
402 
403  if (applyBCsToAnodal_) {
404  // Apply boundary conditions to A_nodal
405 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
406  if (useKokkos_)
407  Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_);
408  else
409 #endif
410  Utilities::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomain_);
411  }
412  dump(*A_nodal_Matrix_, "A_nodal.m");
413  }
414 
415  // build special prolongator
416  GetOStream(Runtime0) << "RefMaxwell::compute(): building special prolongator" << std::endl;
417  buildProlongator();
418  }
419 
420 #ifdef HAVE_MPI
421  bool doRebalancing = false;
422  doRebalancing = parameterList_.get<bool>("refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>("refmaxwell: subsolves on subcommunicators"));
423  int rebalanceStriding = parameterList_.get<int>("refmaxwell: subsolves striding", -1);
424  int numProcsAH, numProcsA22;
425  int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
426  if (numProcs == 1)
427  doRebalancing = false;
428 #endif
429  {
430  // build coarse grid operator for (1,1)-block
431  formCoarseMatrix();
432 
433  if (!reuse) {
434 #ifdef HAVE_MPI
435  if (doRebalancing) {
436 
437  {
438  // decide on number of ranks for coarse (1, 1) problem
439 
440  Level level;
441  level.SetFactoryManager(null);
442  level.SetLevelID(0);
443  level.Set("A",AH_);
444 
445  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
446  ParameterList repartheurParams;
447  repartheurParams.set("repartition: start level", 0);
448  // Setting min == target on purpose.
449  int defaultTargetRows = 10000;
450  repartheurParams.set("repartition: min rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
451  repartheurParams.set("repartition: target rows per proc", precList11_.get<int>("repartition: target rows per proc", defaultTargetRows));
452  repartheurParams.set("repartition: min rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
453  repartheurParams.set("repartition: target rows per thread", precList11_.get<int>("repartition: target rows per thread", defaultTargetRows));
454  repartheurParams.set("repartition: max imbalance", precList11_.get<double>("repartition: max imbalance", 1.1));
455  repartheurFactory->SetParameterList(repartheurParams);
456 
457  level.Request("number of partitions", repartheurFactory.get());
458  repartheurFactory->Build(level);
459  numProcsAH = level.Get<int>("number of partitions", repartheurFactory.get());
460  numProcsAH = std::min(numProcsAH,numProcs);
461  }
462 
463  {
464  // decide on number of ranks for (2, 2) problem
465 
466  Level level;
467  level.SetFactoryManager(null);
468  level.SetLevelID(0);
469 
470  level.Set("Map",D0_Matrix_->getDomainMap());
471 
472  auto repartheurFactory = rcp(new RepartitionHeuristicFactory());
473  ParameterList repartheurParams;
474  repartheurParams.set("repartition: start level", 0);
475  repartheurParams.set("repartition: use map", true);
476  // Setting min == target on purpose.
477  int defaultTargetRows = 10000;
478  repartheurParams.set("repartition: min rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
479  repartheurParams.set("repartition: target rows per proc", precList22_.get<int>("repartition: target rows per proc", defaultTargetRows));
480  repartheurParams.set("repartition: min rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
481  repartheurParams.set("repartition: target rows per thread", precList22_.get<int>("repartition: target rows per thread", defaultTargetRows));
482  // repartheurParams.set("repartition: max imbalance", precList22_.get<double>("repartition: max imbalance", 1.1));
483  repartheurFactory->SetParameterList(repartheurParams);
484 
485  level.Request("number of partitions", repartheurFactory.get());
486  repartheurFactory->Build(level);
487  numProcsA22 = level.Get<int>("number of partitions", repartheurFactory.get());
488  numProcsA22 = std::min(numProcsA22,numProcs);
489  }
490 
491  if (rebalanceStriding >= 1) {
492  TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
493  TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
494  if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
495  GetOStream(Warnings0) << "RefMaxwell::compute(): Disabling striding = " << rebalanceStriding << ", since AH needs " << numProcsAH
496  << " procs and A22 needs " << numProcsA22 << " procs."<< std::endl;
497  rebalanceStriding = -1;
498  }
499  }
500 
501  // }
502 
503  // if (doRebalancing && !reuse) {
504  // rebalance AH
505  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Rebalance AH");
506 
507  Level fineLevel, coarseLevel;
508  fineLevel.SetFactoryManager(null);
509  coarseLevel.SetFactoryManager(null);
510  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
511  fineLevel.SetLevelID(0);
512  coarseLevel.SetLevelID(1);
513  coarseLevel.Set("A",AH_);
514  coarseLevel.Set("P",P11_);
515  coarseLevel.Set("Coordinates",CoordsH_);
516  if (!NullspaceH_.is_null())
517  coarseLevel.Set("Nullspace",NullspaceH_);
518  coarseLevel.Set("number of partitions", numProcsAH);
519  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
520 
521  coarseLevel.setlib(AH_->getDomainMap()->lib());
522  fineLevel.setlib(AH_->getDomainMap()->lib());
523  coarseLevel.setObjectLabel("RefMaxwell coarse (1,1)");
524  fineLevel.setObjectLabel("RefMaxwell coarse (1,1)");
525 
526  std::string partName = precList11_.get<std::string>("repartition: partitioner", "zoltan2");
527  RCP<Factory> partitioner;
528  if (partName == "zoltan") {
529 #ifdef HAVE_MUELU_ZOLTAN
530  partitioner = rcp(new ZoltanInterface());
531  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
532  // partitioner->SetFactory("number of partitions", repartheurFactory);
533 #else
534  throw Exceptions::RuntimeError("Zoltan interface is not available");
535 #endif
536  } else if (partName == "zoltan2") {
537 #ifdef HAVE_MUELU_ZOLTAN2
538  partitioner = rcp(new Zoltan2Interface());
539  ParameterList partParams;
540  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList11_.sublist("repartition: params", false)));
541  partParams.set("ParameterList", partpartParams);
542  partitioner->SetParameterList(partParams);
543  // partitioner->SetFactory("number of partitions", repartheurFactory);
544 #else
545  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
546 #endif
547  }
548 
549  auto repartFactory = rcp(new RepartitionFactory());
550  ParameterList repartParams;
551  repartParams.set("repartition: print partition distribution", precList11_.get<bool>("repartition: print partition distribution", false));
552  repartParams.set("repartition: remap parts", precList11_.get<bool>("repartition: remap parts", true));
553  if (rebalanceStriding >= 1) {
554  bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
555  if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
556  acceptPart = false;
557  repartParams.set("repartition: remap accept partition", acceptPart);
558  }
559  repartFactory->SetParameterList(repartParams);
560  // repartFactory->SetFactory("number of partitions", repartheurFactory);
561  repartFactory->SetFactory("Partition", partitioner);
562 
563  auto newP = rcp(new RebalanceTransferFactory());
564  ParameterList newPparams;
565  newPparams.set("type", "Interpolation");
566  newPparams.set("repartition: rebalance P and R", precList11_.get<bool>("repartition: rebalance P and R", false));
567  newPparams.set("repartition: use subcommunicators", true);
568  newPparams.set("repartition: rebalance Nullspace", !NullspaceH_.is_null());
569  newP->SetFactory("Coordinates", NoFactory::getRCP());
570  if (!NullspaceH_.is_null())
571  newP->SetFactory("Nullspace", NoFactory::getRCP());
572  newP->SetParameterList(newPparams);
573  newP->SetFactory("Importer", repartFactory);
574 
575  auto newA = rcp(new RebalanceAcFactory());
576  ParameterList rebAcParams;
577  rebAcParams.set("repartition: use subcommunicators", true);
578  newA->SetParameterList(rebAcParams);
579  newA->SetFactory("Importer", repartFactory);
580 
581  coarseLevel.Request("P", newP.get());
582  coarseLevel.Request("Importer", repartFactory.get());
583  coarseLevel.Request("A", newA.get());
584  coarseLevel.Request("Coordinates", newP.get());
585  if (!NullspaceH_.is_null())
586  coarseLevel.Request("Nullspace", newP.get());
587  repartFactory->Build(coarseLevel);
588 
589  if (!precList11_.get<bool>("repartition: rebalance P and R", false))
590  ImporterH_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
591  P11_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
592  AH_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
593  CoordsH_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
594  if (!NullspaceH_.is_null())
595  NullspaceH_ = coarseLevel.Get< RCP<MultiVector> >("Nullspace", newP.get());
596 
597  AH_AP_reuse_data_ = Teuchos::null;
598  AH_RAP_reuse_data_ = Teuchos::null;
599 
600  if (!disable_addon_ && enable_reuse_) {
601  // Rebalance the addon for next setup
602  RCP<const Import> ImporterH = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
603  RCP<const Map> targetMap = ImporterH->getTargetMap();
604  ParameterList XpetraList;
605  XpetraList.set("Restrict Communicator",true);
606  Addon_Matrix_ = MatrixFactory::Build(Addon_Matrix_, *ImporterH, *ImporterH, targetMap, targetMap, rcp(&XpetraList,false));
607  }
608  }
609 #endif // HAVE_MPI
610 
611 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
612  // This should be taken out again as soon as
613  // CoalesceDropFactory_kokkos supports BlockSize > 1 and
614  // drop tol != 0.0
615  if (useKokkos_ && precList11_.isParameter("aggregation: drop tol") && precList11_.get<double>("aggregation: drop tol") != 0.0) {
616  GetOStream(Warnings0) << "RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
617  << "support BlockSize > 1 and drop tol != 0.0" << std::endl;
618  precList11_.set("aggregation: drop tol", 0.0);
619  }
620 #endif
621  dump(*P11_, "P11.m");
622 
623  if (!implicitTranspose_) {
624 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
625  if (useKokkos_)
626  R11_ = Utilities_kokkos::Transpose(*P11_);
627  else
628  R11_ = Utilities::Transpose(*P11_);
629 #else
630  R11_ = Utilities::Transpose(*P11_);
631 #endif
632  dump(*R11_, "R11.m");
633  }
634  }
635 
637  if (!AH_.is_null()) {
638  // set fixed block size for vector nodal matrix
639  size_t dim = Nullspace_->getNumVectors();
640  AH_->SetFixedBlockSize(dim);
641  AH_->setObjectLabel("RefMaxwell coarse (1,1)");
642  dump(*AH_, "AH.m");
643  int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
644  if (IsPrint(Statistics2)) {
645  RCP<ParameterList> params = rcp(new ParameterList());;
646  params->set("printLoadBalancingInfo", true);
647  params->set("printCommInfo", true);
648  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*AH_, "AH", params);
649  }
650 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
651  if (precList11_.isType<std::string>("Preconditioner Type")) {
652  // build a Stratimikos preconditioner
653  if (precList11_.get<std::string>("Preconditioner Type") == "MueLu") {
654  ParameterList& userParamList = precList11_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
655  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
656  }
658  } else
659 #endif
660  {
661  // build a MueLu hierarchy
662 
663  if (!reuse) {
664  dumpCoords(*CoordsH_, "coordsH.m");
665  if (!NullspaceH_.is_null())
666  dump(*NullspaceH_, "NullspaceH.m");
667  ParameterList& userParamList = precList11_.sublist("user data");
668  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", CoordsH_);
669  if (!NullspaceH_.is_null())
670  userParamList.set<RCP<MultiVector> >("Nullspace", NullspaceH_);
671  HierarchyH_ = MueLu::CreateXpetraPreconditioner(AH_, precList11_);
672  } else {
673  RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
674  level0->Set("A", AH_);
675  HierarchyH_->SetupRe();
676  }
677  }
678  SetProcRankVerbose(oldRank);
679  }
680  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
681 
682  }
683 
684  if(!reuse && applyBCsTo22_) {
685  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC nodes of D0" << std::endl;
686 
687  D0_Matrix_->resumeFill();
688  Scalar replaceWith;
689  if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
690  replaceWith= Teuchos::ScalarTraits<SC>::eps();
691  else
692  replaceWith = Teuchos::ScalarTraits<SC>::zero();
693 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
694  if (useKokkos_) {
695  Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
696  } else {
697  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
698  }
699 #else
700  Utilities::ZeroDirichletCols(D0_Matrix_,BCcols_,replaceWith);
701 #endif
702  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
703  }
704 
706  // Build A22 = D0* SM D0 and hierarchy for A22
707  if (!allNodesBoundary_) {
708  GetOStream(Runtime0) << "RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
709 
710  if (!reuse) { // build fine grid operator for (2,2)-block, D0* SM D0 (aka TMT)
711  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
712 
713  Level fineLevel, coarseLevel;
714  fineLevel.SetFactoryManager(null);
715  coarseLevel.SetFactoryManager(null);
716  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
717  fineLevel.SetLevelID(0);
718  coarseLevel.SetLevelID(1);
719  fineLevel.Set("A",SM_Matrix_);
720  coarseLevel.Set("P",D0_Matrix_);
721  coarseLevel.Set("Coordinates",Coords_);
722 
723  coarseLevel.setlib(SM_Matrix_->getDomainMap()->lib());
724  fineLevel.setlib(SM_Matrix_->getDomainMap()->lib());
725  coarseLevel.setObjectLabel("RefMaxwell (2,2)");
726  fineLevel.setObjectLabel("RefMaxwell (2,2)");
727 
728  RCP<RAPFactory> rapFact = rcp(new RAPFactory());
729  ParameterList rapList = *(rapFact->GetValidParameterList());
730  rapList.set("transpose: use implicit", true);
731  rapList.set("rap: fix zero diagonals", parameterList_.get<bool>("rap: fix zero diagonals", true));
732  rapList.set("rap: fix zero diagonals threshold", parameterList_.get<double>("rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
733  rapList.set("rap: triple product", parameterList_.get<bool>("rap: triple product", false));
734  rapFact->SetParameterList(rapList);
735 
736  if (!A22_AP_reuse_data_.is_null()) {
737  coarseLevel.AddKeepFlag("AP reuse data", rapFact.get());
738  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("AP reuse data", A22_AP_reuse_data_, rapFact.get());
739  }
740  if (!A22_RAP_reuse_data_.is_null()) {
741  coarseLevel.AddKeepFlag("RAP reuse data", rapFact.get());
742  coarseLevel.Set<Teuchos::RCP<Teuchos::ParameterList> >("RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
743  }
744 
745 #ifdef HAVE_MPI
746  if (doRebalancing) {
747 
748  coarseLevel.Set("number of partitions", numProcsA22);
749  coarseLevel.Set("repartition: heuristic target rows per process", 1000);
750 
751  std::string partName = precList22_.get<std::string>("repartition: partitioner", "zoltan2");
752  RCP<Factory> partitioner;
753  if (partName == "zoltan") {
754 #ifdef HAVE_MUELU_ZOLTAN
755  partitioner = rcp(new ZoltanInterface());
756  partitioner->SetFactory("A", rapFact);
757  // partitioner->SetFactory("number of partitions", repartheurFactory);
758  // NOTE: ZoltanInteface ("zoltan") does not support external parameters through ParameterList
759 #else
760  throw Exceptions::RuntimeError("Zoltan interface is not available");
761 #endif
762  } else if (partName == "zoltan2") {
763 #ifdef HAVE_MUELU_ZOLTAN2
764  partitioner = rcp(new Zoltan2Interface());
765  ParameterList partParams;
766  RCP<const ParameterList> partpartParams = rcp(new ParameterList(precList22_.sublist("repartition: params", false)));
767  partParams.set("ParameterList", partpartParams);
768  partitioner->SetParameterList(partParams);
769  partitioner->SetFactory("A", rapFact);
770  // partitioner->SetFactory("number of partitions", repartheurFactory);
771 #else
772  throw Exceptions::RuntimeError("Zoltan2 interface is not available");
773 #endif
774  }
775 
776  auto repartFactory = rcp(new RepartitionFactory());
777  ParameterList repartParams;
778  repartParams.set("repartition: print partition distribution", precList22_.get<bool>("repartition: print partition distribution", false));
779  repartParams.set("repartition: remap parts", precList22_.get<bool>("repartition: remap parts", true));
780  if (rebalanceStriding >= 1) {
781  bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
782  if (SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
783  acceptPart = false;
784  if (acceptPart)
785  TEUCHOS_ASSERT(AH_.is_null());
786  repartParams.set("repartition: remap accept partition", acceptPart);
787  } else
788  repartParams.set("repartition: remap accept partition", AH_.is_null());
789  repartFactory->SetParameterList(repartParams);
790  repartFactory->SetFactory("A", rapFact);
791  // repartFactory->SetFactory("number of partitions", repartheurFactory);
792  repartFactory->SetFactory("Partition", partitioner);
793 
794  auto newP = rcp(new RebalanceTransferFactory());
795  ParameterList newPparams;
796  newPparams.set("type", "Interpolation");
797  newPparams.set("repartition: rebalance P and R", precList22_.get<bool>("repartition: rebalance P and R", false));
798  newPparams.set("repartition: use subcommunicators", true);
799  newPparams.set("repartition: rebalance Nullspace", false);
800  newP->SetFactory("Coordinates", NoFactory::getRCP());
801  newP->SetParameterList(newPparams);
802  newP->SetFactory("Importer", repartFactory);
803 
804  auto newA = rcp(new RebalanceAcFactory());
805  ParameterList rebAcParams;
806  rebAcParams.set("repartition: use subcommunicators", true);
807  newA->SetParameterList(rebAcParams);
808  newA->SetFactory("A", rapFact);
809  newA->SetFactory("Importer", repartFactory);
810 
811  coarseLevel.Request("P", newP.get());
812  coarseLevel.Request("Importer", repartFactory.get());
813  coarseLevel.Request("A", newA.get());
814  coarseLevel.Request("Coordinates", newP.get());
815  rapFact->Build(fineLevel,coarseLevel);
816  repartFactory->Build(coarseLevel);
817 
818  if (!precList22_.get<bool>("repartition: rebalance P and R", false))
819  Importer22_ = coarseLevel.Get< RCP<const Import> >("Importer", repartFactory.get());
820  D0_Matrix_ = coarseLevel.Get< RCP<Matrix> >("P", newP.get());
821  A22_ = coarseLevel.Get< RCP<Matrix> >("A", newA.get());
822  Coords_ = coarseLevel.Get< RCP<RealValuedMultiVector> >("Coordinates", newP.get());
823 
824  } else
825 #endif // HAVE_MPI
826  {
827  coarseLevel.Request("A", rapFact.get());
828  if (enable_reuse_) {
829  coarseLevel.Request("AP reuse data", rapFact.get());
830  coarseLevel.Request("RAP reuse data", rapFact.get());
831  }
832 
833  A22_ = coarseLevel.Get< RCP<Matrix> >("A", rapFact.get());
834 
835  if (enable_reuse_) {
836  if (!parameterList_.get<bool>("rap: triple product", false))
837  A22_AP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("AP reuse data", rapFact.get());
838  A22_RAP_reuse_data_ = coarseLevel.Get< RCP<ParameterList> >("RAP reuse data", rapFact.get());
839  }
840  }
841  } else {
842  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build A22");
843  if (Importer22_.is_null()) {
844  RCP<Matrix> temp;
845  temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
846  if (!implicitTranspose_)
847  A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,A22_,GetOStream(Runtime0),true,true);
848  else
849  A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,A22_,GetOStream(Runtime0),true,true);
850  } else {
851  // we replaced domain map and importer on D0, reverse that
852  RCP<const Import> D0importer = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
853  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(D0origDomainMap_, D0origImporter_);
854 
855  RCP<Matrix> temp, temp2;
856  temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*D0_Matrix_,false,temp,GetOStream(Runtime0),true,true);
857  if (!implicitTranspose_)
858  temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,false,*temp,false,temp2,GetOStream(Runtime0),true,true);
859  else
860  temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
861 
862  // and back again
863  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), D0importer);
864 
865  ParameterList XpetraList;
866  XpetraList.set("Restrict Communicator",true);
867  XpetraList.set("Timer Label","MueLu::RebalanceA22");
868  RCP<const Map> targetMap = Importer22_->getTargetMap();
869  A22_ = MatrixFactory::Build(temp2, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,false));
870  }
871  }
872 
873  if (!implicitTranspose_ && !reuse) {
874 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
875  if (useKokkos_)
876  D0_T_Matrix_ = Utilities_kokkos::Transpose(*D0_Matrix_);
877  else
878  D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
879 #else
880  D0_T_Matrix_ = Utilities::Transpose(*D0_Matrix_);
881 #endif
882  }
883 
885  if (!A22_.is_null()) {
886  dump(*A22_, "A22.m");
887  A22_->setObjectLabel("RefMaxwell (2,2)");
888  int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
889  if (IsPrint(Statistics2)) {
890  RCP<ParameterList> params = rcp(new ParameterList());;
891  params->set("printLoadBalancingInfo", true);
892  params->set("printCommInfo", true);
893  GetOStream(Statistics2) << PerfUtils::PrintMatrixInfo(*A22_, "A22", params);
894  }
895 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
896  if (precList22_.isType<std::string>("Preconditioner Type")) {
897  // build a Stratimikos preconditioner
898  if (precList22_.get<std::string>("Preconditioner Type") == "MueLu") {
899  ParameterList& userParamList = precList22_.sublist("Preconditioner Types").sublist("MueLu").sublist("user data");
900  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
901  }
903  } else
904 #endif
905  {
906  // build a MueLu hierarchy
907  if (!reuse) {
908  ParameterList& userParamList = precList22_.sublist("user data");
909  userParamList.set<RCP<RealValuedMultiVector> >("Coordinates", Coords_);
910  // If we detected no boundary conditions, the (2,2) problem is singular.
911  // Therefore, if we want to use a direct coarse solver, we need to fix up the nullspace.
912  std::string coarseType = "";
913  if (precList22_.isParameter("coarse: type")) {
914  coarseType = precList22_.get<std::string>("coarse: type");
915  // Transform string to "Abcde" notation
916  std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
917  std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
918  }
919  if (BCedges_ == 0 &&
920  (coarseType == "" ||
921  coarseType == "Klu" ||
922  coarseType == "Klu2") &&
923  (!precList22_.isSublist("coarse: params") ||
924  !precList22_.sublist("coarse: params").isParameter("fix nullspace")))
925  precList22_.sublist("coarse: params").set("fix nullspace",true);
926  Hierarchy22_ = MueLu::CreateXpetraPreconditioner(A22_, precList22_);
927  } else {
928  RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
929  level0->Set("A", A22_);
930  Hierarchy22_->SetupRe();
931  }
932  }
933  SetProcRankVerbose(oldRank);
934  }
935  VerboseObject::SetDefaultVerbLevel(verbosityLevel);
936 
937  }
938 
939  if(!reuse && !allNodesBoundary_ && applyBCsTo22_) {
940  GetOStream(Runtime0) << "RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
941 
942  D0_Matrix_->resumeFill();
943  Scalar replaceWith;
944  if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
945  replaceWith= Teuchos::ScalarTraits<SC>::eps();
946  else
947  replaceWith = Teuchos::ScalarTraits<SC>::zero();
948 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
949  if (useKokkos_) {
950  Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
951  } else {
952  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
953  }
954 #else
955  Utilities::ZeroDirichletRows(D0_Matrix_,BCrows_,replaceWith);
956 #endif
957  D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
958  dump(*D0_Matrix_, "D0_nuked.m");
959  }
960 
961  setFineLevelSmoother();
962 
963  if (!reuse) {
964  if (!ImporterH_.is_null()) {
965  RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterH_->getTargetMap(),P11_->getColMap());
966  rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterH_->getTargetMap(), ImporterP11);
967  }
968 
969  if (!Importer22_.is_null()) {
970  if (enable_reuse_) {
971  D0origDomainMap_ = D0_Matrix_->getDomainMap();
972  D0origImporter_ = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
973  }
974  RCP<const Import> ImporterD0 = ImportFactory::Build(Importer22_->getTargetMap(),D0_Matrix_->getColMap());
975  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD0);
976  }
977 
978 #ifdef HAVE_MUELU_TPETRA
979  if ((!D0_T_Matrix_.is_null()) &&
980  (!R11_.is_null()) &&
981  (!rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
982  (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
983  (D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
984  (R11_->getColMap()->lib() == Xpetra::UseTpetra))
985  D0_T_R11_colMapsMatch_ = D0_T_Matrix_->getColMap()->isSameAs(*R11_->getColMap());
986  else
987 #endif
988  D0_T_R11_colMapsMatch_ = false;
989  if (D0_T_R11_colMapsMatch_)
990  GetOStream(Runtime0) << "RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
991 
992  // Allocate temporary MultiVectors for solve
993  allocateMemory(1);
994 
995  if (parameterList_.isSublist("matvec params"))
996  {
997  RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist("matvec params"));
998  Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*D0_Matrix_, matvecParams);
999  Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*P11_, matvecParams);
1000  if (!D0_T_Matrix_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*D0_T_Matrix_, matvecParams);
1001  if (!R11_.is_null()) Maxwell_Utils<SC,LO,GO,NO>::setMatvecParams(*R11_, matvecParams);
1002  if (!ImporterH_.is_null()) ImporterH_->setDistributorParameters(matvecParams);
1003  if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
1004  }
1005  if (!ImporterH_.is_null() && parameterList_.isSublist("refmaxwell: ImporterH params")){
1006  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: ImporterH params"));
1007  ImporterH_->setDistributorParameters(importerParams);
1008  }
1009  if (!Importer22_.is_null() && parameterList_.isSublist("refmaxwell: Importer22 params")){
1010  RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist("refmaxwell: Importer22 params"));
1011  Importer22_->setDistributorParameters(importerParams);
1012  }
1013  }
1014 
1015  describe(GetOStream(Runtime0));
1016 
1017 #ifdef HAVE_MUELU_CUDA
1018  if (parameterList_.get<bool>("refmaxwell: cuda profile setup", false)) cudaProfilerStop();
1019 #endif
1020  }
1021 
1022 
1023  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1025  if (parameterList_.isType<std::string>("smoother: type") &&
1026  parameterList_.get<std::string>("smoother: type") == "hiptmair" &&
1027  SM_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1028  A22_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1029  D0_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra) {
1030 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1031  ParameterList hiptmairPreList, hiptmairPostList, smootherPreList, smootherPostList;
1032 
1033  if (smootherList_.isSublist("smoother: pre params"))
1034  smootherPreList = smootherList_.sublist("smoother: pre params");
1035  else if (smootherList_.isSublist("smoother: params"))
1036  smootherPreList = smootherList_.sublist("smoother: params");
1037  hiptmairPreList.set("hiptmair: smoother type 1",
1038  smootherPreList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
1039  hiptmairPreList.set("hiptmair: smoother type 2",
1040  smootherPreList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
1041  if(smootherPreList.isSublist("hiptmair: smoother list 1"))
1042  hiptmairPreList.set("hiptmair: smoother list 1", smootherPreList.sublist("hiptmair: smoother list 1"));
1043  if(smootherPreList.isSublist("hiptmair: smoother list 2"))
1044  hiptmairPreList.set("hiptmair: smoother list 2", smootherPreList.sublist("hiptmair: smoother list 2"));
1045  hiptmairPreList.set("hiptmair: pre or post",
1046  smootherPreList.get<std::string>("hiptmair: pre or post", "pre"));
1047  hiptmairPreList.set("hiptmair: zero starting solution",
1048  smootherPreList.get<bool>("hiptmair: zero starting solution", true));
1049 
1050  if (smootherList_.isSublist("smoother: post params"))
1051  smootherPostList = smootherList_.sublist("smoother: post params");
1052  else if (smootherList_.isSublist("smoother: params"))
1053  smootherPostList = smootherList_.sublist("smoother: params");
1054  hiptmairPostList.set("hiptmair: smoother type 1",
1055  smootherPostList.get<std::string>("hiptmair: smoother type 1", "CHEBYSHEV"));
1056  hiptmairPostList.set("hiptmair: smoother type 2",
1057  smootherPostList.get<std::string>("hiptmair: smoother type 2", "CHEBYSHEV"));
1058  if(smootherPostList.isSublist("hiptmair: smoother list 1"))
1059  hiptmairPostList.set("hiptmair: smoother list 1", smootherPostList.sublist("hiptmair: smoother list 1"));
1060  if(smootherPostList.isSublist("hiptmair: smoother list 2"))
1061  hiptmairPostList.set("hiptmair: smoother list 2", smootherPostList.sublist("hiptmair: smoother list 2"));
1062  hiptmairPostList.set("hiptmair: pre or post",
1063  smootherPostList.get<std::string>("hiptmair: pre or post", "post"));
1064  hiptmairPostList.set("hiptmair: zero starting solution",
1065  smootherPostList.get<bool>("hiptmair: zero starting solution", false));
1066 
1067  typedef Tpetra::RowMatrix<SC, LO, GO, NO> TROW;
1068  RCP<const TROW > EdgeMatrix = Utilities::Op2NonConstTpetraRow(SM_Matrix_);
1069  RCP<const TROW > NodeMatrix = Utilities::Op2NonConstTpetraRow(A22_);
1070  RCP<const TROW > PMatrix = Utilities::Op2NonConstTpetraRow(D0_Matrix_);
1071 
1072  hiptmairPreSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
1073  hiptmairPreSmoother_ -> setParameters(hiptmairPreList);
1074  hiptmairPreSmoother_ -> initialize();
1075  hiptmairPreSmoother_ -> compute();
1076  hiptmairPostSmoother_ = rcp( new Ifpack2::Hiptmair<TROW>(EdgeMatrix,NodeMatrix,PMatrix) );
1077  hiptmairPostSmoother_ -> setParameters(hiptmairPostList);
1078  hiptmairPostSmoother_ -> initialize();
1079  hiptmairPostSmoother_ -> compute();
1080  useHiptmairSmoothing_ = true;
1081 #else
1082  throw(Xpetra::Exceptions::RuntimeError("MueLu must be compiled with Ifpack2 for Hiptmair smoothing."));
1083 #endif // defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1084  } else {
1085 
1086  Level level;
1087  RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(new FactoryManager());
1088  level.SetFactoryManager(factoryHandler);
1089  level.SetLevelID(0);
1090  level.setObjectLabel("RefMaxwell (1,1)");
1091  level.Set("A",SM_Matrix_);
1092  level.setlib(SM_Matrix_->getDomainMap()->lib());
1093 
1094  if (parameterList_.isType<std::string>("smoother: pre type") && parameterList_.isType<std::string>("smoother: post type")) {
1095  std::string preSmootherType = parameterList_.get<std::string>("smoother: pre type");
1096  std::string postSmootherType = parameterList_.get<std::string>("smoother: post type");
1097 
1098  ParameterList preSmootherList, postSmootherList;
1099  if (parameterList_.isSublist("smoother: pre params"))
1100  preSmootherList = parameterList_.sublist("smoother: pre params");
1101  if (parameterList_.isSublist("smoother: post params"))
1102  postSmootherList = parameterList_.sublist("smoother: post params");
1103 
1104  RCP<SmootherPrototype> preSmootherPrototype = rcp(new TrilinosSmoother(preSmootherType, preSmootherList));
1105  RCP<SmootherPrototype> postSmootherPrototype = rcp(new TrilinosSmoother(postSmootherType, postSmootherList));
1106  RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(preSmootherPrototype, postSmootherPrototype));
1107 
1108  level.Request("PreSmoother",smootherFact.get());
1109  level.Request("PostSmoother",smootherFact.get());
1110  if (enable_reuse_) {
1111  ParameterList smootherFactoryParams;
1112  smootherFactoryParams.set("keep smoother data", true);
1113  smootherFact->SetParameterList(smootherFactoryParams);
1114  level.Request("PreSmoother data", smootherFact.get());
1115  level.Request("PostSmoother data", smootherFact.get());
1116  if (!PreSmootherData_.is_null())
1117  level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1118  if (!PostSmootherData_.is_null())
1119  level.Set("PostSmoother data", PostSmootherData_, smootherFact.get());
1120  }
1121  smootherFact->Build(level);
1122  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1123  PostSmoother_ = level.Get<RCP<SmootherBase> >("PostSmoother",smootherFact.get());
1124  if (enable_reuse_) {
1125  PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1126  PostSmootherData_ = level.Get<RCP<SmootherPrototype> >("PostSmoother data",smootherFact.get());
1127  }
1128  } else {
1129  std::string smootherType = parameterList_.get<std::string>("smoother: type", "CHEBYSHEV");
1130 
1131  RCP<SmootherPrototype> smootherPrototype = rcp(new TrilinosSmoother(smootherType, smootherList_));
1132  RCP<SmootherFactory> smootherFact = rcp(new SmootherFactory(smootherPrototype));
1133  level.Request("PreSmoother",smootherFact.get());
1134  if (enable_reuse_) {
1135  ParameterList smootherFactoryParams;
1136  smootherFactoryParams.set("keep smoother data", true);
1137  smootherFact->SetParameterList(smootherFactoryParams);
1138  level.Request("PreSmoother data", smootherFact.get());
1139  if (!PreSmootherData_.is_null())
1140  level.Set("PreSmoother data", PreSmootherData_, smootherFact.get());
1141  }
1142  smootherFact->Build(level);
1143  PreSmoother_ = level.Get<RCP<SmootherBase> >("PreSmoother",smootherFact.get());
1144  PostSmoother_ = PreSmoother_;
1145  if (enable_reuse_)
1146  PreSmootherData_ = level.Get<RCP<SmootherPrototype> >("PreSmoother data",smootherFact.get());
1147  }
1148  useHiptmairSmoothing_ = false;
1149  }
1150  }
1151 
1152 
1153  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1155 
1156  RCP<Teuchos::TimeMonitor> tmAlloc = getTimer("MueLu RefMaxwell: Allocate MVs");
1157 
1158  if (!R11_.is_null())
1159  P11res_ = MultiVectorFactory::Build(R11_->getRangeMap(), numVectors);
1160  else
1161  P11res_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1162  P11res_->setObjectLabel("P11res");
1163  if (D0_T_R11_colMapsMatch_) {
1164  D0TR11Tmp_ = MultiVectorFactory::Build(R11_->getColMap(), numVectors);
1165  D0TR11Tmp_->setObjectLabel("D0TR11Tmp");
1166  }
1167  if (!ImporterH_.is_null()) {
1168  P11resTmp_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1169  P11resTmp_->setObjectLabel("P11resTmp");
1170  P11x_ = MultiVectorFactory::Build(ImporterH_->getTargetMap(), numVectors);
1171  } else
1172  P11x_ = MultiVectorFactory::Build(P11_->getDomainMap(), numVectors);
1173  P11x_->setObjectLabel("P11x");
1174  if (!D0_T_Matrix_.is_null())
1175  D0res_ = MultiVectorFactory::Build(D0_T_Matrix_->getRangeMap(), numVectors);
1176  else
1177  D0res_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1178  D0res_->setObjectLabel("D0res");
1179  if (!Importer22_.is_null()) {
1180  D0resTmp_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1181  D0resTmp_->setObjectLabel("D0resTmp");
1182  D0x_ = MultiVectorFactory::Build(Importer22_->getTargetMap(), numVectors);
1183  } else
1184  D0x_ = MultiVectorFactory::Build(D0_Matrix_->getDomainMap(), numVectors);
1185  D0x_->setObjectLabel("D0x");
1186  if (!AH_.is_null()) {
1187  if (!ImporterH_.is_null() && !implicitTranspose_)
1188  P11resSubComm_ = MultiVectorFactory::Build(P11resTmp_, Teuchos::View);
1189  else
1190  P11resSubComm_ = MultiVectorFactory::Build(P11res_, Teuchos::View);
1191  P11resSubComm_->replaceMap(AH_->getRangeMap());
1192  P11resSubComm_->setObjectLabel("P11resSubComm");
1193 
1194  P11xSubComm_ = MultiVectorFactory::Build(P11x_, Teuchos::View);
1195  P11xSubComm_->replaceMap(AH_->getDomainMap());
1196  P11xSubComm_->setObjectLabel("P11xSubComm");
1197  }
1198  if (!A22_.is_null()) {
1199  if (!Importer22_.is_null() && !implicitTranspose_)
1200  D0resSubComm_ = MultiVectorFactory::Build(D0resTmp_, Teuchos::View);
1201  else
1202  D0resSubComm_ = MultiVectorFactory::Build(D0res_, Teuchos::View);
1203  D0resSubComm_->replaceMap(A22_->getRangeMap());
1204  D0resSubComm_->setObjectLabel("D0resSubComm");
1205 
1206  D0xSubComm_ = MultiVectorFactory::Build(D0x_, Teuchos::View);
1207  D0xSubComm_->replaceMap(A22_->getDomainMap());
1208  D0xSubComm_->setObjectLabel("D0xSubComm");
1209  }
1210  residual_ = MultiVectorFactory::Build(SM_Matrix_->getDomainMap(), numVectors);
1211  residual_->setObjectLabel("residual");
1212  }
1213 
1214 
1215  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1216  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Matrix& A, std::string name) const {
1217  if (dump_matrices_) {
1218  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1219  Xpetra::IO<SC, LO, GO, NO>::Write(name, A);
1220  }
1221  }
1222 
1223 
1224  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1225  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const MultiVector& X, std::string name) const {
1226  if (dump_matrices_) {
1227  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1228  Xpetra::IO<SC, LO, GO, NO>::Write(name, X);
1229  }
1230  }
1231 
1232 
1233  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1235  if (dump_matrices_) {
1236  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1237  Xpetra::IO<coordinateType, LO, GO, NO>::Write(name, X);
1238  }
1239  }
1240 
1241 
1242  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1243  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dump(const Teuchos::ArrayRCP<bool>& v, std::string name) const {
1244  if (dump_matrices_) {
1245  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1246  std::ofstream out(name);
1247  for (size_t i = 0; i < Teuchos::as<size_t>(v.size()); i++)
1248  out << v[i] << "\n";
1249  }
1250  }
1251 
1252 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1253  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1255  if (dump_matrices_) {
1256  GetOStream(Runtime0) << "Dumping to " << name << std::endl;
1257  std::ofstream out(name);
1258  auto vH = Kokkos::create_mirror_view (v);
1259  Kokkos::deep_copy(vH , v);
1260  for (size_t i = 0; i < vH.size(); i++)
1261  out << vH[i] << "\n";
1262  }
1263  }
1264 #endif
1265 
1266  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1267  Teuchos::RCP<Teuchos::TimeMonitor> RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::getTimer(std::string name, RCP<const Teuchos::Comm<int> > comm) const {
1268  if (IsPrint(Timings)) {
1269  if (!syncTimers_)
1270  return Teuchos::rcp(new Teuchos::TimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name)));
1271  else {
1272  if (comm.is_null())
1273  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), SM_Matrix_->getRowMap()->getComm().ptr()));
1274  else
1275  return Teuchos::rcp(new Teuchos::SyncTimeMonitor(*Teuchos::TimeMonitor::getNewTimer(name), comm.ptr()));
1276  }
1277  } else
1278  return Teuchos::null;
1279  }
1280 
1281 
1282  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1284  // The P11 matrix maps node based aggregrates { A_j } to edges { e_i }.
1285  //
1286  // The old implementation used
1287  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i) * P(n_l, A_j)
1288  // yet the paper gives
1289  // P11(i, j*dim+k) = sum_{nodes n_l in e_i intersected with A_j} 0.5 * phi_k(e_i)
1290  // where phi_k is the k-th nullspace vector.
1291  //
1292  // The graph of D0 contains the incidence from nodes to edges.
1293  // The nodal prolongator P maps aggregates to nodes.
1294 
1295  const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1296  const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1297  const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1298  size_t dim = Nullspace_->getNumVectors();
1299  size_t numLocalRows = SM_Matrix_->getNodeNumRows();
1300 
1301  RCP<Matrix> P_nodal;
1302  RCP<CrsMatrix> P_nodal_imported;
1303  RCP<MultiVector> Nullspace_nodal;
1304  if (skipFirstLevel_) {
1305  // build prolongator: algorithm 1 in the reference paper
1306  // First, build nodal unsmoothed prolongator using the matrix A_nodal
1307  bool read_P_from_file = parameterList_.get("refmaxwell: read_P_from_file",false);
1308  if (read_P_from_file) {
1309  // This permits to read in an ML prolongator, so that we get the same hierarchy.
1310  // (ML and MueLu typically produce different aggregates.)
1311  std::string P_filename = parameterList_.get("refmaxwell: P_filename",std::string("P.m"));
1312  std::string domainmap_filename = parameterList_.get("refmaxwell: P_domainmap_filename",std::string("domainmap_P.m"));
1313  std::string colmap_filename = parameterList_.get("refmaxwell: P_colmap_filename",std::string("colmap_P.m"));
1314  std::string coords_filename = parameterList_.get("refmaxwell: CoordsH",std::string("coordsH.m"));
1315  RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1316  RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1317  P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap(), colmap, domainmap, A_nodal_Matrix_->getDomainMap());
1318  CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1319  } else {
1320  Level fineLevel, coarseLevel;
1321  fineLevel.SetFactoryManager(null);
1322  coarseLevel.SetFactoryManager(null);
1323  coarseLevel.SetPreviousLevel(rcpFromRef(fineLevel));
1324  fineLevel.SetLevelID(0);
1325  coarseLevel.SetLevelID(1);
1326  fineLevel.Set("A",A_nodal_Matrix_);
1327  fineLevel.Set("Coordinates",Coords_);
1328  fineLevel.Set("DofsPerNode",1);
1329  coarseLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1330  fineLevel.setlib(A_nodal_Matrix_->getDomainMap()->lib());
1331  coarseLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1332  fineLevel.setObjectLabel("RefMaxwell (1,1) A_nodal");
1333 
1334  LocalOrdinal NSdim = 1;
1335  RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1336  nullSpace->putScalar(SC_ONE);
1337  fineLevel.Set("Nullspace",nullSpace);
1338 
1339  RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1340 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1341  if (useKokkos_) {
1342  amalgFact = rcp(new AmalgamationFactory_kokkos());
1343  dropFact = rcp(new CoalesceDropFactory_kokkos());
1344  UncoupledAggFact = rcp(new UncoupledAggregationFactory_kokkos());
1345  coarseMapFact = rcp(new CoarseMapFactory_kokkos());
1346  TentativePFact = rcp(new TentativePFactory_kokkos());
1347  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1348  SaPFact = rcp(new SaPFactory_kokkos());
1349  Tfact = rcp(new CoordinatesTransferFactory_kokkos());
1350  } else
1351 #endif
1352  {
1353  amalgFact = rcp(new AmalgamationFactory());
1354  dropFact = rcp(new CoalesceDropFactory());
1355  UncoupledAggFact = rcp(new UncoupledAggregationFactory());
1356  coarseMapFact = rcp(new CoarseMapFactory());
1357  TentativePFact = rcp(new TentativePFactory());
1358  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1359  SaPFact = rcp(new SaPFactory());
1360  Tfact = rcp(new CoordinatesTransferFactory());
1361  }
1362  dropFact->SetFactory("UnAmalgamationInfo", amalgFact);
1363  double dropTol = parameterList_.get("aggregation: drop tol",0.0);
1364  std::string dropScheme = parameterList_.get("aggregation: drop scheme","classical");
1365  std::string distLaplAlgo = parameterList_.get("aggregation: distance laplacian algo","default");
1366  dropFact->SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1367  dropFact->SetParameter("aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1368  if (!useKokkos_)
1369  dropFact->SetParameter("aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1370 
1371  UncoupledAggFact->SetFactory("Graph", dropFact);
1372  int minAggSize = parameterList_.get("aggregation: min agg size",2);
1373  UncoupledAggFact->SetParameter("aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1374  int maxAggSize = parameterList_.get("aggregation: max agg size",-1);
1375  UncoupledAggFact->SetParameter("aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1376 
1377  coarseMapFact->SetFactory("Aggregates", UncoupledAggFact);
1378 
1379  TentativePFact->SetFactory("Aggregates", UncoupledAggFact);
1380  TentativePFact->SetFactory("UnAmalgamationInfo", amalgFact);
1381  TentativePFact->SetFactory("CoarseMap", coarseMapFact);
1382 
1383  Tfact->SetFactory("Aggregates", UncoupledAggFact);
1384  Tfact->SetFactory("CoarseMap", coarseMapFact);
1385 
1386  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa") {
1387  SaPFact->SetFactory("P", TentativePFact);
1388  coarseLevel.Request("P", SaPFact.get());
1389  } else
1390  coarseLevel.Request("P",TentativePFact.get());
1391  coarseLevel.Request("Nullspace",TentativePFact.get());
1392  coarseLevel.Request("Coordinates",Tfact.get());
1393 
1394  RCP<AggregationExportFactory> aggExport;
1395  if (parameterList_.get("aggregation: export visualization data",false)) {
1396  aggExport = rcp(new AggregationExportFactory());
1397  ParameterList aggExportParams;
1398  aggExportParams.set("aggregation: output filename", "aggs.vtk");
1399  aggExportParams.set("aggregation: output file: agg style", "Jacks");
1400  aggExport->SetParameterList(aggExportParams);
1401 
1402  aggExport->SetFactory("Aggregates", UncoupledAggFact);
1403  aggExport->SetFactory("UnAmalgamationInfo", amalgFact);
1404  fineLevel.Request("Aggregates",UncoupledAggFact.get());
1405  fineLevel.Request("UnAmalgamationInfo",amalgFact.get());
1406  }
1407 
1408  if (parameterList_.get("multigrid algorithm","unsmoothed") == "sa")
1409  coarseLevel.Get("P",P_nodal,SaPFact.get());
1410  else
1411  coarseLevel.Get("P",P_nodal,TentativePFact.get());
1412  coarseLevel.Get("Nullspace",Nullspace_nodal,TentativePFact.get());
1413  coarseLevel.Get("Coordinates",CoordsH_,Tfact.get());
1414 
1415 
1416  if (parameterList_.get("aggregation: export visualization data",false))
1417  aggExport->Build(fineLevel, coarseLevel);
1418  }
1419  dump(*P_nodal, "P_nodal.m");
1420  dump(*Nullspace_nodal, "Nullspace_nodal.m");
1421 
1422 
1423  RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1424 
1425  // Import off-rank rows of P_nodal into P_nodal_imported
1426  int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1427  if (numProcs > 1) {
1428  RCP<CrsMatrixWrap> P_nodal_temp;
1429  RCP<const Map> targetMap = D0Crs->getColMap();
1430  P_nodal_temp = rcp(new CrsMatrixWrap(targetMap));
1431  RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1432  P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1433  P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1434  rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1435  P_nodal_imported = P_nodal_temp->getCrsMatrix();
1436  dump(*P_nodal_temp, "P_nodal_imported.m");
1437  } else
1438  P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1439 
1440  }
1441 
1442 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
1443  if (useKokkos_) {
1444 
1445  using ATS = Kokkos::ArithTraits<SC>;
1446  using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1448 
1449  typedef typename Matrix::local_matrix_type KCRS;
1450  typedef typename KCRS::device_type device_t;
1451  typedef typename KCRS::StaticCrsGraphType graph_t;
1452  typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1453  typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1454  typedef typename KCRS::values_type::non_const_type scalar_view_t;
1455 
1456 
1457  // Which algorithm should we use for the construction of the special prolongator?
1458  // Option "mat-mat":
1459  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1460  std::string defaultAlgo = "mat-mat";
1461  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1462 
1463  if (skipFirstLevel_) {
1464  // Get data out of P_nodal_imported and D0.
1465 
1466  if (algo == "mat-mat") {
1467  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1468  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1469 
1470 #ifdef HAVE_MUELU_DEBUG
1471  TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1472 #endif
1473 
1474  // Get data out of D0*P.
1475  auto localD0P = D0_P_nodal->getLocalMatrixDevice();
1476 
1477  // Create the matrix object
1478  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1479  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1480 
1481  size_t nnzEstimate = dim*localD0P.graph.entries.size();
1482  lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1483  lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1484  scalar_view_t P11vals("P11_vals",nnzEstimate);
1485 
1486  // adjust rowpointer
1487  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1488  KOKKOS_LAMBDA(const size_t i) {
1489  P11rowptr(i) = dim*localD0P.graph.row_map(i);
1490  });
1491 
1492  // adjust column indices
1493  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1494  KOKKOS_LAMBDA(const size_t jj) {
1495  for (size_t k = 0; k < dim; k++) {
1496  P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1497  P11vals(dim*jj+k) = SC_ZERO;
1498  }
1499  });
1500 
1501  auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1502 
1503  // enter values
1504  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1505  // The matrix D0 has too many entries per row.
1506  // Therefore we need to check whether its entries are actually non-zero.
1507  // This is the case for the matrices built by MiniEM.
1508  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1509 
1510  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1511 
1512  auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1513  auto localP = P_nodal_imported->getLocalMatrixDevice();
1514  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1515  KOKKOS_LAMBDA(const size_t i) {
1516  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1517  LO l = localD0.graph.entries(ll);
1518  SC p = localD0.values(ll);
1519  if (impl_ATS::magnitude(p) < tol)
1520  continue;
1521  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1522  LO j = localP.graph.entries(jj);
1523  SC v = localP.values(jj);
1524  for (size_t k = 0; k < dim; k++) {
1525  LO jNew = dim*j+k;
1526  SC n = localNullspace(i,k);
1527  size_t m;
1528  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1529  if (P11colind(m) == jNew)
1530  break;
1531 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1532  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1533 #endif
1534  P11vals(m) += half * v * n;
1535  }
1536  }
1537  }
1538  });
1539 
1540  } else {
1541  auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1542  auto localP = P_nodal_imported->getLocalMatrixDevice();
1543  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1544  KOKKOS_LAMBDA(const size_t i) {
1545  for (size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1546  LO l = localD0.graph.entries(ll);
1547  for (size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1548  LO j = localP.graph.entries(jj);
1549  SC v = localP.values(jj);
1550  for (size_t k = 0; k < dim; k++) {
1551  LO jNew = dim*j+k;
1552  SC n = localNullspace(i,k);
1553  size_t m;
1554  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1555  if (P11colind(m) == jNew)
1556  break;
1557 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1558  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1559 #endif
1560  P11vals(m) += half * v * n;
1561  }
1562  }
1563  }
1564  });
1565  }
1566 
1567  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1568  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1569  P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1570  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1571 
1572  } else
1573  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1574 
1575  NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1576 
1577  auto localNullspace_nodal = Nullspace_nodal->getDeviceLocalView(Xpetra::Access::ReadOnly);
1578  auto localNullspaceH = NullspaceH_->getDeviceLocalView(Xpetra::Access::ReadWrite);
1579  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_nullspace", range_type(0,Nullspace_nodal->getLocalLength()),
1580  KOKKOS_LAMBDA(const size_t i) {
1581  Scalar val = localNullspace_nodal(i,0);
1582  for (size_t j = 0; j < dim; j++)
1583  localNullspaceH(dim*i+j, j) = val;
1584  });
1585 
1586  } else { // !skipFirstLevel_
1587  // Get data out of P_nodal_imported and D0.
1588  auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1589 
1590  CoordsH_ = Coords_;
1591 
1592  if (algo == "mat-mat") {
1593 
1594  // Create the matrix object
1595  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1596  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1597 
1598  size_t nnzEstimate = dim*localD0.graph.entries.size();
1599  lno_view_t P11rowptr("P11_rowptr", numLocalRows+1);
1600  lno_nnz_view_t P11colind("P11_colind",nnzEstimate);
1601  scalar_view_t P11vals("P11_vals",nnzEstimate);
1602 
1603  // adjust rowpointer
1604  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1605  KOKKOS_LAMBDA(const size_t i) {
1606  P11rowptr(i) = dim*localD0.graph.row_map(i);
1607  });
1608 
1609  // adjust column indices
1610  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0.graph.entries.size()),
1611  KOKKOS_LAMBDA(const size_t jj) {
1612  for (size_t k = 0; k < dim; k++) {
1613  P11colind(dim*jj+k) = dim*localD0.graph.entries(jj)+k;
1614  P11vals(dim*jj+k) = SC_ZERO;
1615  }
1616  });
1617 
1618  auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1619 
1620  // enter values
1621  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1622  // The matrix D0 has too many entries per row.
1623  // Therefore we need to check whether its entries are actually non-zero.
1624  // This is the case for the matrices built by MiniEM.
1625  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1626 
1627  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1628 
1629  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1630  KOKKOS_LAMBDA(const size_t i) {
1631  for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1632  LO j = localD0.graph.entries(jj);
1633  SC p = localD0.values(jj);
1634  if (impl_ATS::magnitude(p) < tol)
1635  continue;
1636  for (size_t k = 0; k < dim; k++) {
1637  LO jNew = dim*j+k;
1638  SC n = localNullspace(i,k);
1639  size_t m;
1640  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1641  if (P11colind(m) == jNew)
1642  break;
1643 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1644  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1645 #endif
1646  P11vals(m) += half * n;
1647  }
1648  }
1649  });
1650 
1651  } else {
1652  Kokkos::parallel_for("MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1653  KOKKOS_LAMBDA(const size_t i) {
1654  for (size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1655  LO j = localD0.graph.entries(jj);
1656  for (size_t k = 0; k < dim; k++) {
1657  LO jNew = dim*j+k;
1658  SC n = localNullspace(i,k);
1659  size_t m;
1660  for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1661  if (P11colind(m) == jNew)
1662  break;
1663 #if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1664  TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1665 #endif
1666  P11vals(m) += half * n;
1667  }
1668  }
1669  });
1670  }
1671 
1672  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1673  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1674  P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1675  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1676  } else
1677  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1678 
1679  }
1680  } else
1681 #endif // ifdef(HAVE_MUELU_KOKKOS_REFACTOR)
1682  {
1683  // get nullspace vectors
1684  ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1685  ArrayRCP<ArrayView<const SC> > nullspace(dim);
1686  for(size_t i=0; i<dim; i++) {
1687  nullspaceRCP[i] = Nullspace_->getData(i);
1688  nullspace[i] = nullspaceRCP[i]();
1689  }
1690 
1691  // Get data out of P_nodal_imported and D0.
1692  ArrayRCP<size_t> P11rowptr_RCP;
1693  ArrayRCP<LO> P11colind_RCP;
1694  ArrayRCP<SC> P11vals_RCP;
1695 
1696 
1697  // Which algorithm should we use for the construction of the special prolongator?
1698  // Option "mat-mat":
1699  // Multiply D0 * P_nodal, take graph, blow up the domain space and compute the entries.
1700  // Option "gustavson":
1701  // Loop over D0, P and nullspace and allocate directly. (Gustavson-like)
1702  // More efficient, but only available for serial node.
1703  std::string defaultAlgo = "mat-mat";
1704  std::string algo = parameterList_.get("refmaxwell: prolongator compute algorithm",defaultAlgo);
1705 
1706  if (skipFirstLevel_) {
1707 
1708  if (algo == "mat-mat") {
1709  RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1710  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,false,*P_nodal,false,*D0_P_nodal,true,true);
1711 
1712 
1713  ArrayRCP<const size_t> D0rowptr_RCP;
1714  ArrayRCP<const LO> D0colind_RCP;
1715  ArrayRCP<const SC> D0vals_RCP;
1716  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1717  // For efficiency
1718  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1719  // slower than Teuchos::ArrayView::operator[].
1720  ArrayView<const size_t> D0rowptr;
1721  ArrayView<const LO> D0colind;
1722  ArrayView<const SC> D0vals;
1723  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1724 
1725  // Get data out of P_nodal_imported and D0.
1726  ArrayRCP<const size_t> Prowptr_RCP;
1727  ArrayRCP<const LO> Pcolind_RCP;
1728  ArrayRCP<const SC> Pvals_RCP;
1729  P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1730  ArrayView<const size_t> Prowptr;
1731  ArrayView<const LO> Pcolind;
1732  ArrayView<const SC> Pvals;
1733  Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1734 
1735  // Get data out of D0*P.
1736  ArrayRCP<const size_t> D0Prowptr_RCP;
1737  ArrayRCP<const LO> D0Pcolind_RCP;
1738  ArrayRCP<const SC> D0Pvals_RCP;
1739  rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1740 
1741  // For efficiency
1742  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1743  // slower than Teuchos::ArrayView::operator[].
1744  ArrayView<const size_t> D0Prowptr;
1745  ArrayView<const LO> D0Pcolind;
1746  D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1747 
1748  // Create the matrix object
1749  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1750  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1751  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1752  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1753  size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1754  P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1755 
1756  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1757  ArrayView<LO> P11colind = P11colind_RCP();
1758  ArrayView<SC> P11vals = P11vals_RCP();
1759 
1760  // adjust rowpointer
1761  for (size_t i = 0; i < numLocalRows+1; i++) {
1762  P11rowptr[i] = dim*D0Prowptr[i];
1763  }
1764 
1765  // adjust column indices
1766  for (size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1767  for (size_t k = 0; k < dim; k++) {
1768  P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1769  P11vals[dim*jj+k] = SC_ZERO;
1770  }
1771 
1772  RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1773  RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1774  // enter values
1775  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1776  // The matrix D0 has too many entries per row.
1777  // Therefore we need to check whether its entries are actually non-zero.
1778  // This is the case for the matrices built by MiniEM.
1779  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1780 
1781  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1782  for (size_t i = 0; i < numLocalRows; i++) {
1783  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1784  LO l = D0colind[ll];
1785  SC p = D0vals[ll];
1786  if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1787  continue;
1788  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1789  LO j = Pcolind[jj];
1790  j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1791  SC v = Pvals[jj];
1792  for (size_t k = 0; k < dim; k++) {
1793  LO jNew = dim*j+k;
1794  SC n = nullspace[k][i];
1795  size_t m;
1796  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1797  if (P11colind[m] == jNew)
1798  break;
1799 #ifdef HAVE_MUELU_DEBUG
1800  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1801 #endif
1802  P11vals[m] += half * v * n;
1803  }
1804  }
1805  }
1806  }
1807  } else {
1808  // enter values
1809  for (size_t i = 0; i < numLocalRows; i++) {
1810  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1811  LO l = D0colind[ll];
1812  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1813  LO j = Pcolind[jj];
1814  j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1815  SC v = Pvals[jj];
1816  for (size_t k = 0; k < dim; k++) {
1817  LO jNew = dim*j+k;
1818  SC n = nullspace[k][i];
1819  size_t m;
1820  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1821  if (P11colind[m] == jNew)
1822  break;
1823 #ifdef HAVE_MUELU_DEBUG
1824  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1825 #endif
1826  P11vals[m] += half * v * n;
1827  }
1828  }
1829  }
1830  }
1831  }
1832 
1833  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1834  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1835 
1836  } else if (algo == "gustavson") {
1837  ArrayRCP<const size_t> D0rowptr_RCP;
1838  ArrayRCP<const LO> D0colind_RCP;
1839  ArrayRCP<const SC> D0vals_RCP;
1840  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1841  // For efficiency
1842  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1843  // slower than Teuchos::ArrayView::operator[].
1844  ArrayView<const size_t> D0rowptr;
1845  ArrayView<const LO> D0colind;
1846  ArrayView<const SC> D0vals;
1847  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1848 
1849  // Get data out of P_nodal_imported and D0.
1850  ArrayRCP<const size_t> Prowptr_RCP;
1851  ArrayRCP<const LO> Pcolind_RCP;
1852  ArrayRCP<const SC> Pvals_RCP;
1853  P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1854  ArrayView<const size_t> Prowptr;
1855  ArrayView<const LO> Pcolind;
1856  ArrayView<const SC> Pvals;
1857  Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1858 
1859  LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1860  const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1861  Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1862  // This is ad-hoc and should maybe be replaced with some better heuristics.
1863  size_t nnz_alloc = dim*D0vals_RCP.size();
1864 
1865  // Create the matrix object
1866  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1867  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1868  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1869  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1870  P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1871 
1872  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1873  ArrayView<LO> P11colind = P11colind_RCP();
1874  ArrayView<SC> P11vals = P11vals_RCP();
1875 
1876  size_t nnz;
1877  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1878  // The matrix D0 has too many entries per row.
1879  // Therefore we need to check whether its entries are actually non-zero.
1880  // This is the case for the matrices built by MiniEM.
1881  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1882 
1883  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1884  nnz = 0;
1885  size_t nnz_old = 0;
1886  for (size_t i = 0; i < numLocalRows; i++) {
1887  P11rowptr[i] = nnz;
1888  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1889  LO l = D0colind[ll];
1890  SC p = D0vals[ll];
1891  if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1892  continue;
1893  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1894  LO j = Pcolind[jj];
1895  SC v = Pvals[jj];
1896  for (size_t k = 0; k < dim; k++) {
1897  LO jNew = dim*j+k;
1898  SC n = nullspace[k][i];
1899  // do we already have an entry for (i, jNew)?
1900  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1901  P11_status[jNew] = nnz;
1902  P11colind[nnz] = jNew;
1903  P11vals[nnz] = half * v * n;
1904  // or should it be
1905  // P11vals[nnz] = half * n;
1906  nnz++;
1907  } else {
1908  P11vals[P11_status[jNew]] += half * v * n;
1909  // or should it be
1910  // P11vals[P11_status[jNew]] += half * n;
1911  }
1912  }
1913  }
1914  }
1915  nnz_old = nnz;
1916  }
1917  P11rowptr[numLocalRows] = nnz;
1918  } else {
1919  nnz = 0;
1920  size_t nnz_old = 0;
1921  for (size_t i = 0; i < numLocalRows; i++) {
1922  P11rowptr[i] = nnz;
1923  for (size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1924  LO l = D0colind[ll];
1925  for (size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1926  LO j = Pcolind[jj];
1927  SC v = Pvals[jj];
1928  for (size_t k = 0; k < dim; k++) {
1929  LO jNew = dim*j+k;
1930  SC n = nullspace[k][i];
1931  // do we already have an entry for (i, jNew)?
1932  if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1933  P11_status[jNew] = nnz;
1934  P11colind[nnz] = jNew;
1935  P11vals[nnz] = half * v * n;
1936  // or should it be
1937  // P11vals[nnz] = half * n;
1938  nnz++;
1939  } else {
1940  P11vals[P11_status[jNew]] += half * v * n;
1941  // or should it be
1942  // P11vals[P11_status[jNew]] += half * n;
1943  }
1944  }
1945  }
1946  }
1947  nnz_old = nnz;
1948  }
1949  P11rowptr[numLocalRows] = nnz;
1950  }
1951 
1952  if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1953  // Downward resize
1954  // - Cannot resize for Epetra, as it checks for same pointers
1955  // - Need to resize for Tpetra, as it checks ().size() == P11rowptr[numLocalRows]
1956  P11vals_RCP.resize(nnz);
1957  P11colind_RCP.resize(nnz);
1958  }
1959 
1960  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1961  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1962  } else
1963  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1964 
1965  NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1966 
1967  ArrayRCP<const Scalar> ns_rcp = Nullspace_nodal->getData(0);
1968  ArrayView<const Scalar> ns_view = ns_rcp();
1969  for (size_t i = 0; i < Nullspace_nodal->getLocalLength(); i++) {
1970  Scalar val = ns_view[i];
1971  for (size_t j = 0; j < dim; j++)
1972  NullspaceH_->replaceLocalValue(dim*i+j, j, val);
1973  }
1974 
1975 
1976  } else { // !skipFirstLevel_
1977  ArrayRCP<const size_t> D0rowptr_RCP;
1978  ArrayRCP<const LO> D0colind_RCP;
1979  ArrayRCP<const SC> D0vals_RCP;
1980  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1981  // For efficiency
1982  // Refers to an issue where Teuchos::ArrayRCP::operator[] may be
1983  // slower than Teuchos::ArrayView::operator[].
1984  ArrayView<const size_t> D0rowptr;
1985  ArrayView<const LO> D0colind;
1986  ArrayView<const SC> D0vals;
1987  D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1988 
1989  CoordsH_ = Coords_;
1990  if (algo == "mat-mat") {
1991 
1992  // Create the matrix object
1993  RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1994  RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1995  P11_ = rcp(new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1996  RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1997  size_t nnzEstimate = dim*D0rowptr[numLocalRows];
1998  P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1999 
2000  ArrayView<size_t> P11rowptr = P11rowptr_RCP();
2001  ArrayView<LO> P11colind = P11colind_RCP();
2002  ArrayView<SC> P11vals = P11vals_RCP();
2003 
2004  // adjust rowpointer
2005  for (size_t i = 0; i < numLocalRows+1; i++) {
2006  P11rowptr[i] = dim*D0rowptr[i];
2007  }
2008 
2009  // adjust column indices
2010  for (size_t jj = 0; jj < (size_t) D0rowptr[numLocalRows]; jj++)
2011  for (size_t k = 0; k < dim; k++) {
2012  P11colind[dim*jj+k] = dim*D0colind[jj]+k;
2013  P11vals[dim*jj+k] = SC_ZERO;
2014  }
2015 
2016  // enter values
2017  if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
2018  // The matrix D0 has too many entries per row.
2019  // Therefore we need to check whether its entries are actually non-zero.
2020  // This is the case for the matrices built by MiniEM.
2021  GetOStream(Warnings0) << "RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
2022 
2023  magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
2024  for (size_t i = 0; i < numLocalRows; i++) {
2025  for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
2026  LO j = D0colind[jj];
2027  SC p = D0vals[jj];
2028  if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
2029  continue;
2030  for (size_t k = 0; k < dim; k++) {
2031  LO jNew = dim*j+k;
2032  SC n = nullspace[k][i];
2033  size_t m;
2034  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
2035  if (P11colind[m] == jNew)
2036  break;
2037 #ifdef HAVE_MUELU_DEBUG
2038  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
2039 #endif
2040  P11vals[m] += half * n;
2041  }
2042  }
2043  }
2044  } else {
2045  // enter values
2046  for (size_t i = 0; i < numLocalRows; i++) {
2047  for (size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
2048  LO j = D0colind[jj];
2049 
2050  for (size_t k = 0; k < dim; k++) {
2051  LO jNew = dim*j+k;
2052  SC n = nullspace[k][i];
2053  size_t m;
2054  for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
2055  if (P11colind[m] == jNew)
2056  break;
2057 #ifdef HAVE_MUELU_DEBUG
2058  TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
2059 #endif
2060  P11vals[m] += half * n;
2061  }
2062  }
2063  }
2064  }
2065 
2066  P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
2067  P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
2068 
2069  } else
2070  TEUCHOS_TEST_FOR_EXCEPTION(false,std::invalid_argument,algo << " is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
2071  }
2072  }
2073  }
2074 
2075  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2077  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: Build coarse (1,1) matrix");
2078 
2079  const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2080 
2081  // coarse matrix for P11* (M1 + D1* M2 D1) P11
2082  RCP<Matrix> temp;
2083  temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,false,*P11_,false,temp,GetOStream(Runtime0),true,true);
2084  if (ImporterH_.is_null())
2085  AH_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,AH_,GetOStream(Runtime0),true,true);
2086  else {
2087 
2088  RCP<Matrix> temp2;
2089  temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*P11_,true,*temp,false,temp2,GetOStream(Runtime0),true,true);
2090 
2091  RCP<const Map> map = ImporterH_->getTargetMap()->removeEmptyProcesses();
2092  temp2->removeEmptyProcessesInPlace(map);
2093  if (!temp2.is_null() && temp2->getRowMap().is_null())
2094  temp2 = Teuchos::null;
2095  AH_ = temp2;
2096  }
2097 
2098  if (!disable_addon_) {
2099 
2100  RCP<Matrix> addon;
2101 
2102  if (!AH_.is_null() && Addon_Matrix_.is_null()) {
2103  // construct addon
2104  RCP<Teuchos::TimeMonitor> tmAddon = getTimer("MueLu RefMaxwell: Build coarse addon matrix");
2105  // catch a failure
2106  TEUCHOS_TEST_FOR_EXCEPTION(M0inv_Matrix_==Teuchos::null,std::invalid_argument,
2107  "MueLu::RefMaxwell::formCoarseMatrix(): Inverse of "
2108  "lumped mass matrix required for add-on (i.e. M0inv_Matrix is null)");
2109 
2110  // coarse matrix for add-on, i.e P11* (M1 D0 M0inv D0* M1) P11
2111  RCP<Matrix> Zaux, Z;
2112 
2113  // construct Zaux = M1 P11
2114  Zaux = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M1_Matrix_,false,*P11_,false,Zaux,GetOStream(Runtime0),true,true);
2115  // construct Z = D0* M1 P11 = D0* Zaux
2116  Z = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,true,*Zaux,false,Z,GetOStream(Runtime0),true,true);
2117 
2118  // construct Z* M0inv Z
2119  if (M0inv_Matrix_->getGlobalMaxNumRowEntries()<=1) {
2120  // We assume that if M0inv has at most one entry per row then
2121  // these are all diagonal entries.
2122  RCP<Vector> diag = VectorFactory::Build(M0inv_Matrix_->getRowMap());
2123  M0inv_Matrix_->getLocalDiagCopy(*diag);
2124  {
2125  ArrayRCP<Scalar> diagVals = diag->getDataNonConst(0);
2126  for (size_t j=0; j < diag->getMap()->getNodeNumElements(); j++) {
2127  diagVals[j] = Teuchos::ScalarTraits<Scalar>::squareroot(diagVals[j]);
2128  }
2129  }
2130  if (Z->getRowMap()->isSameAs(*(diag->getMap())))
2131  Z->leftScale(*diag);
2132  else {
2133  RCP<Import> importer = ImportFactory::Build(diag->getMap(),Z->getRowMap());
2134  RCP<Vector> diag2 = VectorFactory::Build(Z->getRowMap());
2135  diag2->doImport(*diag,*importer,Xpetra::INSERT);
2136  Z->leftScale(*diag2);
2137  }
2138  addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*Z,false,addon,GetOStream(Runtime0),true,true);
2139  } else if (parameterList_.get<bool>("rap: triple product", false) == false) {
2140  RCP<Matrix> C2;
2141  // construct C2 = M0inv Z
2142  C2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*M0inv_Matrix_,false,*Z,false,C2,GetOStream(Runtime0),true,true);
2143  // construct Matrix2 = Z* M0inv Z = Z* C2
2144  addon = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*Z,true,*C2,false,addon,GetOStream(Runtime0),true,true);
2145  } else {
2146  addon = MatrixFactory::Build(Z->getDomainMap());
2147  // construct Matrix2 = Z* M0inv Z
2148  Xpetra::TripleMatrixMultiply<Scalar,LocalOrdinal,GlobalOrdinal,Node>::
2149  MultiplyRAP(*Z, true, *M0inv_Matrix_, false, *Z, false, *addon, true, true);
2150  }
2151  // Should we keep the addon for next setup?
2152  if (enable_reuse_)
2153  Addon_Matrix_ = addon;
2154  } else
2155  addon = Addon_Matrix_;
2156 
2157  if (!AH_.is_null()) {
2158  // add matrices together
2159  RCP<Matrix> newAH;
2160  Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::TwoMatrixAdd(*AH_,false,one,*addon,false,one,newAH,GetOStream(Runtime0));
2161  newAH->fillComplete();
2162  AH_ = newAH;
2163  }
2164  }
2165 
2166  if (!AH_.is_null() && !skipFirstLevel_) {
2167  ArrayRCP<bool> AHBCrows;
2168  AHBCrows.resize(AH_->getRowMap()->getNodeNumElements());
2169  size_t dim = Nullspace_->getNumVectors();
2170 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
2171  if (useKokkos_)
2172  for (size_t i = 0; i < BCdomainKokkos_.size(); i++)
2173  for (size_t k = 0; k < dim; k++)
2174  AHBCrows[i*dim+k] = BCdomainKokkos_(i);
2175  else
2176 #endif
2177  for (size_t i = 0; i < static_cast<size_t>(BCdomain_.size()); i++)
2178  for (size_t k = 0; k < dim; k++)
2179  AHBCrows[i*dim+k] = BCdomain_[i];
2180  magnitudeType rowSumTol = parameterList_.get("refmaxwell: row sum drop tol (1,1)",-1.0);
2181  if (rowSumTol > 0.)
2182  Utilities::ApplyRowSumCriterion(*AH_, rowSumTol, AHBCrows);
2183  if (applyBCsToH_)
2184  Utilities::ApplyOAZToMatrixRows(AH_, AHBCrows);
2185  }
2186 
2187  if (!AH_.is_null()) {
2188  // If we already applied BCs to A_nodal, we likely do not need
2189  // to fix up AH.
2190  // If we did not apply BCs to A_nodal, we now need to correct
2191  // the zero diagonals of AH, since we did nuke the nullspace.
2192 
2193  bool fixZeroDiagonal = !applyBCsToAnodal_;
2194  if (precList11_.isParameter("rap: fix zero diagonals"))
2195  fixZeroDiagonal = precList11_.get<bool>("rap: fix zero diagonals");
2196 
2197  if (fixZeroDiagonal) {
2198  magnitudeType threshold = 1e-16;
2199  Scalar replacement = 1.0;
2200  if (precList11_.isType<magnitudeType>("rap: fix zero diagonals threshold"))
2201  threshold = precList11_.get<magnitudeType>("rap: fix zero diagonals threshold");
2202  else if (precList11_.isType<double>("rap: fix zero diagonals threshold"))
2203  threshold = Teuchos::as<magnitudeType>(precList11_.get<double>("rap: fix zero diagonals threshold"));
2204  if (precList11_.isType<double>("rap: fix zero diagonals replacement"))
2205  replacement = Teuchos::as<Scalar>(precList11_.get<double>("rap: fix zero diagonals replacement"));
2206  Xpetra::MatrixUtils<SC,LO,GO,NO>::CheckRepairMainDiagonal(AH_, true, GetOStream(Warnings1), threshold, replacement);
2207  }
2208 
2209  // Set block size
2210  size_t dim = Nullspace_->getNumVectors();
2211  AH_->SetFixedBlockSize(dim);
2212  }
2213  }
2214 
2215 
2216  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2217  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::resetMatrix(RCP<Matrix> SM_Matrix_new, bool ComputePrec) {
2218  bool reuse = !SM_Matrix_.is_null();
2219  SM_Matrix_ = SM_Matrix_new;
2220  dump(*SM_Matrix_, "SM.m");
2221  if (ComputePrec)
2222  compute(reuse);
2223  }
2224 
2225 
2226  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2227  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::applyInverseAdditive(const MultiVector& RHS, MultiVector& X) const {
2228 
2229  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2230 
2231  { // compute residual
2232 
2233  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2234  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2235  }
2236 
2237  { // restrict residual to sub-hierarchies
2238 
2239  if (implicitTranspose_) {
2240  {
2241  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (implicit)");
2242  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2243  }
2244  if (!allNodesBoundary_) {
2245  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (implicit)");
2246  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2247  }
2248  } else {
2249 #ifdef MUELU_HAVE_TPETRA
2250  if (D0_T_R11_colMapsMatch_) {
2251  // Column maps of D0_T and R11 match, and we're running Tpetra
2252  {
2253  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restrictions import");
2254  D0TR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2255  }
2256  if (!allNodesBoundary_) {
2257  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2258  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*D0res_),Teuchos::NO_TRANS);
2259  }
2260  {
2261  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2262  rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*P11res_),Teuchos::NO_TRANS);
2263  }
2264  } else
2265 #endif
2266  {
2267  {
2268  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2269  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2270  }
2271  if (!allNodesBoundary_) {
2272  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: restriction (2,2) (explicit)");
2273  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2274  }
2275  }
2276  }
2277  }
2278 
2279  {
2280  RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer("MueLu RefMaxwell: subsolves");
2281 
2282  // block diagonal preconditioner on 2x2 (V-cycle for diagonal blocks)
2283 
2284  if (!ImporterH_.is_null() && !implicitTranspose_) {
2285  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2286  P11resTmp_->beginImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2287  }
2288  if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_) {
2289  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2290  D0resTmp_->beginImport(*D0res_, *Importer22_, Xpetra::INSERT);
2291  }
2292 
2293  // iterate on coarse (1, 1) block
2294  if (!AH_.is_null()) {
2295  if (!ImporterH_.is_null() && !implicitTranspose_)
2296  P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2297 
2298  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2299 
2300 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2301  if (!thyraPrecH_.is_null()) {
2302  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2303  RCP<Thyra::MultiVectorBase<Scalar> > thyraP11x = Teuchos::rcp_const_cast<Thyra::MultiVectorBase<Scalar> >(Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(P11xSubComm_));
2304  RCP<const Thyra::MultiVectorBase<Scalar> > thyraP11res = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(P11resSubComm_);
2305  thyraPrecH_->getUnspecifiedPrecOp()->apply(Thyra::NOTRANS, *thyraP11res, thyraP11x.ptr(), one, zero);
2306  RCP<MultiVector> thyXpP11x = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toXpetra(thyraP11x, P11x_->getMap()->getComm());
2307  *P11xSubComm_ = *thyXpP11x;
2308  }
2309  else
2310 #endif
2311  HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_, true);
2312  }
2313 
2314  // iterate on (2, 2) block
2315  if (!A22_.is_null()) {
2316  if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2317  D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2318 
2319  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2320 
2321 #if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2322  if (!thyraPrec22_.is_null()) {
2323  Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2324  RCP<Thyra::MultiVectorBase<Scalar> > thyraD0x = Teuchos::rcp_const_cast<Thyra::MultiVectorBase<Scalar> >(Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(D0xSubComm_));
2325  RCP<const Thyra::MultiVectorBase<Scalar> > thyraD0res = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(D0resSubComm_);
2326  thyraPrec22_->getUnspecifiedPrecOp()->apply(Thyra::NOTRANS, *thyraD0res, thyraD0x.ptr(), one, zero);
2327  RCP<MultiVector> thyXpD0x = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toXpetra(thyraD0x, D0x_->getMap()->getComm());
2328  *D0xSubComm_ = *thyXpD0x;
2329  }
2330  else
2331 #endif
2332  Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_, true);
2333  }
2334 
2335  if (AH_.is_null() && !ImporterH_.is_null() && !implicitTranspose_)
2336  P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2337  if (A22_.is_null() && !allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2338  D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2339  }
2340 
2341  if (fuseProlongationAndUpdate_) {
2342  { // prolongate (1,1) block
2343  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (fused)");
2344  P11_->apply(*P11x_,X,Teuchos::NO_TRANS,one,one);
2345  }
2346 
2347  if (!allNodesBoundary_) { // prolongate (2,2) block
2348  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (fused)");
2349  D0_Matrix_->apply(*D0x_,X,Teuchos::NO_TRANS,one,one);
2350  }
2351  } else {
2352  { // prolongate (1,1) block
2353  RCP<Teuchos::TimeMonitor> tmP11 = getTimer("MueLu RefMaxwell: prolongation coarse (1,1) (unfused)");
2354  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2355  }
2356 
2357  if (!allNodesBoundary_) { // prolongate (2,2) block
2358  RCP<Teuchos::TimeMonitor> tmD0 = getTimer("MueLu RefMaxwell: prolongation (2,2) (unfused)");
2359  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
2360  }
2361 
2362  { // update current solution
2363  RCP<Teuchos::TimeMonitor> tmUpdate = getTimer("MueLu RefMaxwell: update");
2364  X.update(one, *residual_, one);
2365  }
2366  }
2367 
2368  }
2369 
2370 
2371  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2372  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solveH(const MultiVector& RHS, MultiVector& X) const {
2373 
2374  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2375 
2376  { // compute residual
2377  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2378  Utilities::Residual(*SM_Matrix_, X, RHS,*residual_);
2379  if (implicitTranspose_)
2380  P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2381  else
2382  R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2383  }
2384 
2385  { // solve coarse (1,1) block
2386  if (!ImporterH_.is_null() && !implicitTranspose_) {
2387  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: import coarse (1,1)");
2388  P11resTmp_->doImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2389  }
2390  if (!AH_.is_null()) {
2391  RCP<Teuchos::TimeMonitor> tmH = getTimer("MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2392  HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_, true);
2393  }
2394  }
2395 
2396  { // update current solution
2397  RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2398  P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2399  X.update(one, *residual_, one);
2400  }
2401 
2402  }
2403 
2404 
2405  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2406  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::solve22(const MultiVector& RHS, MultiVector& X) const {
2407 
2408  if (allNodesBoundary_)
2409  return;
2410 
2411  Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2412 
2413  { // compute residual
2414  RCP<Teuchos::TimeMonitor> tmRes = getTimer("MueLu RefMaxwell: residual calculation");
2415  Utilities::Residual(*SM_Matrix_, X, RHS, *residual_);
2416  if (implicitTranspose_)
2417  D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2418  else
2419  D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2420  }
2421 
2422  { // solve (2,2) block
2423  if (!Importer22_.is_null() && !implicitTranspose_) {
2424  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: import (2,2)");
2425  D0resTmp_->doImport(*D0res_, *Importer22_, Xpetra::INSERT);
2426  }
2427  if (!A22_.is_null()) {
2428  RCP<Teuchos::TimeMonitor> tm22 = getTimer("MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2429  Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_, true);
2430  }
2431  }
2432 
2433  { //update current solution
2434  RCP<Teuchos::TimeMonitor> tmUp = getTimer("MueLu RefMaxwell: update");
2435  D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS);
2436  X.update(one, *residual_, one);
2437  }
2438 
2439  }
2440 
2441 
2442  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2443  void RefMaxwell<Scalar,LocalOrdinal,GlobalOrdinal,Node>::apply (const MultiVector& RHS, MultiVector& X,
2444  Teuchos::ETransp /* mode */,
2445  Scalar /* alpha */,
2446  Scalar /* beta */) const {
2447 
2448  RCP<Teuchos::TimeMonitor> tm = getTimer("MueLu RefMaxwell: solve");
2449 
2450  // make sure that we have enough temporary memory
2451  if (!allEdgesBoundary_ && X.getNumVectors() != P11res_->getNumVectors())
2452  allocateMemory(X.getNumVectors());
2453 
2454  { // apply pre-smoothing
2455 
2456  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2457 
2458 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2459  if (useHiptmairSmoothing_) {
2460  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2461  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2462  hiptmairPreSmoother_->apply(tRHS, tX);
2463  }
2464  else
2465 #endif
2466  PreSmoother_->Apply(X, RHS, use_as_preconditioner_);
2467  }
2468 
2469  // do solve for the 2x2 block system
2470  if(mode_=="additive")
2471  applyInverseAdditive(RHS,X);
2472  else if(mode_=="121") {
2473  solveH(RHS,X);
2474  solve22(RHS,X);
2475  solveH(RHS,X);
2476  } else if(mode_=="212") {
2477  solve22(RHS,X);
2478  solveH(RHS,X);
2479  solve22(RHS,X);
2480  } else if(mode_=="1")
2481  solveH(RHS,X);
2482  else if(mode_=="2")
2483  solve22(RHS,X);
2484  else if(mode_=="7") {
2485  solveH(RHS,X);
2486  { // apply pre-smoothing
2487 
2488  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2489 
2490 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2491  if (useHiptmairSmoothing_) {
2492  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2493  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2494  hiptmairPreSmoother_->apply(tRHS, tX);
2495  }
2496  else
2497 #endif
2498  PreSmoother_->Apply(X, RHS, false);
2499  }
2500  solve22(RHS,X);
2501  { // apply post-smoothing
2502 
2503  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2504 
2505 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2506  if (useHiptmairSmoothing_)
2507  {
2508  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2509  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2510  hiptmairPostSmoother_->apply(tRHS, tX);
2511  }
2512  else
2513 #endif
2514  PostSmoother_->Apply(X, RHS, false);
2515  }
2516  solveH(RHS,X);
2517  } else if(mode_=="none") {
2518  // do nothing
2519  }
2520  else
2521  applyInverseAdditive(RHS,X);
2522 
2523  { // apply post-smoothing
2524 
2525  RCP<Teuchos::TimeMonitor> tmSm = getTimer("MueLu RefMaxwell: smoothing");
2526 
2527 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2528  if (useHiptmairSmoothing_)
2529  {
2530  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tX = Utilities::MV2NonConstTpetraMV(X);
2531  Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> tRHS = Utilities::MV2TpetraMV(RHS);
2532  hiptmairPostSmoother_->apply(tRHS, tX);
2533  }
2534  else
2535 #endif
2536  PostSmoother_->Apply(X, RHS, false);
2537  }
2538 
2539  }
2540 
2541 
2542  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2544  return false;
2545  }
2546 
2547 
2548  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2550  initialize(const Teuchos::RCP<Matrix> & D0_Matrix,
2551  const Teuchos::RCP<Matrix> & Ms_Matrix,
2552  const Teuchos::RCP<Matrix> & M0inv_Matrix,
2553  const Teuchos::RCP<Matrix> & M1_Matrix,
2554  const Teuchos::RCP<MultiVector> & Nullspace,
2555  const Teuchos::RCP<RealValuedMultiVector> & Coords,
2556  Teuchos::ParameterList& List)
2557  {
2558  // some pre-conditions
2559  TEUCHOS_ASSERT(D0_Matrix!=Teuchos::null);
2560  TEUCHOS_ASSERT(Ms_Matrix!=Teuchos::null);
2561  TEUCHOS_ASSERT(M1_Matrix!=Teuchos::null);
2562 
2563 #ifdef HAVE_MUELU_DEBUG
2564  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRangeMap()));
2565  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*M1_Matrix->getRowMap()));
2566  TEUCHOS_ASSERT(M1_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2567 
2568  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRangeMap()));
2569  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*Ms_Matrix->getRowMap()));
2570  TEUCHOS_ASSERT(Ms_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getRangeMap()));
2571 
2572  TEUCHOS_ASSERT(D0_Matrix->getRangeMap()->isSameAs(*D0_Matrix->getRowMap()));
2573 
2574  if (!M0inv_Matrix) {
2575  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRowMap()));
2576  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*M0inv_Matrix->getRangeMap()));
2577  TEUCHOS_ASSERT(M0inv_Matrix->getDomainMap()->isSameAs(*D0_Matrix->getDomainMap()));
2578  }
2579 #endif
2580 
2581  HierarchyH_ = Teuchos::null;
2582  Hierarchy22_ = Teuchos::null;
2583  PreSmoother_ = Teuchos::null;
2584  PostSmoother_ = Teuchos::null;
2585  disable_addon_ = false;
2586  mode_ = "additive";
2587 
2588  // set parameters
2589  setParameters(List);
2590 
2591  if (D0_Matrix->getRowMap()->lib() == Xpetra::UseTpetra) {
2592  // We will remove boundary conditions from D0, and potentially change maps, so we copy the input.
2593  // Fortunately, D0 is quite sparse.
2594  // We cannot use the Tpetra copy constructor, since it does not copy the graph.
2595 
2596  RCP<Matrix> D0copy = MatrixFactory::Build(D0_Matrix->getRowMap(), D0_Matrix->getColMap(), 0);
2597  RCP<CrsMatrix> D0copyCrs = rcp_dynamic_cast<CrsMatrixWrap>(D0copy,true)->getCrsMatrix();
2598  ArrayRCP<const size_t> D0rowptr_RCP;
2599  ArrayRCP<const LO> D0colind_RCP;
2600  ArrayRCP<const SC> D0vals_RCP;
2601  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getAllValues(D0rowptr_RCP,
2602  D0colind_RCP,
2603  D0vals_RCP);
2604 
2605  ArrayRCP<size_t> D0copyrowptr_RCP;
2606  ArrayRCP<LO> D0copycolind_RCP;
2607  ArrayRCP<SC> D0copyvals_RCP;
2608  D0copyCrs->allocateAllValues(D0vals_RCP.size(),D0copyrowptr_RCP,D0copycolind_RCP,D0copyvals_RCP);
2609  D0copyrowptr_RCP.deepCopy(D0rowptr_RCP());
2610  D0copycolind_RCP.deepCopy(D0colind_RCP());
2611  D0copyvals_RCP.deepCopy(D0vals_RCP());
2612  D0copyCrs->setAllValues(D0copyrowptr_RCP,
2613  D0copycolind_RCP,
2614  D0copyvals_RCP);
2615  D0copyCrs->expertStaticFillComplete(D0_Matrix->getDomainMap(), D0_Matrix->getRangeMap(),
2616  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getImporter(),
2617  rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix,true)->getCrsMatrix()->getCrsGraph()->getExporter());
2618  D0_Matrix_ = D0copy;
2619  } else
2620  D0_Matrix_ = MatrixFactory::BuildCopy(D0_Matrix);
2621 
2622 
2623  M0inv_Matrix_ = M0inv_Matrix;
2624  Ms_Matrix_ = Ms_Matrix;
2625  M1_Matrix_ = M1_Matrix;
2626  Coords_ = Coords;
2627  Nullspace_ = Nullspace;
2628 
2629  dump(*D0_Matrix_, "D0_clean.m");
2630  dump(*Ms_Matrix_, "Ms.m");
2631  dump(*M1_Matrix_, "M1.m");
2632  if (!M0inv_Matrix_.is_null()) dump(*M0inv_Matrix_, "M0inv.m");
2633  if (!Coords_.is_null()) dumpCoords(*Coords_, "coords.m");
2634 
2635  }
2636 
2637 
2638  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
2640  describe(Teuchos::FancyOStream& out, const Teuchos::EVerbosityLevel /* verbLevel */) const {
2641 
2642  std::ostringstream oss;
2643 
2644  RCP<const Teuchos::Comm<int> > comm = SM_Matrix_->getDomainMap()->getComm();
2645 
2646 #ifdef HAVE_MPI
2647  int root;
2648  if (!AH_.is_null())
2649  root = comm->getRank();
2650  else
2651  root = -1;
2652 
2653  int actualRoot;
2654  reduceAll(*comm, Teuchos::REDUCE_MAX, root, Teuchos::ptr(&actualRoot));
2655  root = actualRoot;
2656 #endif
2657 
2658 
2659  oss << "\n--------------------------------------------------------------------------------\n" <<
2660  "--- RefMaxwell Summary ---\n"
2661  "--------------------------------------------------------------------------------" << std::endl;
2662  oss << std::endl;
2663 
2664  GlobalOrdinal numRows;
2665  GlobalOrdinal nnz;
2666 
2667  SM_Matrix_->getRowMap()->getComm()->barrier();
2668 
2669  numRows = SM_Matrix_->getGlobalNumRows();
2670  nnz = SM_Matrix_->getGlobalNumEntries();
2671 
2672  Xpetra::global_size_t tt = numRows;
2673  int rowspacer = 3; while (tt != 0) { tt /= 10; rowspacer++; }
2674  tt = nnz;
2675  int nnzspacer = 2; while (tt != 0) { tt /= 10; nnzspacer++; }
2676 
2677  oss << "block " << std::setw(rowspacer) << " rows " << std::setw(nnzspacer) << " nnz " << std::setw(9) << " nnz/row" << std::endl;
2678  oss << "(1, 1)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2679 
2680  if (!A22_.is_null()) {
2681  // ToDo: make sure that this is printed correctly
2682  numRows = A22_->getGlobalNumRows();
2683  nnz = A22_->getGlobalNumEntries();
2684 
2685  oss << "(2, 2)" << std::setw(rowspacer) << numRows << std::setw(nnzspacer) << nnz << std::setw(9) << as<double>(nnz) / numRows << std::endl;
2686  }
2687 
2688  oss << std::endl;
2689 
2690 #if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
2691  if (useHiptmairSmoothing_) {
2692  if (hiptmairPreSmoother_ != null && hiptmairPreSmoother_ == hiptmairPostSmoother_)
2693  oss << "Smoother both : " << hiptmairPreSmoother_->description() << std::endl;
2694  else {
2695  oss << "Smoother pre : "
2696  << (hiptmairPreSmoother_ != null ? hiptmairPreSmoother_->description() : "no smoother") << std::endl;
2697  oss << "Smoother post : "
2698  << (hiptmairPostSmoother_ != null ? hiptmairPostSmoother_->description() : "no smoother") << std::endl;
2699  }
2700 
2701  } else
2702 #endif
2703  {
2704  if (PreSmoother_ != null && PreSmoother_ == PostSmoother_)
2705  oss << "Smoother both : " << PreSmoother_->description() << std::endl;
2706  else {
2707  oss << "Smoother pre : "
2708  << (PreSmoother_ != null ? PreSmoother_->description() : "no smoother") << std::endl;
2709  oss << "Smoother post : "
2710  << (PostSmoother_ != null ? PostSmoother_->description() : "no smoother") << std::endl;
2711  }
2712  }
2713  oss << std::endl;
2714 
2715  std::string outstr = oss.str();
2716 
2717 #ifdef HAVE_MPI
2718  RCP<const Teuchos::MpiComm<int> > mpiComm = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2719  MPI_Comm rawComm = (*mpiComm->getRawMpiComm())();
2720 
2721  int strLength = outstr.size();
2722  MPI_Bcast(&strLength, 1, MPI_INT, root, rawComm);
2723  if (comm->getRank() != root)
2724  outstr.resize(strLength);
2725  MPI_Bcast(&outstr[0], strLength, MPI_CHAR, root, rawComm);
2726 #endif
2727 
2728  out << outstr;
2729 
2730  if (!HierarchyH_.is_null())
2731  HierarchyH_->describe(out, GetVerbLevel());
2732 
2733  if (!Hierarchy22_.is_null())
2734  Hierarchy22_->describe(out, GetVerbLevel());
2735 
2736  if (IsPrint(Statistics2)) {
2737  // Print the grid of processors
2738  std::ostringstream oss2;
2739 
2740  oss2 << "Sub-solver distribution over ranks" << std::endl;
2741  oss2 << "( (1,1) block only is indicated by '1', (2,2) block only by '2', and both blocks by 'B' and none by '.')" << std::endl;
2742 
2743  int numProcs = comm->getSize();
2744 #ifdef HAVE_MPI
2745  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
2746  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
2747  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();
2748 #endif
2749 
2750  char status = 0;
2751  if (!AH_.is_null())
2752  status += 1;
2753  if (!A22_.is_null())
2754  status += 2;
2755  std::vector<char> states(numProcs, 0);
2756 #ifdef HAVE_MPI
2757  MPI_Gather(&status, 1, MPI_CHAR, &states[0], 1, MPI_CHAR, 0, *rawMpiComm);
2758 #else
2759  states.push_back(status);
2760 #endif
2761 
2762  int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
2763  for (int proc = 0; proc < numProcs; proc += rowWidth) {
2764  for (int j = 0; j < rowWidth; j++)
2765  if (proc + j < numProcs)
2766  if (states[proc+j] == 0)
2767  oss2 << ".";
2768  else if (states[proc+j] == 1)
2769  oss2 << "1";
2770  else if (states[proc+j] == 2)
2771  oss2 << "2";
2772  else
2773  oss2 << "B";
2774  else
2775  oss2 << " ";
2776 
2777  oss2 << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
2778  }
2779  oss2 << std::endl;
2780  GetOStream(Statistics2) << oss2.str();
2781  }
2782 
2783 
2784  }
2785 
2786 
2787 } // namespace
2788 
2789 #define MUELU_REFMAXWELL_SHORT
2790 #endif //ifdef MUELU_REFMAXWELL_DEF_HPP
Important warning messages (one line)
Generic Smoother Factory for generating the smoothers of the MG hierarchy.
This class specifies the default factory that should generate some data on a Level if the data does n...
#define MueLu_sumAll(rcpComm, in, out)
static void SetMueLuOFileStream(const std::string &filename)
Various adapters that will create a MueLu preconditioner that is an Xpetra::Matrix.
MueLu::DefaultLocalOrdinal LocalOrdinal
Factory for determing the number of partitions for rebalancing.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access). Usage: Level->Get< RCP<Matrix> >("A", factory) if factory == NULL => use default factory.
Factory for generating coarse level map. Used by TentativePFactory.
static RCP< const Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2TpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > const vec)
Helper utility to pull out the underlying Tpetra objects from an Xpetra object.
void solveH(const MultiVector &RHS, MultiVector &X) const
apply solve to 1-1 block only
static void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows)
#define MueLu_maxAll(rcpComm, in, out)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
static void removeExplicitZeros(Teuchos::ParameterList &parameterList, RCP< Matrix > &D0_Matrix, RCP< Matrix > &SM_Matrix, RCP< Matrix > &M1_Matrix, RCP< Matrix > &Ms_Matrix)
Remove explicit zeros.
static void SetDefaultVerbLevel(const VerbLevel defaultVerbLevel)
Set the default (global) verbosity level.
Class that encapsulates external library smoothers.
Factory for building permutation matrix that can be be used to shuffle data (matrices, vectors) among processes.
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
void setFineLevelSmoother()
Set the fine level smoother.
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
One-liner description of what is happening.
std::string tolower(const std::string &str)
Namespace for MueLu classes and methods.
void SetPreviousLevel(const RCP< Level > &previousLevel)
Definition: MueLu_Level.cpp:85
Interface to Zoltan library.This interface provides access to partitioning methods in Zoltan...
void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
static VerbLevel GetDefaultVerbLevel()
Get the default (global) verbosity level.
void SetFactoryManager(const RCP< const FactoryManagerBase > &factoryManager)
Set default factories (used internally by Hierarchy::SetLevel()).
Definition: MueLu_Level.cpp:92
void setParameters(Teuchos::ParameterList &list)
Set parameters.
void applyInverseAdditive(const MultiVector &RHS, MultiVector &X) const
apply additive algorithm for 2x2 solve
void buildProlongator()
Setup the prolongator for the (1,1)-block.
MueLu::DefaultNode Node
static RCP< Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > MV2NonConstTpetraMV(RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > vec)
#define MueLu_minAll(rcpComm, in, out)
Teuchos::ScalarTraits< Scalar >::magnitudeType magnitudeType
Factory for building tentative prolongator.
MsgType toVerbLevel(const std::string &verbLevelStr)
void allocateMemory(int numVectors) const
allocate multivectors for solve
Print even more statistics.
void setlib(Xpetra::UnderlyingLib lib2)
Utility functions for Maxwell.
Additional warnings.
Print statistics that do not involve significant additional computation.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::VERB_HIGH) const
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Residual(const Xpetra::Operator< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, const Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &RHS)
void dumpCoords(const RealValuedMultiVector &X, std::string name) const
dump out real-valued multivector
MueLu::DefaultScalar Scalar
void resetMatrix(Teuchos::RCP< Matrix > SM_Matrix_new, bool ComputePrec=true)
Reset system matrix.
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
static void setMatvecParams(Matrix &A, RCP< ParameterList > matvecParams)
Sets matvec params on a matrix.
static RCP< Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > RealValuedToScalarMultiVector(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LocalOrdinal, GlobalOrdinal, Node > > X)
Teuchos::RCP< const Map > getRangeMap() const
Returns the Xpetra::Map object associated with the range of this operator.
Factory to export aggregation info or visualize aggregates using VTK.
Interface to Zoltan2 library.This interface provides access to partitioning methods in Zoltan2...
static void SetMueLuOStream(const Teuchos::RCP< Teuchos::FancyOStream > &mueluOStream)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Xpetra::MultiVector< coordinateType, LO, GO, NO > RealValuedMultiVector
static void detectBoundaryConditionsSM(RCP< Matrix > &SM_Matrix, RCP< Matrix > &D0_Matrix, magnitudeType rowSumTol, int &BCedges, int &BCnodes, Teuchos::ArrayRCP< bool > &BCrows, Teuchos::ArrayRCP< bool > &BCcols, Teuchos::ArrayRCP< bool > &BCdomain, bool &allEdgesBoundary, bool &allNodesBoundary)
Detect Dirichlet boundary conditions.
AmalgamationFactory for subblocks of strided map based amalgamation data.
void AddKeepFlag(const std::string &ename, const FactoryBase *factory=NoFactory::get(), KeepType keep=MueLu::Keep)
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
static void ZeroDirichletRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const std::vector< LocalOrdinal > &dirichletRows, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
void compute(bool reuse=false)
Setup the preconditioner.
Applies permutation to grid transfer operators.
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
Print all timing information.
void initialize(const Teuchos::RCP< Matrix > &D0_Matrix, const Teuchos::RCP< Matrix > &Ms_Matrix, const Teuchos::RCP< Matrix > &M0inv_Matrix, const Teuchos::RCP< Matrix > &M1_Matrix, const Teuchos::RCP< MultiVector > &Nullspace, const Teuchos::RCP< RealValuedMultiVector > &Coords, Teuchos::ParameterList &List)
bool hasTransposeApply() const
Indicates whether this operator supports applying the adjoint operator.
Factory for creating a graph based on a given matrix.
Teuchos::RCP< Teuchos::TimeMonitor > getTimer(std::string name, RCP< const Teuchos::Comm< int > > comm=Teuchos::null) const
get a (synced) timer
void SetLevelID(int levelID)
Set level number.
Definition: MueLu_Level.cpp:78
Teuchos::RCP< MueLu::Hierarchy< Scalar, LocalOrdinal, GlobalOrdinal, Node > > CreateXpetraPreconditioner(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > op, const Teuchos::ParameterList &inParamList)
Helper function to create a MueLu preconditioner that can be used by Xpetra.Given an Xpetra::Matrix...
Class for transferring coordinates from a finer level to a coarser one.
void dump(const Matrix &A, std::string name) const
dump out matrix
Factory for building coarse matrices.
void formCoarseMatrix()
Compute P11^{T}*A*P11 efficiently.
Exception throws to report errors in the internal logical of the program.
void apply(const MultiVector &X, MultiVector &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Description of what is happening (more verbose)
static void ZeroDirichletCols(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Teuchos::ArrayRCP< const bool > &dirichletCols, Scalar replaceWith=Teuchos::ScalarTraits< Scalar >::zero())
Factory for building coarse matrices.
Teuchos::ScalarTraits< Scalar >::coordinateType coordinateType
static RCP< Tpetra::RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op2NonConstTpetraRow(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Op)
Factory for building Smoothed Aggregation prolongators.
Factory for building uncoupled aggregates.
static std::string translate(Teuchos::ParameterList &paramList, const std::string &defaultVals="")
: Translate ML parameters to MueLu parameter XML string
void solve22(const MultiVector &RHS, MultiVector &X) const
apply solve to 2-2 block only
Teuchos::RCP< const Map > getDomainMap() const
Returns the Xpetra::Map object associated with the domain of this operator.
void Request(const FactoryBase &factory)
Increment the storage counter for all the inputs of a factory.
static const RCP< const NoFactory > getRCP()
Static Get() functions.