MueLu  Version of the Day
MueLu_RepartitionHeuristicFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 // Tobias Wiesner (tawiesn@sandia.gov)
43 //
44 // ***********************************************************************
45 //
46 // @HEADER
47 #ifndef PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
48 #define PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_
49 
50 
51 #include <algorithm>
52 #include <iostream>
53 #include <sstream>
54 
55 #ifdef HAVE_MPI
56 #include <Teuchos_DefaultMpiComm.hpp>
57 #include <Teuchos_CommHelpers.hpp>
58 
59 //#include <Xpetra_Map.hpp>
60 #include <Xpetra_Matrix.hpp>
61 
62 #include "MueLu_Utilities.hpp"
63 
64 #include "MueLu_RAPFactory.hpp"
65 #ifdef HAVE_MUELU_EXPERIMENTAL
66 #include "MueLu_BlockedRAPFactory.hpp"
67 #endif
68 #include "MueLu_SubBlockAFactory.hpp"
69 #include "MueLu_Level.hpp"
70 #include "MueLu_MasterList.hpp"
71 #include "MueLu_Monitor.hpp"
72 
73 #include "MueLu_RepartitionHeuristicFactory.hpp"
74 
75 namespace MueLu {
76 
77  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
79  RCP<ParameterList> validParamList = rcp(new ParameterList());
80 
81 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
82  SET_VALID_ENTRY("repartition: start level");
83  SET_VALID_ENTRY("repartition: min rows per proc");
84  SET_VALID_ENTRY("repartition: max imbalance");
85 #undef SET_VALID_ENTRY
86 
87  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Factory of the matrix A");
88 
89  return validParamList;
90  }
91 
92  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
94  Input(currentLevel, "A");
95  }
96 
97  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
99  FactoryMonitor m(*this, "Build", currentLevel);
100 
101  const Teuchos::ParameterList & pL = GetParameterList();
102  // Access parameters here to make sure that we set the parameter entry flag to "used" even in case of short-circuit evaluation.
103  // TODO (JG): I don't really know if we want to do this.
104  const int startLevel = pL.get<int> ("repartition: start level");
105  const LO minRowsPerProcessor = pL.get<LO> ("repartition: min rows per proc");
106  const double nonzeroImbalance = pL.get<double>("repartition: max imbalance");
107 
108  RCP<const FactoryBase> Afact = GetFactory("A");
109  if(Teuchos::rcp_dynamic_cast<const RAPFactory>(Afact) == Teuchos::null &&
110 #ifdef HAVE_MUELU_EXPERIMENTAL
111  Teuchos::rcp_dynamic_cast<const BlockedRAPFactory>(Afact) == Teuchos::null &&
112 #endif
113  Teuchos::rcp_dynamic_cast<const SubBlockAFactory>(Afact) == Teuchos::null)
114  GetOStream(Warnings) <<
115  "MueLu::RepartitionHeuristicFactory::Build: The generation factory for A must " \
116  "be a RAPFactory or a SubBlockAFactory providing the non-rebalanced matrix information! " \
117  "It specifically must not be of type Rebalance(Blocked)AcFactory or similar. " \
118  "Please check the input. Make also sure that \"number of partitions\" is provided to" \
119  "the Interface class and the RepartitionFactory instance." << std::endl;
120 
121  // TODO: We only need a CrsGraph. This class does not have to be templated on Scalar types.
122  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
123 
124  // ======================================================================================================
125  // Determine whether partitioning is needed
126  // ======================================================================================================
127  // NOTE: most tests include some global communication, which is why we currently only do tests until we make
128  // a decision on whether to repartition. However, there is value in knowing how "close" we are to having to
129  // rebalance an operator. So, it would probably be beneficial to do and report *all* tests.
130 
131  // Test1: skip repartitioning if current level is less than the specified minimum level for repartitioning
132  if (currentLevel.GetLevelID() < startLevel) {
133  GetOStream(Statistics1) << "Repartitioning? NO:" <<
134  "\n current level = " << Teuchos::toString(currentLevel.GetLevelID()) <<
135  ", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl;
136 
137  // a negative number of processors means: no repartitioning
138  Set(currentLevel, "number of partitions", -1);
139 
140  return;
141  }
142 
143  RCP<const Map> rowMap = A->getRowMap();
144 
145  // NOTE: Teuchos::MPIComm::duplicate() calls MPI_Bcast inside, so this is
146  // a synchronization point. However, as we do MueLu_sumAll afterwards anyway, it
147  // does not matter.
148  RCP<const Teuchos::Comm<int> > origComm = rowMap->getComm();
149  RCP<const Teuchos::Comm<int> > comm = origComm->duplicate();
150 
151  // Test 2: check whether A is actually distributed, i.e. more than one processor owns part of A
152  // TODO: this global communication can be avoided if we store the information with the matrix (it is known when matrix is created)
153  // TODO: further improvements could be achieved when we use subcommunicator for the active set. Then we only need to check its size
154  {
155  int numActiveProcesses = 0;
156  MueLu_sumAll(comm, Teuchos::as<int>((A->getNodeNumRows() > 0) ? 1 : 0), numActiveProcesses);
157 
158  if (numActiveProcesses == 1) {
159  GetOStream(Statistics1) << "Repartitioning? NO:" <<
160  "\n # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl;
161 
162  Set(currentLevel, "number of partitions", 1);
163  return;
164  }
165  }
166 
167  bool test3 = false, test4 = false;
168  std::string msg3, msg4;
169 
170  // Test3: check whether number of rows on any processor satisfies the minimum number of rows requirement
171  // NOTE: Test2 ensures that repartitionning is not done when there is only one processor (it may or may not satisfy Test3)
172  if (minRowsPerProcessor > 0) {
173  LO numMyRows = Teuchos::as<LO>(A->getNodeNumRows()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max();
174  LO haveFewRows = (numMyRows < minRowsPerProcessor ? 1 : 0), numWithFewRows = 0;
175  MueLu_sumAll(comm, haveFewRows, numWithFewRows);
176  MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);
177 
178  // TODO: we could change it to repartition only if the number of processors with numRows < minNumRows is larger than some
179  // percentage of the total number. This way, we won't repartition if 2 out of 1000 processors don't have enough elements.
180  // I'm thinking maybe 20% threshold. To implement, simply add " && numWithFewRows < .2*numProcs" to the if statement.
181  if (numWithFewRows > 0)
182  test3 = true;
183 
184  msg3 = "\n min # rows per proc = " + Teuchos::toString(minNumRows) + ", min allowable = " + Teuchos::toString(minRowsPerProcessor);
185  }
186 
187  // Test4: check whether the balance in the number of nonzeros per processor is greater than threshold
188  if (!test3) {
189  GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries());
190  MueLu_maxAll(comm, numMyNnz, maxNnz);
191  MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz); // min nnz over all active processors
192  double imbalance = Teuchos::as<double>(maxNnz)/minNnz;
193 
194  if (imbalance > nonzeroImbalance)
195  test4 = true;
196 
197  msg4 = "\n nonzero imbalance = " + Teuchos::toString(imbalance) + ", max allowable = " + Teuchos::toString(nonzeroImbalance);
198  }
199 
200  if (!test3 && !test4) {
201  GetOStream(Statistics1) << "Repartitioning? NO:" << msg3 + msg4 << std::endl;
202 
203  // A negative number of partitions means: no repartitioning
204  Set(currentLevel, "number of partitions", -1);
205  return;
206  }
207 
208  GetOStream(Statistics1) << "Repartitioning? YES:" << msg3 + msg4 << std::endl;
209 
210  // ======================================================================================================
211  // Calculate number of partitions
212  // ======================================================================================================
213  // FIXME Quick way to figure out how many partitions there should be (same algorithm as ML)
214  // FIXME Should take into account nnz? Perhaps only when user is using min #nnz per row threshold.
215 
216  // The number of partitions is calculated by the RepartitionFactory and stored in "number of partitions" variable on
217  // the current level. If this variable is already set (e.g., by another instance of RepartitionFactory) then this number
218  // is used. The "number of partitions" variable serves as basic communication between the RepartitionFactory (which
219  // requests a certain number of partitions) and the *Interface classes which call the underlying partitioning algorithms
220  // and produce the "Partition" array with the requested number of partitions.
221  int numPartitions;
222  if (Teuchos::as<GO>(A->getGlobalNumRows()) < minRowsPerProcessor) {
223  // System is too small, migrate it to a single processor
224  numPartitions = 1;
225 
226  } else {
227  // Make sure that each processor has approximately minRowsPerProcessor
228  numPartitions = A->getGlobalNumRows() / minRowsPerProcessor;
229  }
230  numPartitions = std::min(numPartitions, comm->getSize());
231 
232  Set(currentLevel, "number of partitions", numPartitions);
233 
234  GetOStream(Statistics1) << "Number of partitions to use = " << numPartitions << std::endl;
235  } // Build
236 } // namespace MueLu
237 
238 #endif //ifdef HAVE_MPI
239 #endif /* PACKAGES_MUELU_SRC_REBALANCING_MUELU_REPARTITIONHEURISTICFACTORY_DEF_HPP_ */
#define MueLu_sumAll(rcpComm, in, out)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
#define MueLu_maxAll(rcpComm, in, out)
void DeclareInput(Level &currentLevel) const
Determines the data that RepartitionHeuristicFactory needs, and the factories that generate that data...
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
void Build(Level &currentLevel) const
Build an object with this factory.
#define SET_VALID_ENTRY(name)
Namespace for MueLu classes and methods.
#define MueLu_minAll(rcpComm, in, out)
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Print all warning messages.