Tulip 5.7.1
Large graphs analysis and drawing
ParallelTools.h
/*
 *
 * This file is part of Tulip (https://tulip.labri.fr)
 *
 * Authors: David Auber and the Tulip development Team
 * from LaBRI, University of Bordeaux
 *
 * Tulip is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Tulip is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 */

///@cond DOXYGEN_HIDDEN

#ifndef TLP_PARALLEL_TOOLS_H
#define TLP_PARALLEL_TOOLS_H

#include <tulip/tulipconf.h>
#include <vector>

#ifndef TLP_NO_THREADS

#ifdef _OPENMP
// _OPENMP is supposed to be defined as an integer
// representing the year/month of the supported version
#if _OPENMP < 200805
// only signed integer types are supported
// for OpenMP < 3.0
typedef long long OMP_ITER_TYPE;
#else
typedef size_t OMP_ITER_TYPE;
#endif
#ifndef _MSC_VER
#define OMP(x) _Pragma(STRINGIFY(omp x))
#define OMP_CRITICAL_SECTION(x) _Pragma(STRINGIFY(omp critical(x)))
#else
#define OMP(x) __pragma(omp x)
#define OMP_CRITICAL_SECTION(x) __pragma(omp critical(x))
#endif

#ifdef __APPLE__
struct omp_lock_t;
#endif

#else

// OpenMP is not available, use C++11 threads instead
#include <iostream>
#include <algorithm>
#include <mutex>
#include <thread>

#endif

#endif

namespace tlp {

class TlpThread;
// ===================================================================================

/**
 * @brief Static wrapper class around std::thread
 *
 * @since Tulip 5.2
 */
class TLP_SCOPE ThreadManager {

  static unsigned int maxNumberOfThreads;

#if !defined(TLP_NO_THREADS) && !defined(_OPENMP)

  // allocate a number for the calling thread
  static void allocateThreadNumber();

  // deallocate the number of the calling thread
  static void freeThreadNumber();

  // create a thread dedicated to the execution of a function
  // used to iterate between begin and end indices
  template <typename runFunction>
  static inline std::thread *launchThread(const runFunction &f, uint begin, uint end) {
    auto thrdFunction = [&](uint begin, uint end) {
      allocateThreadNumber();
      f(begin, end);
      freeThreadNumber();
    };
    return new std::thread(thrdFunction, begin, end);
  }

#endif

public:
#if !defined(TLP_NO_THREADS) && !defined(_OPENMP)

  // create a thread dedicated to the execution of a function
  // with no arguments
  template <typename runFunction>
  static inline std::thread *launchThread(const runFunction &f) {
    auto thrdFunction = [&]() {
      allocateThreadNumber();
      f();
      freeThreadNumber();
    };
    return new std::thread(thrdFunction);
  }

#endif

  /**
   * Returns the number of processors on the host system.
   */
  static unsigned int getNumberOfProcs();

  /**
   * Returns the number of threads used by default in subsequent OpenMP parallel sections.
   */
  static unsigned int getNumberOfThreads();

  /**
   * Sets the number of threads used by default in subsequent parallel sections.
   */
  static void setNumberOfThreads(unsigned int nbThreads);

  /**
   * Returns the current thread number.
   */
  static unsigned int getThreadNumber();

#ifndef _OPENMP

  /**
   * Parallel iteration of the same function over partitioned ranges
   * between 0 and maxId
   */
  template <typename ThreadFunction>
  static inline void iterate(size_t maxId, const ThreadFunction &threadFunction) {
#ifndef TLP_NO_THREADS
    if (maxId == 0)
      return;

    // compute the size of range indices for each thread
    size_t nbPerThread = maxId == 1 ? 1 : std::max(maxId / (maxNumberOfThreads - 1), size_t(2));
    // begin and end indices of thread partition
    size_t begin = 0, end = nbPerThread;
    maxId -= end - begin;
    // create threads
    std::vector<std::thread *> threads;
    threads.reserve(maxNumberOfThreads - 1);
    while (maxId) {
      threads.push_back(launchThread(threadFunction, begin, end));
      if (nbPerThread > 1) {
        if (maxNumberOfThreads - threads.size() == maxId)
          nbPerThread = 1;
      }
      begin = end;
      end += std::min(nbPerThread, maxId);
      maxId -= end - begin;
    }
    // iterate on last partition
    threadFunction(begin, end);
    // wait for threads
    for (auto thrd : threads) {
      thrd->join();
      delete thrd;
    }
#else
    threadFunction(0, maxId);
#endif
  }

#endif
};

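// Usage sketch added for illustration (not part of the original header), assuming
// thread numbers returned by ThreadManager::getThreadNumber() stay below
// ThreadManager::getNumberOfThreads(): per-thread partial results can be indexed
// by thread number and merged once the parallel work is done.
// 'values', 'partialSums' and 'total' are hypothetical names;
// TLP_PARALLEL_MAP_INDICES is defined further below.
//
//   std::vector<double> partialSums(tlp::ThreadManager::getNumberOfThreads(), 0.);
//   TLP_PARALLEL_MAP_INDICES(values.size(), [&](unsigned int i) {
//     partialSums[tlp::ThreadManager::getThreadNumber()] += values[i];
//   });
//   double total = 0;
//   for (double s : partialSums)
//     total += s;
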
#ifndef TLP_NO_THREADS

#define TLP_MAX_NB_THREADS 128

#define TLP_NB_THREADS tlp::ThreadManager::getNumberOfThreads()

#ifdef _OPENMP

#ifdef __APPLE__

// In order to work around an odd phenomenon observed on some MacOS runtimes when an
// application shuts down, use the OpenMP C API to declare critical sections instead of directives.
// https://www.imagemagick.org/discourse-server/viewtopic.php?f=3&t=29031&start=15#p129707
// gives more details about the issue.

class OpenMPLock {
public:
  OpenMPLock();
  ~OpenMPLock();
  void lock();
  void unlock();

private:
  omp_lock_t *_lock;
};

#define TLP_LOCK_SECTION(mtx)   \
  static tlp::OpenMPLock mtx;   \
  mtx.lock();
#define TLP_UNLOCK_SECTION(mtx) mtx.unlock();
#define TLP_DECLARE_GLOBAL_LOCK(mtx) extern tlp::OpenMPLock mtx
#define TLP_DEFINE_GLOBAL_LOCK(mtx) tlp::OpenMPLock mtx
#define TLP_GLOBALLY_LOCK_SECTION(mtx) mtx.lock();
#define TLP_GLOBALLY_UNLOCK_SECTION(mtx) mtx.unlock()

#else // __APPLE__

#define TLP_LOCK_SECTION(mtx) OMP_CRITICAL_SECTION(mtx)
#define TLP_UNLOCK_SECTION(mtx)
#define TLP_DECLARE_GLOBAL_LOCK(mtx) extern void mtx()
#define TLP_DEFINE_GLOBAL_LOCK(mtx) extern void mtx()
#define TLP_GLOBALLY_LOCK_SECTION(mtx) OMP_CRITICAL_SECTION(mtx)
#define TLP_GLOBALLY_UNLOCK_SECTION(mtx)

#endif // __APPLE__

#else

#define TLP_LOCK_SECTION(mtx) \
  static std::mutex mtx;      \
  mtx.lock();
#define TLP_UNLOCK_SECTION(mtx) mtx.unlock()
#define TLP_DECLARE_GLOBAL_LOCK(mtx) extern std::mutex mtx
#define TLP_DEFINE_GLOBAL_LOCK(mtx) std::mutex mtx
#define TLP_GLOBALLY_LOCK_SECTION(mtx) mtx.lock();
#define TLP_GLOBALLY_UNLOCK_SECTION(mtx) mtx.unlock()

#endif

#else

// no multi-threading
#define TLP_MAX_NB_THREADS 1

#define TLP_NB_THREADS 1

#define TLP_LOCK_SECTION(mtx)
#define TLP_UNLOCK_SECTION(mtx)
#define TLP_DECLARE_GLOBAL_LOCK(mtx) extern void mtx()
#define TLP_DEFINE_GLOBAL_LOCK(mtx) extern void mtx()
#define TLP_GLOBALLY_LOCK_SECTION(mtx)
#define TLP_GLOBALLY_UNLOCK_SECTION(mtx)

#endif

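// Usage sketch added for illustration (not part of the original header): whatever
// backend was selected above, TLP_LOCK_SECTION / TLP_UNLOCK_SECTION guard a braced
// critical section inside parallel code. 'values', 'threshold' and 'selected' are
// hypothetical names; TLP_PARALLEL_MAP_INDICES is defined below.
//
//   std::vector<unsigned int> selected;
//   TLP_PARALLEL_MAP_INDICES(values.size(), [&](unsigned int i) {
//     if (values[i] > threshold) {
//       TLP_LOCK_SECTION(selectionLock) {
//         selected.push_back(i);
//       }
//       TLP_UNLOCK_SECTION(selectionLock);
//     }
//   });
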
// ===================================================================================

/**
 * Template function to ease the parallel execution of the same function
 * over every index in the range 0 <= index < maxIdx.
 *
 * @since Tulip 5.2
 *
 * @param maxIdx the exclusive upper bound of the indices range
 * @param idxFunction callable object (e.g. lambda function) taking an unsigned integer as parameter
 *
 * Example of use:
 *
 * @code
 * auto computationIntensiveTask = [&](unsigned int id) {
 *   double result = 0;
 *   ...
 *   return result;
 * };
 * const unsigned int N = ... ;
 * std::vector<double> result(N);
 * TLP_PARALLEL_MAP_INDICES(N, [&](unsigned int i) {
 *   // run task in a thread
 *   result[i] = computationIntensiveTask(i);
 * });
 * @endcode
 */
template <typename IdxFunction>
void inline TLP_PARALLEL_MAP_INDICES(size_t maxIdx, const IdxFunction &idxFunction) {
#ifdef _OPENMP
  OMP(parallel for)
  for (OMP_ITER_TYPE i = 0; i < OMP_ITER_TYPE(maxIdx); ++i) {
    idxFunction(i);
  }
#else
  auto threadFunction = [&](size_t begin, size_t end) {
    for (; begin < end; ++begin) {
      idxFunction(begin);
    }
  };
  ThreadManager::iterate(maxIdx, threadFunction);
#endif
}

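/**
 * Template function to ease the parallel iteration over the elements of a
 * std::vector: idxFunction is called once per element, in parallel when possible.
 *
 * Illustrative example (added documentation; 'fileNames' and 'processFile' are
 * hypothetical names and processFile is assumed to be thread safe):
 *
 * @code
 * std::vector<std::string> fileNames = ...;
 * TLP_PARALLEL_MAP_VECTOR(fileNames, [&](const std::string &name) {
 *   processFile(name);
 * });
 * @endcode
 */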
template <typename EltType, typename IdxFunction>
void inline TLP_PARALLEL_MAP_VECTOR(const std::vector<EltType> &vect,
                                    const IdxFunction &idxFunction) {
#ifdef _OPENMP
  auto maxIdx = vect.size();
  OMP(parallel for)
  for (OMP_ITER_TYPE i = 0; i < OMP_ITER_TYPE(maxIdx); ++i) {
    idxFunction(vect[i]);
  }
#else
  auto threadFunction = [&](size_t begin, size_t end) {
    for (; begin < end; ++begin) {
      idxFunction(vect[begin]);
    }
  };

  ThreadManager::iterate(vect.size(), threadFunction);
#endif
}

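/**
 * Same as TLP_PARALLEL_MAP_VECTOR except that idxFunction also receives the index
 * of the element currently processed.
 *
 * Illustrative example (added documentation; 'values' and 'squares' are
 * hypothetical names):
 *
 * @code
 * std::vector<double> values = ...;
 * std::vector<double> squares(values.size());
 * TLP_PARALLEL_MAP_VECTOR_AND_INDICES(values, [&](double v, size_t i) {
 *   squares[i] = v * v;
 * });
 * @endcode
 */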
template <typename EltType, typename IdxFunction>
void inline TLP_PARALLEL_MAP_VECTOR_AND_INDICES(const std::vector<EltType> &vect,
                                                const IdxFunction &idxFunction) {
#ifdef _OPENMP
  auto maxIdx = vect.size();
  OMP(parallel for)
  for (OMP_ITER_TYPE i = 0; i < OMP_ITER_TYPE(maxIdx); ++i) {
    idxFunction(vect[i], i);
  }
#else
  auto threadFunction = [&](size_t begin, size_t end) {
    for (; begin < end; ++begin) {
      idxFunction(vect[begin], begin);
    }
  };

  ThreadManager::iterate(vect.size(), threadFunction);
#endif
}

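/**
 * Template functions to run two, three or four independent tasks concurrently
 * when threading is available; the last task is executed by the calling thread.
 *
 * Illustrative example (added documentation; 'computeLayout' and 'computeColors'
 * are hypothetical names):
 *
 * @code
 * TLP_PARALLEL_SECTIONS([&]() { computeLayout(); }, [&]() { computeColors(); });
 * @endcode
 */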
template <typename F1, typename F2>
void inline TLP_PARALLEL_SECTIONS(const F1 &f1, const F2 &f2) {
#ifndef TLP_NO_THREADS
#ifdef _OPENMP
  OMP(parallel) {
    OMP(sections nowait) {
      OMP(section) {
        f1();
      }
    }
    OMP(master) {
      f2();
    }
  }
#else
  auto thrd = ThreadManager::launchThread(f1);
  f2();
  thrd->join();
  delete thrd;
#endif
#else
  f1();
  f2();
#endif
}

template <typename F1, typename F2, typename F3>
void inline TLP_PARALLEL_SECTIONS(const F1 &f1, const F2 &f2, const F3 &f3) {
#ifndef TLP_NO_THREADS
#ifdef _OPENMP
  OMP(parallel) {
    OMP(sections nowait) {
      OMP(section) {
        f1();
      }
      OMP(section) {
        f2();
      }
    }
    OMP(master) {
      f3();
    }
  }
#else
  auto thrd1 = ThreadManager::launchThread(f1);
  auto thrd2 = ThreadManager::launchThread(f2);
  f3();
  thrd1->join();
  thrd2->join();
  delete thrd1;
  delete thrd2;
#endif
#else
  f1();
  f2();
  f3();
#endif
}

template <typename F1, typename F2, typename F3, typename F4>
void inline TLP_PARALLEL_SECTIONS(const F1 &f1, const F2 &f2, const F3 &f3, const F4 &f4) {
#ifndef TLP_NO_THREADS
#ifdef _OPENMP
  OMP(parallel) {
    OMP(sections nowait) {
      OMP(section) {
        f1();
      }
      OMP(section) {
        f2();
      }
      OMP(section) {
        f3();
      }
    }
    OMP(master) {
      f4();
    }
  }
#else
  auto thrd1 = ThreadManager::launchThread(f1);
  auto thrd2 = ThreadManager::launchThread(f2);
  auto thrd3 = ThreadManager::launchThread(f3);
  f4();
  thrd1->join();
  thrd2->join();
  thrd3->join();
  delete thrd1;
  delete thrd2;
  delete thrd3;
#endif
#else
  f1();
  f2();
  f3();
  f4();
#endif
}
} // namespace tlp

#endif // TLP_PARALLEL_TOOLS_H

///@endcond