Rheolef  7.2
an efficient C++ finite element environment
 
Loading...
Searching...
No Matches
solver_pastix_mpi.cc
Go to the documentation of this file.
1
21// direct solver based on pastix
22//
23#include "rheolef/config.h"
24#if defined(_RHEOLEF_HAVE_PASTIX) && defined(_RHEOLEF_HAVE_MPI)
25#include "solver_pastix.h"
26#include "rheolef/cg.h"
27#include "rheolef/eye.h"
28
29namespace rheolef {
30using namespace std;
31
32template<class T>
33solver_pastix_rep<T,distributed>::solver_pastix_rep (const csr<T>& a, const solver_option& opt)
34 : base(),
35 _comm(),
36 _i2dis_i_base(),
37 _new_n(0),
38 _new_ptr(0),
39 _new_idx(0),
40 _new_val(0)
41{
42warning_macro ("call load...");
43 load (a, opt);
44warning_macro ("return load...");
45}
46template<class T>
47void
48solver_pastix_rep<T,distributed>::load (const csr<T>& a, const solver_option& opt)
49{
50warning_macro ("load...");
51 base::_is_sym = a.is_symmetric();
52 base::_pattern_dimension = a.pattern_dimension();
53 _comm = a.row_ownership().comm();
54 base::_csr_row_ownership = a.row_ownership();
55 base::_csr_col_ownership = a.col_ownership();
56 base::_opt = opt;
57
58 check_macro (a.nrow() == a.ncol(), "factorization: only square matrix are supported");
59
60 // there is a bug in pastix related to "np < a.dis_nrow" : use an alternative solver...
61 if (a.nrow() == 0) {
62 base::_have_pastix_bug_small_matrix = true;
63 }
64 // ask if other procs have a bug ?
65 base::_have_pastix_bug_small_matrix = mpi::all_reduce (comm(), base::_have_pastix_bug_small_matrix, mpi::maximum<bool>());
66 if (base::_have_pastix_bug_small_matrix) {
67 // one of the procs at least have a too small matrix for pastix !
68 base::_a_when_bug = a;
69 return;
70 }
71 if (base::_is_sym) {
72 load_symmetric(a);
73 } else {
74 load_unsymmetric(a);
75 }
76 if (base::_opt.do_check) {
77 check ();
78 }
79 symbolic_factorization ();
80 numeric_factorization();
81warning_macro ("load done");
82}
83template<class T>
84void
85solver_pastix_rep<T,distributed>::load_unsymmetric (const csr<T>& a)
86{
87 size_t n = a.nrow();
88 size_t nnz = a.nnz() + a.ext_nnz();
89 resize (n, nnz);
90 load_both_continued (a);
91}
92// compute how many a.dia (dis_i,dis_j) have dis_i > dis_j
93template<class T>
94static
95size_t
96compute_csr_upper_dia_nnz (const csr<T>& a)
97{
98 size_t csr_upper_dia_nnz = 0;
99 size_t first_dis_i = a.row_ownership().first_index();
100 size_t first_dis_j = a.col_ownership().first_index();
101 for (size_t i = 0; i < a.nrow(); i++) {
102 size_t dis_i = first_dis_i + i;
103 typename csr<T>::const_iterator ia = a.begin();
104 for (typename csr<T>::const_data_iterator ap = ia[i]; ap < ia[i+1]; ap++) {
105 size_t j = (*ap).first;
106 size_t dis_j = first_dis_j + j;
107 if (dis_i <= dis_j) csr_upper_dia_nnz++;
108 }
109 }
110 return csr_upper_dia_nnz;
111}
112// compute how many a.ext (dis_i,pa_j) have dis_i > dis_j
113template<class T>
114static
115size_t
116compute_csr_upper_ext_nnz (const csr<T>& a)
117{
118 size_t csr_upper_ext_nnz = 0;
119 size_t first_dis_i = a.row_ownership().first_index();
120 size_t first_dis_j = a.col_ownership().first_index();
121 for (size_t i = 0; i < a.nrow(); i++) {
122 size_t dis_i = first_dis_i + i;
123 typename csr<T>::const_iterator ext_ia = a.ext_begin();
124 for (typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
125 const size_t& jext = (*ap).first;
126 size_t dis_j = a.jext2dis_j (jext);
127 if (dis_i <= dis_j) csr_upper_ext_nnz++;
128 }
129 }
130 return csr_upper_ext_nnz;
131}
132template<class T>
133void
134solver_pastix_rep<T,distributed>::load_symmetric (const csr<T>& a)
135{
136 // conserve only the lower part of the csc(pastix) = the upper past of the csr(rheolef)
137 size_t n = a.nrow();
138 size_t csr_upper_dia_nnz = compute_csr_upper_dia_nnz(a);
139 size_t csr_upper_ext_nnz = compute_csr_upper_ext_nnz(a);
140 size_t nnz = csr_upper_dia_nnz + csr_upper_ext_nnz;
141 resize (n, nnz);
142 load_both_continued (a);
143}
144template<class T>
145void
146solver_pastix_rep<T,distributed>::load_both_continued (const csr<T>& a)
147{
148 size_t first_dis_i = a.row_ownership().first_index();
149 size_t first_dis_j = a.col_ownership().first_index();
150 typename csr<T>::const_iterator aptr = a.begin();
151 pastix_int_t bp = 0;
152 base::_ptr [0] = bp + base::_base;
153 for (size_t i = 0; i < a.nrow(); i++) {
154 size_t dis_i = first_dis_i + i;
155 _i2dis_i_base [i] = dis_i + base::_base;
156 for (typename csr<T>::const_data_iterator ap = aptr[i]; ap < aptr[i+1]; ap++) {
157 size_t j = (*ap).first;
158 const T& val = (*ap).second;
159 size_t dis_j = first_dis_j + j;
160 if (base::_is_sym && dis_i > dis_j) continue;
161 base::_val [bp] = val;
162 base::_idx [bp] = dis_j + base::_base;
163 bp++;
164 }
165 // note: pas tries par j croissant : dans a.ext, il i a des dis_j < first_dis_j
166 typename csr<T>::const_iterator ext_ia = a.ext_begin();
167 for (typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
168 size_t jext = (*ap).first;
169 const T& val = (*ap).second;
170 size_t dis_j = a.jext2dis_j (jext);
171 if (base::_is_sym && dis_i > dis_j) continue;
172 base::_val [bp] = val;
173 base::_idx [bp] = dis_j + base::_base;
174 bp++;
175 }
176 base::_ptr [i+1] = bp + base::_base;
177 }
178 check_macro (bp == base::_nnz, "factorization: invalid count: nnz="<<base::_nnz<<", count="<<bp);
179}
180template<class T>
181void
182solver_pastix_rep<T,distributed>::update_values (const csr<T>& a)
183{
184 if (base::_have_pastix_bug_small_matrix) {
185 base::_a_when_bug = a;
186 return;
187 }
188 check_macro (size_t(base::_n) == a.nrow() && size_t(base::_n) == a.ncol(),
189 "update: local input matrix size distribution mismatch: ("<<a.nrow()<<","<<a.ncol()<<"), expect ("
190 << base::_n << "," << base::_n << ")");
191 size_t nnz_a;
192 if (!base::_is_sym) {
193 nnz_a = a.nnz() + a.ext_nnz();
194 } else {
195 // conserve only the lower part of the csc(pastix) = the upper past of the csr(rheolef)
196 size_t csr_upper_dia_nnz = compute_csr_upper_dia_nnz(a);
197 size_t csr_upper_ext_nnz = compute_csr_upper_ext_nnz(a);
198 nnz_a = csr_upper_dia_nnz + csr_upper_ext_nnz;
199 }
200 check_macro (size_t(base::_nnz) == nnz_a,
201 "update: local input matrix nnz distribution mismatch: nnz="<<nnz_a<<", expect nnz="<<base::_nnz);
202
203 size_t first_dis_i = a.row_ownership().first_index();
204 size_t first_dis_j = a.col_ownership().first_index();
205 pastix_int_t bp = 0;
206 typename csr<T>::const_iterator aptr = a.begin();
207 for (size_t i = 0; i < a.nrow(); i++) {
208 size_t dis_i = first_dis_i + i;
209 for (typename csr<T>::const_data_iterator ap = aptr[i]; ap < aptr[i+1]; ap++) {
210 size_t j = (*ap).first;
211 size_t dis_j = first_dis_j + j;
212 if (base::_is_sym && dis_i > dis_j) continue;
213 base::_val [bp] = (*ap).second;
214 bp++;
215 }
216 typename csr<T>::const_iterator ext_ia = a.ext_begin();
217 for (typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
218 size_t jext = (*ap).first;
219 size_t dis_j = a.jext2dis_j (jext);
220 if (base::_is_sym && dis_i > dis_j) continue;
221 base::_val [bp] = (*ap).second;
222 bp++;
223 }
224 }
225 numeric_factorization();
226}
227template<class T>
228void
229solver_pastix_rep<T,distributed>::resize (pastix_int_t n, pastix_int_t nnz)
230{
231 base::_n = n;
232 base::_nnz = nnz;
233 base::_ptr.resize(n+1);
234 base::_idx.resize(nnz);
235 base::_val.resize(nnz);
236 _i2dis_i_base.resize(n);
237}
238template<class T>
239void
240solver_pastix_rep<T,distributed>::check () const
241{
248 pastix_int_t symmetry = (is_symmetric() ? API_SYM_YES : API_SYM_NO);
249 pastix_int_t *ptr_begin = (pastix_int_t*) base::_ptr.begin().operator->();
250 pastix_int_t *idx_begin = (pastix_int_t*) base::_idx.begin().operator->();
251 T *val_begin = (T*) base::_val.begin().operator->();
252 pastix_int_t *i2dis_i_base_begin = (pastix_int_t*) _i2dis_i_base.begin().operator->();
253 pastix_int_t status
254 = d_pastix_checkMatrix(_comm, base::_opt.verbose_level, symmetry, API_YES,
255 base::_n, &ptr_begin, &idx_begin, &val_begin, &i2dis_i_base_begin,
256 1);
257 check_macro (status == 0, "pastix check returns error status = " << status);
258}
259template<class T>
260void
261solver_pastix_rep<T,distributed>::symbolic_factorization ()
262{
263 if (base::_have_pastix_bug_small_matrix) {
264 return;
265 }
266 const pastix_int_t nbthread = 1; // threads are not yet very well supported with openmpi & scotch
267 const pastix_int_t ordering = 0; // scotch
268
269 // tasks :
270 // 0. set params to default values
271 base::_iparm[IPARM_START_TASK] = API_TASK_INIT;
272 base::_iparm[IPARM_END_TASK] = API_TASK_INIT;
273 base::_iparm[IPARM_MODIFY_PARAMETER] = API_NO;
274 d_dpastix (base::pp_data(),
275 _comm,
276 base::_n,
277 base::_ptr.begin().operator->(),
278 base::_idx.begin().operator->(),
279 NULL, // _val.begin().operator->(),
280 _i2dis_i_base.begin().operator->(),
281 NULL,
282 NULL,
283 NULL,
284 1,
285 base::_iparm,
286 base::_dparm);
287
288 // Customize some parameters
289 base::_iparm[IPARM_THREAD_NBR] = nbthread;
290 if (is_symmetric()) {
291 base::_iparm[IPARM_SYM] = API_SYM_YES;
292 base::_iparm[IPARM_FACTORIZATION] = API_FACT_LDLT;
293 } else {
294 base::_iparm[IPARM_SYM] = API_SYM_NO;
295 base::_iparm[IPARM_FACTORIZATION] = API_FACT_LU;
296 }
297 base::_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
298 base::_iparm[IPARM_VERBOSE] = base::_opt.verbose_level;
299 base::_iparm[IPARM_ORDERING] = ordering;
300 bool do_incomplete;
301 if (base::_opt.iterative != solver_option::decide) {
302 do_incomplete = (base::_opt.iterative != 0);
303 } else {
304 do_incomplete = (base::_pattern_dimension > 2); // 3d-pattern => iterative & IC(k) precond
305 }
306 base::_iparm[IPARM_INCOMPLETE] = (do_incomplete ? 1 : 0);
307 base::_iparm[IPARM_OOC_LIMIT] = base::_opt.ooc;
308 if (base::_opt.iterative == 1) {
309 base::_dparm[DPARM_EPSILON_REFINEMENT] = base::_opt.tol;
310 }
311 base::_iparm[IPARM_LEVEL_OF_FILL] = base::_opt.level_of_fill;
312 base::_iparm[IPARM_AMALGAMATION_LEVEL] = base::_opt.amalgamation;
313 base::_iparm[IPARM_RHS_MAKING] = API_RHS_B;
314
315 // 1) get the i2new_dis_i array: its indexes are in the [0:dis_n[ range
316 // i2new_dis_i[i] = i2new_dis_i [i]
317 base::_i2new_dis_i.resize (base::_n);
318
319 pastix_int_t itmp = 0; // pastix does not manage NULL pointers: when n=0, vector::begin() retuns a NULL...
320 T vtmp = 0; // pastix does not manage NULL pointers: when n=0, vector::begin() retuns a NULL...
321
322 // tasks :
323 // 1. ordering
324 // 2. symbolic factorization
325 // 3. tasks mapping and scheduling
326 base::_iparm[IPARM_START_TASK] = API_TASK_ORDERING;
327 base::_iparm[IPARM_END_TASK] = API_TASK_ANALYSE;
328
329 d_dpastix (base::pp_data(),
330 _comm,
331 base::_n,
332 base::_ptr.begin().operator->(),
333 (base::_idx.begin().operator->() != NULL) ? base::_idx.begin().operator->() : &itmp,
334 NULL,
335 (_i2dis_i_base.begin().operator->() != NULL) ? _i2dis_i_base.begin().operator->() : &itmp,
336 (base::_i2new_dis_i.begin().operator->() != NULL) ? base::_i2new_dis_i.begin().operator->() : &itmp,
337 NULL,
338 NULL,
339 1,
340 base::_iparm,
341 base::_dparm);
342
343 _new_n = d_pastix_getLocalNodeNbr (base::pp_data());
344
345 // 2) buil new local to global column correspondance
346 // new_i2dis_i_base[new_i] - base = new_i2dis_i [new_i] with a base=1
347 base::_new_i2dis_i_base.resize (_new_n);
348 d_pastix_getLocalNodeLst (base::pp_data(), base::_new_i2dis_i_base.begin().operator->());
349}
350template<class T>
351void
352solver_pastix_rep<T,distributed>::numeric_factorization ()
353{
354 if (base::_have_pastix_bug_small_matrix) {
355 trace_macro ("num_fact: bug, circumvent !");
356 return;
357 }
358 // 3) redistributing the matrix in the new numbering
359 pastix_int_t status2 = d_cscd_redispatch (
360 base::_n,
361 base::_ptr.begin().operator->(),
362 base::_idx.begin().operator->(),
363 base::_val.begin().operator->(),
364 NULL,
365 _i2dis_i_base.begin().operator->(),
366 _new_n,
367 &(_new_ptr),
368 &(_new_idx),
369 &(_new_val),
370 NULL,
371 base::_new_i2dis_i_base.begin().operator->(),
372 _comm);
373 check_macro (status2 == EXIT_SUCCESS, "d_csd_redispatch failed");
374
375 // pastix tasks:
376 // 4. numerical factorization
377 base::_iparm[IPARM_START_TASK] = API_TASK_NUMFACT;
378 base::_iparm[IPARM_END_TASK] = API_TASK_NUMFACT;
379 d_dpastix (base::pp_data(),
380 _comm,
381 _new_n,
382 _new_ptr,
383 _new_idx,
384 _new_val,
385 base::_new_i2dis_i_base.begin().operator->(),
386 base::_i2new_dis_i.begin().operator->(),
387 NULL,
388 NULL,
389 0,
390 base::_iparm,
391 base::_dparm);
392}
393template<class T>
394vec<T>
395solver_pastix_rep<T,distributed>::trans_solve (const vec<T>& rhs) const
396{
397 if (base::_have_pastix_bug_small_matrix) {
398 error_macro ("trans_solve: bug, circumvent not yet !");
399 }
400 // ================================================================
401 // solve
402 // ================================================================
403 check_macro (rhs.size() == size_t(base::_n), "invalid rhs size="<<rhs.size()<<": expect size="<<base::_n);
404
405 // redispatch rhs separately: formally: new_rhs [new_i] := rhs [i2new_i[i]]
406 base::_new_rhs.resize (_new_n);
407 std::set<size_t> dis_i_set;
408 std::map<size_t,size_t> dis_i2new_i;
409 size_t first_dis_i = base::_csr_row_ownership.first_index();
410 size_t last_dis_i = base::_csr_row_ownership.last_index();
411 for (pastix_int_t new_i = 0; new_i < _new_n; new_i++) {
412 size_t dis_i = base::_new_i2dis_i_base [new_i] - base::_base;
413 if (dis_i >= first_dis_i && dis_i < last_dis_i) {
414 // value is locally available
415 size_t i = dis_i - first_dis_i;
416 base::_new_rhs [new_i] = rhs [i];
417 } else {
418 // value is owned by another processor
419 dis_i_set.insert (dis_i);
420 dis_i2new_i.insert (std::make_pair(dis_i,new_i));
421 }
422 }
423 // external terms: TODO: scatter begin&end in the symbolic step
424 std::map<size_t,T> dis_i_map;
425 rhs.get_dis_entry (dis_i_set, dis_i_map);
426 for (typename map<size_t,T>::const_iterator
427 iter = dis_i_map.begin(),
428 last = dis_i_map.end(); iter != last; iter++) {
429 size_t dis_i = (*iter).first;
430 const T& val = (*iter).second;
431 size_t new_i = dis_i2new_i [dis_i]; // find
432 base::_new_rhs [new_i] = val;
433 }
434 // tasks:
435 // 5. numerical solve
436 // 6. numerical refinement
437 // 7. clean
438 T vtmp = 0; // pastix does not manage NULL rhs pointer: when new_n=0, but vector::begin() retuns a NULL...
439 base::_iparm[IPARM_START_TASK] = API_TASK_SOLVE;
440 base::_iparm[IPARM_END_TASK] = API_TASK_REFINE;
441 d_dpastix (base::pp_data(),
442 _comm,
443 _new_n,
444 _new_ptr,
445 _new_idx,
446 _new_val,
447 base::_new_i2dis_i_base.begin().operator->(),
448 base::_i2new_dis_i.begin().operator->(),
449 NULL,
450 (_new_n != 0) ? base::_new_rhs.begin().operator->() : &vtmp,
451 1,
452 base::_iparm,
453 base::_dparm);
454
455 // new_i2dis_i_base [new_i] - base = new_i2dis_i [new_i]
456 vec<T> x (base::_csr_row_ownership);
457 for (pastix_int_t new_i = 0; new_i < _new_n; new_i++) {
458 size_t dis_i = base::_new_i2dis_i_base [new_i] - base::_base;
459 x.dis_entry (dis_i) = base::_new_rhs[new_i];
460 }
461 x.dis_entry_assembly();
462 return x;
463}
464template<class T>
465vec<T>
466solver_pastix_rep<T,distributed>::solve (const vec<T>& rhs) const
467{
468 if (base::_have_pastix_bug_small_matrix) {
469 T tol = std::numeric_limits<T>::epsilon();
470 size_t max_iter = 1000; // TODO: use _opt to get tol & max_iter
471 vec<T> x (base::_a_when_bug.col_ownership(), 0);
472 int status = cg (base::_a_when_bug, x, rhs, eye<T>(), max_iter, tol, &derr);
473 check_macro (tol < std::numeric_limits<T>::epsilon(), "solver(cg): machine precision not reached: "<<tol);
474 return x;
475 }
476 // TODO: make a csc<T> wrapper around csr<T> and use csc<T> in form & form_assembly
477 // => avoid the transposition
478 if (! base::_is_sym) {
479 warning_macro ("solve: not yet supported for unsymmetric matrix");
480 }
481 return trans_solve (rhs);
482}
483template<class T>
484solver_pastix_rep<T,distributed>::~solver_pastix_rep()
485{
486 // tasks:
487 // 7. clean
488 base::_iparm[IPARM_START_TASK] = API_TASK_CLEAN;
489 base::_iparm[IPARM_END_TASK] = API_TASK_CLEAN;
490
491 d_dpastix (base::pp_data(),
492 _comm,
493 _new_n,
494 _new_ptr,
495 _new_idx,
496 _new_val,
497 NULL,
498 NULL,
499 NULL,
500 base::_new_rhs.begin().operator->(),
501 1,
502 base::_iparm,
503 base::_dparm);
504
505 // was allocated by d_cscd_redispatch()
506 if (_new_ptr != NULL) free (_new_ptr);
507 if (_new_idx != NULL) free (_new_idx);
508 if (_new_val != NULL) free (_new_val);
509 base::_pastix_data = 0;
510}
511// ----------------------------------------------------------------------------
512// instanciation in library
513// ----------------------------------------------------------------------------
514// TODO: code is only valid here for T=double (d_pastix etc)
515
516template class solver_pastix_rep<double,distributed>;
517
518} // namespace rheolef
519#endif // _RHEOLEF_HAVE_PASTIX
#define trace_macro(message)
Definition dis_macros.h:111
#define error_macro(message)
Definition dis_macros.h:49
#define warning_macro(message)
Definition dis_macros.h:53
Expr1::float_type T
Definition field_expr.h:230
check_macro(expr1.have_homogeneous_space(Xh1), "dual(expr1,expr2); expr1 should have homogeneous space. HINT: use dual(interpolate(Xh, expr1),expr2)")
This file is part of Rheolef.
int cg(const Matrix &A, Vector &x, const Vector2 &Mb, const Preconditioner &M, const solver_option &sopt=solver_option())
Definition cg.h:94
STL namespace.
void check(Float p, const field &uh)
void load(idiststream &in, Float &p, field &uh)