23#include "rheolef/config.h"
24#if defined(_RHEOLEF_HAVE_PASTIX) && defined(_RHEOLEF_HAVE_MPI)
26#include "rheolef/cg.h"
27#include "rheolef/eye.h"
33solver_pastix_rep<T,distributed>::solver_pastix_rep (
const csr<T>& a,
const solver_option& opt)
48solver_pastix_rep<T,distributed>::load (
const csr<T>& a,
const solver_option& opt)
51 base::_is_sym =
a.is_symmetric();
52 base::_pattern_dimension =
a.pattern_dimension();
53 _comm =
a.row_ownership().comm();
54 base::_csr_row_ownership =
a.row_ownership();
55 base::_csr_col_ownership =
a.col_ownership();
58 check_macro (
a.nrow() ==
a.ncol(),
"factorization: only square matrix are supported");
62 base::_have_pastix_bug_small_matrix =
true;
65 base::_have_pastix_bug_small_matrix = mpi::all_reduce (comm(), base::_have_pastix_bug_small_matrix, mpi::maximum<bool>());
66 if (base::_have_pastix_bug_small_matrix) {
68 base::_a_when_bug =
a;
76 if (base::_opt.do_check) {
79 symbolic_factorization ();
80 numeric_factorization();
85solver_pastix_rep<T,distributed>::load_unsymmetric (
const csr<T>& a)
88 size_t nnz =
a.nnz() +
a.ext_nnz();
90 load_both_continued (a);
96compute_csr_upper_dia_nnz (
const csr<T>& a)
98 size_t csr_upper_dia_nnz = 0;
99 size_t first_dis_i =
a.row_ownership().first_index();
100 size_t first_dis_j =
a.col_ownership().first_index();
101 for (
size_t i = 0; i <
a.nrow(); i++) {
102 size_t dis_i = first_dis_i + i;
103 typename csr<T>::const_iterator ia =
a.begin();
104 for (
typename csr<T>::const_data_iterator ap = ia[i]; ap < ia[i+1]; ap++) {
105 size_t j = (*ap).first;
106 size_t dis_j = first_dis_j + j;
107 if (dis_i <= dis_j) csr_upper_dia_nnz++;
110 return csr_upper_dia_nnz;
116compute_csr_upper_ext_nnz (
const csr<T>& a)
118 size_t csr_upper_ext_nnz = 0;
119 size_t first_dis_i =
a.row_ownership().first_index();
120 size_t first_dis_j =
a.col_ownership().first_index();
121 for (
size_t i = 0; i <
a.nrow(); i++) {
122 size_t dis_i = first_dis_i + i;
123 typename csr<T>::const_iterator ext_ia =
a.ext_begin();
124 for (
typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
125 const size_t& jext = (*ap).first;
126 size_t dis_j =
a.jext2dis_j (jext);
127 if (dis_i <= dis_j) csr_upper_ext_nnz++;
130 return csr_upper_ext_nnz;
134solver_pastix_rep<T,distributed>::load_symmetric (
const csr<T>& a)
138 size_t csr_upper_dia_nnz = compute_csr_upper_dia_nnz(a);
139 size_t csr_upper_ext_nnz = compute_csr_upper_ext_nnz(a);
140 size_t nnz = csr_upper_dia_nnz + csr_upper_ext_nnz;
142 load_both_continued (a);
146solver_pastix_rep<T,distributed>::load_both_continued (
const csr<T>& a)
148 size_t first_dis_i =
a.row_ownership().first_index();
149 size_t first_dis_j =
a.col_ownership().first_index();
150 typename csr<T>::const_iterator aptr =
a.begin();
152 base::_ptr [0] = bp + base::_base;
153 for (
size_t i = 0; i <
a.nrow(); i++) {
154 size_t dis_i = first_dis_i + i;
155 _i2dis_i_base [i] = dis_i + base::_base;
156 for (
typename csr<T>::const_data_iterator ap = aptr[i]; ap < aptr[i+1]; ap++) {
157 size_t j = (*ap).first;
158 const T& val = (*ap).second;
159 size_t dis_j = first_dis_j + j;
160 if (base::_is_sym && dis_i > dis_j)
continue;
161 base::_val [bp] = val;
162 base::_idx [bp] = dis_j + base::_base;
166 typename csr<T>::const_iterator ext_ia =
a.ext_begin();
167 for (
typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
168 size_t jext = (*ap).first;
169 const T& val = (*ap).second;
170 size_t dis_j =
a.jext2dis_j (jext);
171 if (base::_is_sym && dis_i > dis_j)
continue;
172 base::_val [bp] = val;
173 base::_idx [bp] = dis_j + base::_base;
176 base::_ptr [i+1] = bp + base::_base;
178 check_macro (bp == base::_nnz,
"factorization: invalid count: nnz="<<base::_nnz<<
", count="<<bp);
182solver_pastix_rep<T,distributed>::update_values (
const csr<T>& a)
184 if (base::_have_pastix_bug_small_matrix) {
185 base::_a_when_bug =
a;
188 check_macro (
size_t(base::_n) ==
a.nrow() &&
size_t(base::_n) ==
a.ncol(),
189 "update: local input matrix size distribution mismatch: ("<<
a.nrow()<<
","<<
a.ncol()<<
"), expect ("
190 << base::_n <<
"," << base::_n <<
")");
192 if (!base::_is_sym) {
193 nnz_a =
a.nnz() +
a.ext_nnz();
196 size_t csr_upper_dia_nnz = compute_csr_upper_dia_nnz(a);
197 size_t csr_upper_ext_nnz = compute_csr_upper_ext_nnz(a);
198 nnz_a = csr_upper_dia_nnz + csr_upper_ext_nnz;
201 "update: local input matrix nnz distribution mismatch: nnz="<<nnz_a<<
", expect nnz="<<base::_nnz);
203 size_t first_dis_i =
a.row_ownership().first_index();
204 size_t first_dis_j =
a.col_ownership().first_index();
206 typename csr<T>::const_iterator aptr =
a.begin();
207 for (
size_t i = 0; i <
a.nrow(); i++) {
208 size_t dis_i = first_dis_i + i;
209 for (
typename csr<T>::const_data_iterator ap = aptr[i]; ap < aptr[i+1]; ap++) {
210 size_t j = (*ap).first;
211 size_t dis_j = first_dis_j + j;
212 if (base::_is_sym && dis_i > dis_j)
continue;
213 base::_val [bp] = (*ap).second;
216 typename csr<T>::const_iterator ext_ia =
a.ext_begin();
217 for (
typename csr<T>::const_data_iterator ap = ext_ia[i]; ap < ext_ia[i+1]; ap++) {
218 size_t jext = (*ap).first;
219 size_t dis_j =
a.jext2dis_j (jext);
220 if (base::_is_sym && dis_i > dis_j)
continue;
221 base::_val [bp] = (*ap).second;
225 numeric_factorization();
229solver_pastix_rep<T,distributed>::resize (pastix_int_t n, pastix_int_t nnz)
233 base::_ptr.resize(n+1);
234 base::_idx.resize(nnz);
235 base::_val.resize(nnz);
236 _i2dis_i_base.resize(n);
240solver_pastix_rep<T,distributed>::check ()
const
248 pastix_int_t symmetry = (is_symmetric() ? API_SYM_YES : API_SYM_NO);
249 pastix_int_t *ptr_begin = (pastix_int_t*) base::_ptr.begin().operator->();
250 pastix_int_t *idx_begin = (pastix_int_t*) base::_idx.begin().operator->();
251 T *val_begin = (
T*) base::_val.begin().operator->();
252 pastix_int_t *i2dis_i_base_begin = (pastix_int_t*) _i2dis_i_base.begin().operator->();
254 = d_pastix_checkMatrix(_comm, base::_opt.verbose_level, symmetry, API_YES,
255 base::_n, &ptr_begin, &idx_begin, &val_begin, &i2dis_i_base_begin,
257 check_macro (status == 0,
"pastix check returns error status = " << status);
261solver_pastix_rep<T,distributed>::symbolic_factorization ()
263 if (base::_have_pastix_bug_small_matrix) {
266 const pastix_int_t nbthread = 1;
267 const pastix_int_t ordering = 0;
271 base::_iparm[IPARM_START_TASK] = API_TASK_INIT;
272 base::_iparm[IPARM_END_TASK] = API_TASK_INIT;
273 base::_iparm[IPARM_MODIFY_PARAMETER] = API_NO;
274 d_dpastix (base::pp_data(),
277 base::_ptr.begin().operator->(),
278 base::_idx.begin().operator->(),
280 _i2dis_i_base.begin().operator->(),
289 base::_iparm[IPARM_THREAD_NBR] = nbthread;
290 if (is_symmetric()) {
291 base::_iparm[IPARM_SYM] = API_SYM_YES;
292 base::_iparm[IPARM_FACTORIZATION] = API_FACT_LDLT;
294 base::_iparm[IPARM_SYM] = API_SYM_NO;
295 base::_iparm[IPARM_FACTORIZATION] = API_FACT_LU;
297 base::_iparm[IPARM_MATRIX_VERIFICATION] = API_NO;
298 base::_iparm[IPARM_VERBOSE] = base::_opt.verbose_level;
299 base::_iparm[IPARM_ORDERING] = ordering;
301 if (base::_opt.iterative != solver_option::decide) {
302 do_incomplete = (base::_opt.iterative != 0);
304 do_incomplete = (base::_pattern_dimension > 2);
306 base::_iparm[IPARM_INCOMPLETE] = (do_incomplete ? 1 : 0);
307 base::_iparm[IPARM_OOC_LIMIT] = base::_opt.ooc;
308 if (base::_opt.iterative == 1) {
309 base::_dparm[DPARM_EPSILON_REFINEMENT] = base::_opt.tol;
311 base::_iparm[IPARM_LEVEL_OF_FILL] = base::_opt.level_of_fill;
312 base::_iparm[IPARM_AMALGAMATION_LEVEL] = base::_opt.amalgamation;
313 base::_iparm[IPARM_RHS_MAKING] = API_RHS_B;
317 base::_i2new_dis_i.resize (base::_n);
319 pastix_int_t itmp = 0;
326 base::_iparm[IPARM_START_TASK] = API_TASK_ORDERING;
327 base::_iparm[IPARM_END_TASK] = API_TASK_ANALYSE;
329 d_dpastix (base::pp_data(),
332 base::_ptr.begin().operator->(),
333 (base::_idx.begin().operator->() != NULL) ? base::_idx.begin().operator->() : &itmp,
335 (_i2dis_i_base.begin().operator->() != NULL) ? _i2dis_i_base.begin().operator->() : &itmp,
336 (base::_i2new_dis_i.begin().operator->() != NULL) ? base::_i2new_dis_i.begin().operator->() : &itmp,
343 _new_n = d_pastix_getLocalNodeNbr (base::pp_data());
347 base::_new_i2dis_i_base.resize (_new_n);
348 d_pastix_getLocalNodeLst (base::pp_data(), base::_new_i2dis_i_base.begin().operator->());
352solver_pastix_rep<T,distributed>::numeric_factorization ()
354 if (base::_have_pastix_bug_small_matrix) {
359 pastix_int_t status2 = d_cscd_redispatch (
361 base::_ptr.begin().operator->(),
362 base::_idx.begin().operator->(),
363 base::_val.begin().operator->(),
365 _i2dis_i_base.begin().operator->(),
371 base::_new_i2dis_i_base.begin().operator->(),
373 check_macro (status2 == EXIT_SUCCESS,
"d_csd_redispatch failed");
377 base::_iparm[IPARM_START_TASK] = API_TASK_NUMFACT;
378 base::_iparm[IPARM_END_TASK] = API_TASK_NUMFACT;
379 d_dpastix (base::pp_data(),
385 base::_new_i2dis_i_base.begin().operator->(),
386 base::_i2new_dis_i.begin().operator->(),
395solver_pastix_rep<T,distributed>::trans_solve (
const vec<T>& rhs)
const
397 if (base::_have_pastix_bug_small_matrix) {
398 error_macro (
"trans_solve: bug, circumvent not yet !");
403 check_macro (rhs.size() ==
size_t(base::_n),
"invalid rhs size="<<rhs.size()<<
": expect size="<<base::_n);
406 base::_new_rhs.resize (_new_n);
407 std::set<size_t> dis_i_set;
408 std::map<size_t,size_t> dis_i2new_i;
409 size_t first_dis_i = base::_csr_row_ownership.first_index();
410 size_t last_dis_i = base::_csr_row_ownership.last_index();
411 for (pastix_int_t new_i = 0; new_i < _new_n; new_i++) {
412 size_t dis_i = base::_new_i2dis_i_base [new_i] - base::_base;
413 if (dis_i >= first_dis_i && dis_i < last_dis_i) {
415 size_t i = dis_i - first_dis_i;
416 base::_new_rhs [new_i] = rhs [i];
419 dis_i_set.insert (dis_i);
420 dis_i2new_i.insert (std::make_pair(dis_i,new_i));
424 std::map<size_t,T> dis_i_map;
425 rhs.get_dis_entry (dis_i_set, dis_i_map);
426 for (
typename map<size_t,T>::const_iterator
427 iter = dis_i_map.begin(),
428 last = dis_i_map.end(); iter != last; iter++) {
429 size_t dis_i = (*iter).first;
430 const T& val = (*iter).second;
431 size_t new_i = dis_i2new_i [dis_i];
432 base::_new_rhs [new_i] = val;
439 base::_iparm[IPARM_START_TASK] = API_TASK_SOLVE;
440 base::_iparm[IPARM_END_TASK] = API_TASK_REFINE;
441 d_dpastix (base::pp_data(),
447 base::_new_i2dis_i_base.begin().operator->(),
448 base::_i2new_dis_i.begin().operator->(),
450 (_new_n != 0) ? base::_new_rhs.begin().operator->() : &vtmp,
456 vec<T> x (base::_csr_row_ownership);
457 for (pastix_int_t new_i = 0; new_i < _new_n; new_i++) {
458 size_t dis_i = base::_new_i2dis_i_base [new_i] - base::_base;
459 x.dis_entry (dis_i) = base::_new_rhs[new_i];
461 x.dis_entry_assembly();
466solver_pastix_rep<T,distributed>::solve (
const vec<T>& rhs)
const
468 if (base::_have_pastix_bug_small_matrix) {
469 T tol = std::numeric_limits<T>::epsilon();
470 size_t max_iter = 1000;
471 vec<T> x (base::_a_when_bug.col_ownership(), 0);
472 int status =
cg (base::_a_when_bug, x, rhs, eye<T>(), max_iter, tol, &derr);
473 check_macro (tol < std::numeric_limits<T>::epsilon(),
"solver(cg): machine precision not reached: "<<tol);
478 if (! base::_is_sym) {
479 warning_macro (
"solve: not yet supported for unsymmetric matrix");
481 return trans_solve (rhs);
484solver_pastix_rep<T,distributed>::~solver_pastix_rep()
488 base::_iparm[IPARM_START_TASK] = API_TASK_CLEAN;
489 base::_iparm[IPARM_END_TASK] = API_TASK_CLEAN;
491 d_dpastix (base::pp_data(),
500 base::_new_rhs.begin().operator->(),
506 if (_new_ptr != NULL) free (_new_ptr);
507 if (_new_idx != NULL) free (_new_idx);
508 if (_new_val != NULL) free (_new_val);
509 base::_pastix_data = 0;
516template class solver_pastix_rep<double,distributed>;
#define trace_macro(message)
#define error_macro(message)
#define warning_macro(message)
check_macro(expr1.have_homogeneous_space(Xh1), "dual(expr1,expr2); expr1 should have homogeneous space. HINT: use dual(interpolate(Xh, expr1),expr2)")
This file is part of Rheolef.
int cg(const Matrix &A, Vector &x, const Vector2 &Mb, const Preconditioner &M, const solver_option &sopt=solver_option())
void check(Float p, const field &uh)
void load(idiststream &in, Float &p, field &uh)