Ginkgo Generated from branch based on main. Ginkgo version 1.9.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
The adaptiveprecision-blockjacobi program

The preconditioned solver example.

This example depends on preconditioned-solver.

Table of contents
  1. Introduction
  2. The commented program
  1. Results
  2. The plain program

This example shows how to use the adaptive precision block-Jacobi preconditioner.

In this example, we first read in a matrix from file, then generate a right-hand side and an initial guess. The preconditioned CG solver is enhanced with a block-Jacobi preconditioner that optimizes the storage format for the distinct inverted diagonal blocks to the numerical requirements. The example features the iteration count and runtime of the CG solver.

The commented program

const auto executor_string = argc >= 2 ? argv[1] : "reference";

Figure out where to run the code

std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition executor.hpp:1396

executor where Ginkgo will perform the computation

const auto exec = exec_map.at(executor_string)(); // throws if not valid

Read data

auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:159

Create RHS and initial guess as 1

gko::size_type size = A->get_size()[0];
auto host_x = vec::create(exec->get_master(), gko::dim<2>(size, 1));
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:89
detail::cloned_type< Pointer > clone(const Pointer &p)
Creates a unique clone of the object pointed to by p.
Definition utils_helper.hpp:173
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26

Calculate initial residual by overwriting b

auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
std::unique_ptr< Matrix > initialize(size_type stride, std::initializer_list< typename Matrix::value_type > vals, std::shared_ptr< const Executor > exec, TArgs &&... create_args)
Creates and initializes a column-vector.
Definition dense.hpp:1565

copy b again

b->copy_from(host_x);

Create solver factory

const RealValueType reduction_factor = 1e-7;
auto solver_gen =
cg::build()
.with_criteria(gko::stop::Iteration::build().with_max_iters(10000u),
.with_reduction_factor(reduction_factor))
The ResidualNorm class is a stopping criterion which stops the iteration process when the actual resi...
Definition residual_norm.hpp:113

Add preconditioner, these 2 lines are the only difference from the simple solver example

.with_preconditioner(
bj::build().with_max_block_size(16u).with_storage_optimization(
.on(exec);
static constexpr precision_reduction autodetect() noexcept
Returns a special encoding which instructs the algorithm to automatically detect the best precision.
Definition types.hpp:313

Create solver

std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
solver_gen->add_logger(logger);
auto solver = solver_gen->generate(A);
static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)
Creates a convergence logger.
Definition convergence.hpp:73

Solve system

exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);

Get residual

auto res = gko::as<real_vec>(logger->get_residual_norm());
auto impl_res = gko::as<real_vec>(logger->get_implicit_sq_resnorm());
std::cout << "Initial residual norm sqrt(r^T r):\n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r):\n";
write(std::cout, res);
std::cout << "Implicit residual norm squared (r^2):\n";
write(std::cout, impl_res);
std::decay_t< T > * as(U *obj)
Performs polymorphic type conversion.
Definition utils_helper.hpp:307

Print solver statistics

std::cout << "CG iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "CG execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
}

Results

This is the expected output:

Initial residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
194.679
Final residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
5.69384e-06
Implicit residual norm squared (r^2):
%%MatrixMarket matrix array real general
1 1
1.27043e-15
CG iteration count: 5
CG execution time [ms]: 0.080041

Comments about programming and debugging

The plain program

#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <ginkgo/ginkgo.hpp>
int main(int argc, char* argv[])
{
using ValueType = double;
using RealValueType = gko::remove_complex<ValueType>;
using IndexType = int;
std::cout << gko::version_info::get() << std::endl;
if (argc == 2 && (std::string(argv[1]) == "--help")) {
std::cerr << "Usage: " << argv[0] << " [executor]" << std::endl;
std::exit(-1);
}
const auto executor_string = argc >= 2 ? argv[1] : "reference";
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
exec_map{
{"omp", [] { return gko::OmpExecutor::create(); }},
{"cuda",
[] {
}},
{"hip",
[] {
}},
{"dpcpp",
[] {
}},
{"reference", [] { return gko::ReferenceExecutor::create(); }}};
const auto exec = exec_map.at(executor_string)(); // throws if not valid
auto A = share(gko::read<mtx>(std::ifstream("data/A.mtx"), exec));
gko::size_type size = A->get_size()[0];
auto host_x = vec::create(exec->get_master(), gko::dim<2>(size, 1));
for (auto i = 0; i < size; i++) {
host_x->at(i, 0) = 1.;
}
auto x = gko::clone(exec, host_x);
auto b = gko::clone(exec, host_x);
auto one = gko::initialize<vec>({1.0}, exec);
auto neg_one = gko::initialize<vec>({-1.0}, exec);
auto initres = gko::initialize<real_vec>({0.0}, exec);
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
b->copy_from(host_x);
const RealValueType reduction_factor = 1e-7;
auto solver_gen =
cg::build()
.with_criteria(gko::stop::Iteration::build().with_max_iters(10000u),
.with_reduction_factor(reduction_factor))
.with_preconditioner(
bj::build().with_max_block_size(16u).with_storage_optimization(
.on(exec);
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
solver_gen->add_logger(logger);
auto solver = solver_gen->generate(A);
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
auto res = gko::as<real_vec>(logger->get_residual_norm());
auto impl_res = gko::as<real_vec>(logger->get_implicit_sq_resnorm());
std::cout << "Initial residual norm sqrt(r^T r):\n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r):\n";
write(std::cout, res);
std::cout << "Implicit residual norm squared (r^2):\n";
write(std::cout, impl_res);
std::cout << "CG iteration count: " << logger->get_num_iterations()
<< std::endl;
std::cout << "CG execution time [ms]: "
<< static_cast<double>(time.count()) / 1000000.0 << std::endl;
}
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition sparsity_csr.hpp:21
Dense is a matrix format which explicitly stores all values of the matrix.
Definition sparsity_csr.hpp:25
A block-Jacobi preconditioner is a block-diagonal linear operator, obtained by inverting the diagonal...
Definition jacobi.hpp:190
CG or the conjugate gradient method is an iterative type Krylov subspace method which is suitable for...
Definition cg.hpp:50
static const version_info & get()
Returns an instance of version_info.
Definition version.hpp:139
constexpr T one()
Returns the multiplicative identity for T.
Definition math.hpp:630
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:260
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition mtx_io.hpp:295
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition utils_helper.hpp:224