The preconditioned solver example.
This example depends on multigrid-preconditioned-solver, mixed-multigrid-solver.
 
  This example shows how to use the mixed-precision multigrid preconditioner.
In this example, we first read in a matrix from a file. The preconditioned CG solver is enhanced with a mixed-precision multigrid preconditioner. The example features the generating time and runtime of the CG solver.
 
The commented program
Print version information
 
const auto executor_string = argc >= 2 ? argv[1] : "reference";
static const version_info & get()
Returns an instance of version_info.
Definition version.hpp:139
 Figure out where to run the code
std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
    exec_map{
        {"cuda",
         [] {
         }},
        {"hip",
         [] {
         }},
        {"dpcpp",
         [] {
                 0, gko::ReferenceExecutor::create());
         }},
        {"reference", [] { return gko::ReferenceExecutor::create(); }}};
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition executor.hpp:1396
executor where Ginkgo will perform the computation
const auto exec = exec_map.at(executor_string)();  
 
const int mixed_int = argc >= 3 ? std::atoi(argv[2]) : 1;
const bool use_mixed = mixed_int != 0;  
std::cout << "Using mixed precision? " << use_mixed << std::endl;
Read data
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:159
 Create RHS as 1 and initial guess as 0
auto host_x = vec::create(exec->get_master(), 
gko::dim<2>(size, 1));
 
auto host_b = vec::create(exec->get_master(), 
gko::dim<2>(size, 1));
 
for (auto i = 0; i < size; i++) {
    host_x->at(i, 0) = 0.;
    host_b->at(i, 0) = 1.;
}
auto x = vec::create(exec);
auto b = vec::create(exec);
x->copy_from(host_x);
b->copy_from(host_b);
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:89
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26
 Calculate initial residual by overwriting b
A->apply(one, x, neg_one, b);
b->compute_norm2(initres);
std::unique_ptr< Matrix > initialize(size_type stride, std::initializer_list< typename Matrix::value_type > vals, std::shared_ptr< const Executor > exec, TArgs &&... create_args)
Creates and initializes a column-vector.
Definition dense.hpp:1565
 copy b again
Prepare the stopping criteria
auto iter_stop =
    gko::share(gko::stop::Iteration::build().with_max_iters(100u).on(exec));
 
auto tol_stop = 
gko::share(gko::stop::ResidualNorm<ValueType>::build()
 
                               .with_baseline(gko::stop::mode::absolute)
                               .with_reduction_factor(tolerance)
                               .on(exec));
 
std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
iter_stop->add_logger(logger);
tol_stop->add_logger(logger);
static std::unique_ptr< Convergence > create(std::shared_ptr< const Executor >, const mask_type &enabled_events=Logger::criterion_events_mask|Logger::iteration_complete_mask)
Creates a convergence logger.
Definition convergence.hpp:73
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:260
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition utils_helper.hpp:224
 Create smoother factory (ir with bj)
auto inner_solver_gen =
    gko::share(bj::build().with_max_block_size(1u).on(exec));
 
auto inner_solver_gen_f =
    gko::share(bj_f::build().with_max_block_size(1u).on(exec));
 
    ir::build()
        .with_solver(inner_solver_gen)
        .with_relaxation_factor(static_cast<ValueType>(0.9))
        .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
        .on(exec));
    ir_f::build()
        .with_solver(inner_solver_gen_f)
        .with_relaxation_factor(static_cast<MixedType>(0.9))
        .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
        .on(exec));
Create MultigridLevel factory
auto mg_level_gen =
    gko::share(pgm::build().with_deterministic(
true).on(exec));
 
auto mg_level_gen_f =
    gko::share(pgm_f::build().with_deterministic(
true).on(exec));
 
Create CoarsestSolver factory
    ir::build()
        .with_solver(inner_solver_gen)
        .with_relaxation_factor(static_cast<ValueType>(0.9))
        .with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
        .on(exec));
    ir_f::build()
        .with_solver(inner_solver_gen_f)
        .with_relaxation_factor(static_cast<MixedType>(0.9))
        .with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
        .on(exec));
 Create multigrid factory
std::shared_ptr<gko::LinOpFactory> multigrid_gen;
if (use_mixed) {
    multigrid_gen =
        mg::build()
            .with_max_levels(10u)
            .with_min_coarse_rows(2u)
            .with_pre_smoother(smoother_gen, smoother_gen_f)
            .with_post_uses_pre(true)
            .with_mg_level(mg_level_gen, mg_level_gen_f)
Definition lin_op.hpp:117
The first (index 0) level will use the first mg_level_gen, smoother_gen which are the factories with ValueType. The rest of levels (>= 1) will use the second (index 1) mg_level_gen2 and smoother_gen2 which use the MixedType. The rest of levels will use different type than the normal multigrid.
                return level >= 1 ? 1 : 0;
            })
            .with_coarsest_solver(coarsest_gen_f)
            .with_default_initial_guess(
            .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
            .on(exec);
} else {
    multigrid_gen =
        mg::build()
            .with_max_levels(10u)
            .with_min_coarse_rows(2u)
            .with_pre_smoother(smoother_gen)
            .with_post_uses_pre(true)
            .with_mg_level(mg_level_gen)
            .with_coarsest_solver(coarsest_gen)
            .with_default_initial_guess(
            .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
            .on(exec);
}
@ zero
the input is zero
Definition solver_base.hpp:37
Create solver factory
auto solver_gen = cg::build()
                      .with_criteria(iter_stop, tol_stop)
                      .with_preconditioner(multigrid_gen)
                      .on(exec);
Create solver
std::chrono::nanoseconds gen_time(0);
auto gen_tic = std::chrono::steady_clock::now();
auto solver = solver_gen->generate(A);
exec->synchronize();
auto gen_toc = std::chrono::steady_clock::now();
gen_time +=
    std::chrono::duration_cast<std::chrono::nanoseconds>(gen_toc - gen_tic);
Solve system
exec->synchronize();
std::chrono::nanoseconds time(0);
auto tic = std::chrono::steady_clock::now();
solver->apply(b, x);
exec->synchronize();
auto toc = std::chrono::steady_clock::now();
time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
Calculate residual
A->apply(one, x, neg_one, b);
b->compute_norm2(res);
 
std::cout << "Initial residual norm sqrt(r^T r): \n";
write(std::cout, initres);
std::cout << "Final residual norm sqrt(r^T r): \n";
write(std::cout, res);
 Print solver statistics
    std::cout << "CG iteration count:     " << logger->get_num_iterations()
              << std::endl;
    std::cout << "CG generation time [ms]: "
              << static_cast<double>(gen_time.count()) / 1000000.0 << std::endl;
    std::cout << "CG execution time [ms]: "
              << static_cast<double>(time.count()) / 1000000.0 << std::endl;
    std::cout << "CG execution time per iteration[ms]: "
              << static_cast<double>(time.count()) / 1000000.0 /
                     logger->get_num_iterations()
              << std::endl;
}
 
Results
This is the expected output:
Initial residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
4.3589
Final residual norm sqrt(r^T r):
%%MatrixMarket matrix array real general
1 1
1.69858e-09
CG iteration count:     39
CG generation time [ms]: 2.04293
CG execution time [ms]: 22.3874
CG execution time per iteration[ms]: 0.574036
Comments about programming and debugging 
 
The plain program
 
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
 
#include <ginkgo/ginkgo.hpp>
 
 
int main(int argc, char* argv[])
{
    using ValueType = double;
    using MixedType = float;
    using IndexType = int;
 
 
 
    const auto executor_string = argc >= 2 ? argv[1] : "reference";
    std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
        exec_map{
            {"cuda",
             [] {
             }},
            {"hip",
             [] {
             }},
            {"dpcpp",
             [] {
                     0, gko::ReferenceExecutor::create());
             }},
            {"reference", [] { return gko::ReferenceExecutor::create(); }}};
 
    const auto exec = exec_map.at(executor_string)();  
 
    const int mixed_int = argc >= 3 ? std::atoi(argv[2]) : 1;
    const bool use_mixed = mixed_int != 0;  
    std::cout << "Using mixed precision? " << use_mixed << std::endl;
    auto host_x = vec::create(exec->get_master(), 
gko::dim<2>(size, 1));
 
    auto host_b = vec::create(exec->get_master(), 
gko::dim<2>(size, 1));
 
    for (auto i = 0; i < size; i++) {
        host_x->at(i, 0) = 0.;
        host_b->at(i, 0) = 1.;
    }
    auto x = vec::create(exec);
    auto b = vec::create(exec);
    x->copy_from(host_x);
    b->copy_from(host_b);
 
    A->apply(one, x, neg_one, b);
    b->compute_norm2(initres);
 
    b->copy_from(host_b);
 
    auto iter_stop =
        gko::share(gko::stop::Iteration::build().with_max_iters(100u).on(exec));
 
    auto tol_stop = 
gko::share(gko::stop::ResidualNorm<ValueType>::build()
 
                                   .with_baseline(gko::stop::mode::absolute)
                                   .with_reduction_factor(tolerance)
                                   .on(exec));
 
    std::shared_ptr<const gko::log::Convergence<ValueType>> logger =
    iter_stop->add_logger(logger);
    tol_stop->add_logger(logger);
 
    auto inner_solver_gen =
        gko::share(bj::build().with_max_block_size(1u).on(exec));
 
    auto inner_solver_gen_f =
        gko::share(bj_f::build().with_max_block_size(1u).on(exec));
 
        ir::build()
            .with_solver(inner_solver_gen)
            .with_relaxation_factor(static_cast<ValueType>(0.9))
            .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
            .on(exec));
        ir_f::build()
            .with_solver(inner_solver_gen_f)
            .with_relaxation_factor(static_cast<MixedType>(0.9))
            .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
            .on(exec));
    auto mg_level_gen =
        gko::share(pgm::build().with_deterministic(
true).on(exec));
 
    auto mg_level_gen_f =
        gko::share(pgm_f::build().with_deterministic(
true).on(exec));
 
        ir::build()
            .with_solver(inner_solver_gen)
            .with_relaxation_factor(static_cast<ValueType>(0.9))
            .with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
            .on(exec));
        ir_f::build()
            .with_solver(inner_solver_gen_f)
            .with_relaxation_factor(static_cast<MixedType>(0.9))
            .with_criteria(gko::stop::Iteration::build().with_max_iters(4u))
            .on(exec));
    std::shared_ptr<gko::LinOpFactory> multigrid_gen;
    if (use_mixed) {
        multigrid_gen =
            mg::build()
                .with_max_levels(10u)
                .with_min_coarse_rows(2u)
                .with_pre_smoother(smoother_gen, smoother_gen_f)
                .with_post_uses_pre(true)
                .with_mg_level(mg_level_gen, mg_level_gen_f)
                    return level >= 1 ? 1 : 0;
                })
                .with_coarsest_solver(coarsest_gen_f)
                .with_default_initial_guess(
                .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
                .on(exec);
    } else {
        multigrid_gen =
            mg::build()
                .with_max_levels(10u)
                .with_min_coarse_rows(2u)
                .with_pre_smoother(smoother_gen)
                .with_post_uses_pre(true)
                .with_mg_level(mg_level_gen)
                .with_coarsest_solver(coarsest_gen)
                .with_default_initial_guess(
                .with_criteria(gko::stop::Iteration::build().with_max_iters(1u))
                .on(exec);
    }
    auto solver_gen = cg::build()
                          .with_criteria(iter_stop, tol_stop)
                          .with_preconditioner(multigrid_gen)
                          .on(exec);
    std::chrono::nanoseconds gen_time(0);
    auto gen_tic = std::chrono::steady_clock::now();
    auto solver = solver_gen->generate(A);
 
    exec->synchronize();
    auto gen_toc = std::chrono::steady_clock::now();
    gen_time +=
        std::chrono::duration_cast<std::chrono::nanoseconds>(gen_toc - gen_tic);
 
    exec->synchronize();
    std::chrono::nanoseconds time(0);
    auto tic = std::chrono::steady_clock::now();
    exec->synchronize();
    auto toc = std::chrono::steady_clock::now();
    time += std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic);
 
    A->apply(one, x, neg_one, b);
    b->compute_norm2(res);
 
    std::cout << "Initial residual norm sqrt(r^T r): \n";
    write(std::cout, initres);
 
    std::cout << "Final residual norm sqrt(r^T r): \n";
 
    std::cout << "CG iteration count:     " << logger->get_num_iterations()
              << std::endl;
    std::cout << "CG generation time [ms]: "
              << static_cast<double>(gen_time.count()) / 1000000.0 << std::endl;
    std::cout << "CG execution time [ms]: "
              << static_cast<double>(time.count()) / 1000000.0 << std::endl;
    std::cout << "CG execution time per iteration[ms]: "
              << static_cast<double>(time.count()) / 1000000.0 /
                     logger->get_num_iterations()
              << std::endl;
}
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:121
Dense is a matrix format which explicitly stores all values of the matrix.
Definition dense.hpp:117
Parallel graph match (Pgm) is the aggregate method introduced in the paper M.
Definition pgm.hpp:52
A block-Jacobi preconditioner is a block-diagonal linear operator, obtained by inverting the diagonal...
Definition jacobi.hpp:190
CG or the conjugate gradient method is an iterative type Krylov subspace method which is suitable for...
Definition cg.hpp:50
FCG or the flexible conjugate gradient method is an iterative type Krylov subspace method which is su...
Definition fcg.hpp:55
Iterative refinement (IR) is an iterative method that uses another coarse method to approximate the e...
Definition ir.hpp:85
Multigrid methods have a hierarchy of many levels, whose corase level is a subset of the fine level,...
Definition multigrid.hpp:110
@ solver
Solver events.
Definition profiler_hook.hpp:34
constexpr T one()
Returns the multiplicative identity for T.
Definition math.hpp:630
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition mtx_io.hpp:295