The ParILU convergence example.
This example depends on simple-solver.
 
  Introduction
About the example 
This example can be used to inspect the convergence behavior of parallel incomplete factorizations.
The commented program
// Calls `fun` and hands back whatever it returns. On the error path visible
// below, "Error: " followed by `err.what()` is written to std::cerr and the
// process exits with status -1.
// NOTE(review): `Function` and `err` are used but never declared in this
// listing; the template header and the `catch` clause appear to have been lost
// when the page was extracted — confirm against the upstream example.
auto try_generate(Function fun) -> decltype(fun())
{
    // Declared outside the try block so it is still in scope for the return.
    decltype(fun()) result;
    try {
        result = fun();
        // Report the failure and terminate instead of propagating it.
        std::cerr << 
"Error: " << err.
what() << 
'\n';
        std::exit(-1);
    }
    return result;
}
 
 
// Returns the square root of `residual_norm` after walking the nonzeros of
// `residual_data` in step with the iterator `it`.
// `it` only advances when the current residual entry has the same
// (row, column) as the entry `it` refers to; all other residual entries are
// passed over without moving `it`.
// NOTE(review): the parameter list, the declarations of `residual_data`,
// `mtx_data` and `it`, and any statement that adds to `residual_norm` are not
// present in this listing (as shown, the result would always be sqrt(0)).
// They were presumably lost during extraction — confirm against upstream.
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
    // Write both matrices into `residual_data` / `mtx_data`.
    residual->write(residual_data);
    mtx->write(mtx_data);
    // Value-initialised to zero.
    double residual_norm{};
    // assumes both nonzero lists are ordered the same way (presumably via
    // sort_row_major()) so that a single forward pass can match them —
    // TODO confirm
    for (
auto entry : residual_data.
nonzeros) {
 
        auto ref_row = it->row;
        auto ref_col = it->column;
        if (entry.row == ref_row && entry.column == ref_col) {
            ++it;
        }
    }
    return std::sqrt(residual_norm);
}
 
 
// Program entry point (annotated listing).
// Reads the executor name and the optional settings from the command line,
// then, for every iteration count from 1 to `max_iterations`, prints one line
// to std::cout: the count, one runtime in nanoseconds per repetition and one
// residual norm per repetition, each followed by ';'.
// Returns -1 after printing the usage text when the executor argument is
// missing or is not a key of `executors`.
// NOTE(review): several statements below are visibly incomplete (missing
// lambda headers and initializers); they were presumably lost when the page
// was extracted — confirm against the upstream example.
int main(int argc, char* argv[])
{
    using ValueType = double;
    using IndexType = int;
The Error class is used to report exceptional behaviour in library functions.
Definition exception.hpp:57
virtual const char * what() const noexcept override
Returns a human-readable string with a more detailed description of the error.
Definition exception.hpp:74
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:121
constexpr auto squared_norm(const T &x) -> decltype(real(conj(x) *x))
Returns the squared norm of the object.
Definition math.hpp:913
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:126
std::vector< nonzero_type > nonzeros
A vector of tuples storing the non-zeros of the matrix.
Definition matrix_data.hpp:453
void sort_row_major()
Sorts the nonzero vector so the values follow row-major order.
Definition matrix_data.hpp:458
print usage message
if (argc < 2 || executors.find(argv[1]) == executors.end()) {
    std::cerr << "Usage: executable"
              << " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
                 "[<parilu|parilut|paric|parict] [<max-iterations>] "
                 "[<num-repetitions>] [<fill-in-limit>]\n";
    return -1;
}
generate executor based on first argument
auto exec = try_generate([&] { return executors.at(argv[1])(); });
set matrix and preconditioner name with default values
// Defaults: "data/A.mtx", "parilu", 10 iterations, 10 repetitions, limit 2.
// NOTE(review): std::stoi / std::stod throw on non-numeric input and no
// handler is visible here.
std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
std::string precond = argc < 4 ? "parilu" : argv[3];
int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
double limit = argc < 7 ? 2 : std::stod(argv[6]);
load matrix file into Csr format
// NOTE(review): the statement that opens this block is missing; the closing
// `}));` below suggests a lambda passed to a call — confirm against upstream.
        std::ifstream mtx_stream{matrix};
        if (!mtx_stream) {
            throw GKO_STREAM_ERROR("Unable to open matrix file");
        }
        // The progress message goes to std::cerr; std::cout only receives
        // the result lines printed further down.
        std::cerr << "Reading " << matrix << std::endl;
                                                                 exec);
    }));
 
    // Selects the factorization factory by the name stored in `precond`.
    // Only the "parilut" and "parict" branches pass `limit`.
    // NOTE(review): the lambda header that declares `iteration` is missing
    // from this listing.
    auto factory_generator =
        if (precond == "parilu") {
            return gko::factorization::ParIlu<ValueType, IndexType>::build()
                .with_iterations(iteration)
                .on(exec);
        } else if (precond == "paric") {
            return gko::factorization::ParIc<ValueType, IndexType>::build()
                .with_iterations(iteration)
                .on(exec);
        } else if (precond == "parilut") {
            return gko::factorization::ParIlut<ValueType, IndexType>::build()
                .with_fill_in_limit(limit)
                .with_iterations(iteration)
                .on(exec);
        } else if (precond == "parict") {
            return gko::factorization::ParIct<ValueType, IndexType>::build()
                .with_fill_in_limit(limit)
                .with_iterations(iteration)
                .on(exec);
        } else {
            // Any other name ends up here; presumably this macro raises a
            // "not implemented" error — confirm.
            GKO_NOT_IMPLEMENTED;
        }
    };
 
    // NOTE(review): the initializer of `minus_one` is missing, and `one`,
    // `residual` and `mtx` used below are not declared in the visible lines.
    auto minus_one =
    for (int it = 1; it <= max_iterations; ++it) {
        auto factory = factory_generator(it);
 
        std::cout << it << ';';
        std::vector<long> times;
        std::vector<double> residuals;
        for (int rep = 0; rep < num_repetitions; ++rep) {
            auto tic = std::chrono::high_resolution_clock::now();
            // NOTE(review): initializer missing; presumably the factorization
            // generated from `factory` — confirm.
            auto result =
            // Presumably waits for outstanding executor work so that it is
            // included in the measured interval — confirm.
            exec->synchronize();
            auto toc = std::chrono::high_resolution_clock::now();
            // Presumably updates `residual` to
            // one * op[0] * op[1] + minus_one * residual — confirm against
            // the LinOp apply documentation.
            result->get_operators()[0]->apply(one, result->get_operators()[1],
                                              minus_one, residual);
            // Elapsed time between tic and toc, stored in nanoseconds.
            times.push_back(
                std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
                    .count());
            residuals.push_back(
                compute_ilu_residual_norm(residual.get(), mtx.get()));
        }
        // All runtimes first, then all residual norms, ';'-terminated.
        for (auto el : times) {
            std::cout << el << ';';
        }
        for (auto el : residuals) {
            std::cout << el << ';';
        }
        std::cout << '\n';
    }
}
std::unique_ptr< Matrix > initialize(size_type stride, std::initializer_list< typename Matrix::value_type > vals, std::shared_ptr< const Executor > exec, TArgs &&... create_args)
Creates and initializes a column-vector.
Definition dense.hpp:1565
@ factory
LinOpFactory events.
Definition profiler_hook.hpp:32
constexpr T one()
Returns the multiplicative identity for T.
Definition math.hpp:630
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:89
detail::cloned_type< Pointer > clone(const Pointer &p)
Creates a unique clone of the object pointed to by p.
Definition utils_helper.hpp:173
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:159
std::decay_t< T > * as(U *obj)
Performs polymorphic type conversion.
Definition utils_helper.hpp:307
detail::shared_type< OwningPointer > share(OwningPointer &&p)
Marks the object pointed to by p as shared.
Definition utils_helper.hpp:224
  
Results
This is the expected output when the program is started without a valid executor argument:
Usage: executable <reference|omp|cuda|hip|dpcpp> [<matrix-file>] [<parilu|parilut|paric|parict] [<max-iterations>] [<num-repetitions>] [<fill-in-limit>]
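When specifying an executor (each result line holds the iteration count, followed by the runtime in nanoseconds of every repetition and then the residual norm of every repetition, separated by semicolons):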
Reading data/A.mtx
1;71800;10300;8800;8200;8000;7700;7500;7500;7500;7400;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;1.0331e-14;
2;15500;9100;13500;9000;8600;8800;8700;8600;8600;8500;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
3;16500;10200;10100;10100;9900;10000;9800;9800;9900;9900;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
4;17500;11500;11200;15600;11300;11200;11400;11200;11200;11100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
5;18800;12800;12700;12600;12500;12400;12400;12400;12400;14100;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
6;19200;13400;23100;15400;13200;13000;13000;13000;13100;13000;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
7;20500;14500;14400;14200;14200;14300;14200;14100;14300;14200;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
8;21600;15700;86200;16300;15700;15600;15500;15400;15500;15600;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
9;22700;17000;16700;16600;16700;16800;20400;17400;17500;17400;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
10;25500;19000;18800;18700;18700;18800;18600;18700;18600;18700;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;4.15407e-16;
Comments about programming and debugging 
 
The plain program
 
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>
 
#include <ginkgo/ginkgo.hpp>
 
 
// Lookup table from an executor name (compared against argv[1] in main) to a
// callable that returns a std::shared_ptr<gko::Executor>.
// NOTE(review): as listed here, the "cuda", "hip" and "dpcpp" lambdas have
// empty bodies and there is no "omp" entry although the usage message
// advertises one; these lines were presumably lost when the page was
// extracted — confirm against the upstream example.
const std::map<std::string, std::function<std::shared_ptr<gko::Executor>()>>
    executors{
        {"reference", [] { return gko::ReferenceExecutor::create(); }},
        {"cuda",
         [] {
         }},
        {"hip",
         [] {
         }},
        {"dpcpp", [] {
         }}};
 
 
// Runs `fun` and returns its result. On the error path visible below,
// "Error: " followed by `err.what()` is printed to std::cerr and the process
// terminates with exit status -1.
// The return type must be default-constructible and assignable, because
// `result` is declared first and assigned inside the try block.
// NOTE(review): `err` is used but never declared in this listing; the
// `catch` clause appears to have been lost when the page was extracted —
// confirm against the upstream example.
template <typename Function>
auto try_generate(Function fun) -> decltype(fun())
{
    // Lives outside the try block so the final return can still see it.
    decltype(fun()) result;
    try {
        result = fun();
        // Print the error text and stop the program rather than rethrowing.
        std::cerr << 
"Error: " << err.
what() << 
'\n';
        std::exit(-1);
    }
    return result;
}
 
 
// Returns std::sqrt(residual_norm) after a single pass over
// `residual_data.nonzeros`.
// During the pass the iterator `it` is advanced only when the current
// residual entry sits at the same (row, column) as the entry `it` points to.
// NOTE(review): the parameter list, the declarations of `residual_data`,
// `mtx_data` and `it`, and the statement that accumulates into
// `residual_norm` are absent from this listing (as shown, the function would
// always return sqrt(0)). They were presumably lost during extraction —
// confirm against the upstream example.
template <typename ValueType, typename IndexType>
double compute_ilu_residual_norm(
{
    // Fill `residual_data` and `mtx_data` from the two matrices.
    residual->write(residual_data);
    mtx->write(mtx_data);
    // Starts at zero (value-initialisation).
    double residual_norm{};
    // assumes both nonzero lists share the same ordering so that one forward
    // sweep is enough to pair them up — TODO confirm
    for (
auto entry : residual_data.
nonzeros) {
 
        auto ref_row = it->row;
        auto ref_col = it->column;
        if (entry.row == ref_row && entry.column == ref_col) {
            ++it;
        }
    }
    return std::sqrt(residual_norm);
}
 
 
// Entry point of the convergence study.
//
// Command line (everything after the first argument is optional):
//   argv[1]  executor name, must be a key of `executors`
//   argv[2]  matrix file          (default "data/A.mtx")
//   argv[3]  factorization name   (default "parilu")
//   argv[4]  maximum iterations   (default 10)
//   argv[5]  repetitions          (default 10)
//   argv[6]  fill-in limit        (default 2)
//
// For every iteration count from 1 to `max_iterations` one line is written to
// std::cout: the count, then one runtime (nanoseconds) per repetition, then
// one residual norm per repetition, each followed by ';'.
// Returns -1 after printing the usage text when the executor argument is
// missing or unknown.
// NOTE(review): several statements in this listing are visibly incomplete
// (missing lambda headers and initializers); they were presumably lost when
// the page was extracted — confirm against the upstream example.
int main(int argc, char* argv[])
{
    using ValueType = double;
    using IndexType = int;
 
    // Reject a missing or unknown executor name before doing anything else.
    if (argc < 2 || executors.find(argv[1]) == executors.end()) {
        std::cerr << "Usage: executable"
                  << " <reference|omp|cuda|hip|dpcpp> [<matrix-file>] "
                     "[<parilu|parilut|paric|parict] [<max-iterations>] "
                     "[<num-repetitions>] [<fill-in-limit>]\n";
        return -1;
    }
 
    // Create the executor selected by argv[1].
    auto exec = try_generate([&] { return executors.at(argv[1])(); });
 
    // Optional arguments fall back to the defaults listed above.
    // NOTE(review): std::stoi / std::stod throw on non-numeric input and no
    // handler is visible here.
    std::string matrix = argc < 3 ? "data/A.mtx" : argv[2];
    std::string precond = argc < 4 ? "parilu" : argv[3];
    int max_iterations = argc < 5 ? 10 : std::stoi(argv[4]);
    int num_repetitions = argc < 6 ? 10 : std::stoi(argv[5]);
    double limit = argc < 7 ? 2 : std::stod(argv[6]);
 
    // NOTE(review): the statement that opens the following block is missing;
    // the closing `}));` suggests a lambda passed to a call — confirm.
        std::ifstream mtx_stream{matrix};
        if (!mtx_stream) {
            throw GKO_STREAM_ERROR("Unable to open matrix file");
        }
        // Progress output goes to std::cerr; std::cout only receives the
        // result lines printed in the loop below.
        std::cerr << "Reading " << matrix << std::endl;
                                                                 exec);
    }));
 
    // Chooses the factorization factory from the name in `precond`.
    // Only the "parilut" and "parict" branches pass `limit`.
    // NOTE(review): the lambda header that declares `iteration` is missing
    // from this listing.
    auto factory_generator =
        if (precond == "parilu") {
            return gko::factorization::ParIlu<ValueType, IndexType>::build()
                .with_iterations(iteration)
                .on(exec);
        } else if (precond == "paric") {
            return gko::factorization::ParIc<ValueType, IndexType>::build()
                .with_iterations(iteration)
                .on(exec);
        } else if (precond == "parilut") {
            return gko::factorization::ParIlut<ValueType, IndexType>::build()
                .with_fill_in_limit(limit)
                .with_iterations(iteration)
                .on(exec);
        } else if (precond == "parict") {
            return gko::factorization::ParIct<ValueType, IndexType>::build()
                .with_fill_in_limit(limit)
                .with_iterations(iteration)
                .on(exec);
        } else {
            // Reached for any other name; presumably this macro raises a
            // "not implemented" error — confirm.
            GKO_NOT_IMPLEMENTED;
        }
    };
 
    // NOTE(review): the initializer of `minus_one` is missing, and `one`,
    // `residual` and `mtx` used below are not declared in the visible lines.
    auto minus_one =
    for (int it = 1; it <= max_iterations; ++it) {
        auto factory = factory_generator(it);
 
        std::cout << it << ';';
        std::vector<long> times;
        std::vector<double> residuals;
        for (int rep = 0; rep < num_repetitions; ++rep) {
            auto tic = std::chrono::high_resolution_clock::now();
            // NOTE(review): initializer missing; presumably the factorization
            // generated from `factory` — confirm.
            auto result =
            // Presumably waits for outstanding executor work so that it is
            // included in the measured interval — confirm.
            exec->synchronize();
            auto toc = std::chrono::high_resolution_clock::now();
            // Presumably updates `residual` to
            // one * op[0] * op[1] + minus_one * residual — confirm against
            // the LinOp apply documentation.
            result->get_operators()[0]->apply(one, result->get_operators()[1],
                                              minus_one, residual);
            // Elapsed time between tic and toc, stored in nanoseconds.
            times.push_back(
                std::chrono::duration_cast<std::chrono::nanoseconds>(toc - tic)
                    .count());
            residuals.push_back(
                compute_ilu_residual_norm(residual.get(), mtx.get()));
        }
        // All runtimes first, then all residual norms, ';'-terminated.
        for (auto el : times) {
            std::cout << el << ';';
        }
        for (auto el : residuals) {
            std::cout << el << ';';
        }
        std::cout << '\n';
    }
}
static std::shared_ptr< CudaExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_cuda_alloc_mode, CUstream_st *stream=nullptr)
Creates a new CudaExecutor.
static std::shared_ptr< DpcppExecutor > create(int device_id, std::shared_ptr< Executor > master, std::string device_type="all", dpcpp_queue_property property=dpcpp_queue_property::in_order)
Creates a new DpcppExecutor.
static std::shared_ptr< HipExecutor > create(int device_id, std::shared_ptr< Executor > master, bool device_reset, allocation_mode alloc_mode=default_hip_alloc_mode, CUstream_st *stream=nullptr)
Creates a new HipExecutor.
static std::shared_ptr< OmpExecutor > create(std::shared_ptr< CpuAllocatorBase > alloc=std::make_shared< CpuAllocator >())
Creates a new OmpExecutor.
Definition executor.hpp:1396