5#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_ 
    6#define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_ 
    9#include <ginkgo/core/base/array.hpp> 
   10#include <ginkgo/core/base/index_set.hpp> 
   11#include <ginkgo/core/base/lin_op.hpp> 
   12#include <ginkgo/core/base/math.hpp> 
   13#include <ginkgo/core/matrix/permutation.hpp> 
   14#include <ginkgo/core/matrix/scaled_permutation.hpp> 
   21template <
typename ValueType>
 
   24template <
typename ValueType>
 
   27template <
typename ValueType, 
typename IndexType>
 
   30template <
typename ValueType, 
typename IndexType>
 
   33template <
typename ValueType, 
typename IndexType>
 
   36template <
typename ValueType, 
typename IndexType>
 
   39template <
typename ValueType, 
typename IndexType>
 
   42template <
typename ValueType, 
typename IndexType>
 
   45template <
typename ValueType, 
typename IndexType>
 
   48template <
typename ValueType, 
typename IndexType>
 
   55template <
typename ValueType = default_precision, 
typename IndexType = 
int32>
 
  100template <
typename ValueType = default_precision, 
typename IndexType = 
int32>
 
  102            public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
 
  103#if GINKGO_ENABLE_HALF 
  105                Csr<next_precision<next_precision<ValueType>>, IndexType>>,
 
  120                remove_complex<Csr<ValueType, IndexType>>>,
 
  123    friend class Coo<ValueType, IndexType>;
 
  124    friend class Dense<ValueType>;
 
  126    friend class Ell<ValueType, IndexType>;
 
  127    friend class Hybrid<ValueType, IndexType>;
 
  128    friend class Sellp<ValueType, IndexType>;
 
  130    friend class Fbcsr<ValueType, IndexType>;
 
  131    friend class CsrBuilder<ValueType, IndexType>;
 
  155    using value_type = ValueType;
 
  156    using index_type = IndexType;
 
  171        friend class automatical;
 
  212        virtual std::shared_ptr<strategy_type> 
copy() = 0;
 
  /**
   * Assigns the given string to the `name_` member.
   *
   * @param name  the new name; taken by value and copied into `name_`
   */
  215        void set_name(std::string name) { name_ = name; }
 
 
  237            auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
 
  239            const bool is_mtx_on_host{host_mtx_exec ==
 
  241            const index_type* row_ptrs{};
 
  242            if (is_mtx_on_host) {
 
  245                row_ptrs_host = mtx_row_ptrs;
 
  248            auto num_rows = mtx_row_ptrs.
get_size() - 1;
 
  249            max_length_per_row_ = 0;
 
  250            for (
size_type i = 0; i < num_rows; i++) {
 
  251                max_length_per_row_ = std::max(max_length_per_row_,
 
  252                                               row_ptrs[i + 1] - row_ptrs[i]);
 
 
  /**
   * Computes the srow size according to the number of nonzeros.
   *
   * This override (listed for the classical strategy) does not read `nnz`
   * and always returns 0.
   *
   * @param nnz  the number of nonzeros (unused here)
   *
   * @return 0
   */
  256        int64_t 
clac_size(
const int64_t nnz)
 override { 
return 0; }
 
  258        index_type get_max_length_per_row() const noexcept
 
  260            return max_length_per_row_;
 
  263        std::shared_ptr<strategy_type> 
copy()
 override 
  265            return std::make_shared<classical>();
 
 
  269        index_type max_length_per_row_;
 
 
  /**
   * Computes the srow size according to the number of nonzeros.
   *
   * This override (listed for the merge_path strategy) does not read `nnz`
   * and always returns 0.
   *
   * @param nnz  the number of nonzeros (unused here)
   *
   * @return 0
   */
  288        int64_t 
clac_size(
const int64_t nnz)
 override { 
return 0; }
 
  290        std::shared_ptr<strategy_type> 
copy()
 override 
  292            return std::make_shared<merge_path>();
 
 
 
  /**
   * Computes the srow size according to the number of nonzeros.
   *
   * This override (listed for the cusparse strategy) does not read `nnz`
   * and always returns 0.
   *
   * @param nnz  the number of nonzeros (unused here)
   *
   * @return 0
   */
  313        int64_t 
clac_size(
const int64_t nnz)
 override { 
return 0; }
 
  315        std::shared_ptr<strategy_type> 
copy()
 override 
  317            return std::make_shared<cusparse>();
 
 
 
  /**
   * Computes the srow size according to the number of nonzeros.
   *
   * This override (listed for the sparselib strategy) does not read `nnz`
   * and always returns 0.
   *
   * @param nnz  the number of nonzeros (unused here)
   *
   * @return 0
   */
  337        int64_t 
clac_size(
const int64_t nnz)
 override { 
return 0; }
 
  339        std::shared_ptr<strategy_type> 
copy()
 override 
  341            return std::make_shared<sparselib>();
 
 
 
  367            : 
load_balance(exec->get_num_warps(), exec->get_warp_size())
 
 
  376            : 
load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
 
 
  387            : 
load_balance(exec->get_num_subgroups(), 32, false, 
"intel")
 
 
  402                     bool cuda_strategy = 
true,
 
  403                     std::string strategy_name = 
"none")
 
  406              warp_size_(warp_size),
 
  407              cuda_strategy_(cuda_strategy),
 
  408              strategy_name_(strategy_name)
 
 
  417                auto host_srow_exec = mtx_srow->
get_executor()->get_master();
 
  418                auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
 
  419                const bool is_srow_on_host{host_srow_exec ==
 
  421                const bool is_mtx_on_host{host_mtx_exec ==
 
  425                const index_type* row_ptrs{};
 
  427                if (is_srow_on_host) {
 
  430                    srow_host = *mtx_srow;
 
  433                if (is_mtx_on_host) {
 
  436                    row_ptrs_host = mtx_row_ptrs;
 
  442                const auto num_rows = mtx_row_ptrs.
get_size() - 1;
 
  443                const auto num_elems = row_ptrs[num_rows];
 
  444                const auto bucket_divider =
 
  445                    num_elems > 0 ? 
ceildiv(num_elems, warp_size_) : 1;
 
  446                for (
size_type i = 0; i < num_rows; i++) {
 
  450                    if (bucket < nwarps) {
 
  456                    srow[i] += srow[i - 1];
 
  458                if (!is_srow_on_host) {
 
  459                    *mtx_srow = srow_host;
 
 
  466            if (warp_size_ > 0) {
 
  468                if (nnz >= 
static_cast<int64_t
>(2e8)) {
 
  470                } 
else if (nnz >= 
static_cast<int64_t
>(2e7)) {
 
  472                } 
else if (nnz >= 
static_cast<int64_t
>(2e6)) {
 
  474                } 
else if (nnz >= 
static_cast<int64_t
>(2e5)) {
 
  477                if (strategy_name_ == 
"intel") {
 
  479                    if (nnz >= 
static_cast<int64_t
>(2e8)) {
 
  481                    } 
else if (nnz >= 
static_cast<int64_t
>(2e7)) {
 
  485#if GINKGO_HIP_PLATFORM_HCC 
  486                if (!cuda_strategy_) {
 
  488                    if (nnz >= 
static_cast<int64_t
>(1e7)) {
 
  490                    } 
else if (nnz >= 
static_cast<int64_t
>(1e6)) {
 
  496                auto nwarps = nwarps_ * multiple;
 
 
  503        std::shared_ptr<strategy_type> 
copy()
 override 
  505            return std::make_shared<load_balance>(
 
  506                nwarps_, warp_size_, cuda_strategy_, strategy_name_);
 
 
  513        std::string strategy_name_;
 
 
  520        const index_type nvidia_row_len_limit = 1024;
 
  523        const index_type nvidia_nnz_limit{
static_cast<index_type
>(1e6)};
 
  526        const index_type amd_row_len_limit = 768;
 
  529        const index_type amd_nnz_limit{
static_cast<index_type
>(1e8)};
 
  532        const index_type intel_row_len_limit = 25600;
 
  535        const index_type intel_nnz_limit{
static_cast<index_type
>(3e8)};
 
  555            : 
automatical(exec->get_num_warps(), exec->get_warp_size())
 
 
  564            : 
automatical(exec->get_num_warps(), exec->get_warp_size(), false)
 
 
  575            : 
automatical(exec->get_num_subgroups(), 32, false, 
"intel")
 
 
  590                    bool cuda_strategy = 
true,
 
  591                    std::string strategy_name = 
"none")
 
  594              warp_size_(warp_size),
 
  595              cuda_strategy_(cuda_strategy),
 
  596              strategy_name_(strategy_name),
 
  597              max_length_per_row_(0)
 
 
  606            index_type nnz_limit = nvidia_nnz_limit;
 
  607            index_type row_len_limit = nvidia_row_len_limit;
 
  608            if (strategy_name_ == 
"intel") {
 
  609                nnz_limit = intel_nnz_limit;
 
  610                row_len_limit = intel_row_len_limit;
 
  612#if GINKGO_HIP_PLATFORM_HCC 
  613            if (!cuda_strategy_) {
 
  614                nnz_limit = amd_nnz_limit;
 
  615                row_len_limit = amd_row_len_limit;
 
  618            auto host_mtx_exec = mtx_row_ptrs.
get_executor()->get_master();
 
  619            const bool is_mtx_on_host{host_mtx_exec ==
 
  622            const index_type* row_ptrs{};
 
  623            if (is_mtx_on_host) {
 
  626                row_ptrs_host = mtx_row_ptrs;
 
  629            const auto num_rows = mtx_row_ptrs.
get_size() - 1;
 
  630            if (row_ptrs[num_rows] > nnz_limit) {
 
  632                                             cuda_strategy_, strategy_name_);
 
  633                if (is_mtx_on_host) {
 
  634                    actual_strategy.
process(mtx_row_ptrs, mtx_srow);
 
  636                    actual_strategy.
process(row_ptrs_host, mtx_srow);
 
  638                this->set_name(actual_strategy.
get_name());
 
  640                index_type maxnum = 0;
 
  641                for (
size_type i = 0; i < num_rows; i++) {
 
  642                    maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
 
  644                if (maxnum > row_len_limit) {
 
  646                        nwarps_, warp_size_, cuda_strategy_, strategy_name_);
 
  647                    if (is_mtx_on_host) {
 
  648                        actual_strategy.
process(mtx_row_ptrs, mtx_srow);
 
  650                        actual_strategy.
process(row_ptrs_host, mtx_srow);
 
  652                    this->set_name(actual_strategy.
get_name());
 
  655                    if (is_mtx_on_host) {
 
  656                        actual_strategy.
process(mtx_row_ptrs, mtx_srow);
 
  657                        max_length_per_row_ =
 
  658                            actual_strategy.get_max_length_per_row();
 
  660                        actual_strategy.
process(row_ptrs_host, mtx_srow);
 
  661                        max_length_per_row_ =
 
  662                            actual_strategy.get_max_length_per_row();
 
  664                    this->set_name(actual_strategy.
get_name());
 
 
  671            return std::make_shared<load_balance>(
 
  672                       nwarps_, warp_size_, cuda_strategy_, strategy_name_)
 
 
  676        index_type get_max_length_per_row() const noexcept
 
  678            return max_length_per_row_;
 
  681        std::shared_ptr<strategy_type> 
copy()
 override 
  683            return std::make_shared<automatical>(
 
  684                nwarps_, warp_size_, cuda_strategy_, strategy_name_);
 
 
  691        std::string strategy_name_;
 
  692        index_type max_length_per_row_;
 
 
  695    friend class Csr<previous_precision<ValueType>, IndexType>;
 
  702#if GINKGO_ENABLE_HALF 
  703    friend class Csr<previous_precision<previous_precision<ValueType>>,
 
  711                        result) 
const override;
 
  745    void read(
const mat_data& data) 
override;
 
  747    void read(
const device_mat_data& data) 
override;
 
  749    void read(device_mat_data&& data) 
override;
 
  751    void write(mat_data& data) 
const override;
 
  791        bool invert = 
false) 
const;
 
  823        bool invert = 
false) 
const;
 
  825    std::unique_ptr<LinOp> 
permute(
 
  828    std::unique_ptr<LinOp> inverse_permute(
 
  831    std::unique_ptr<LinOp> row_permute(
 
  834    std::unique_ptr<LinOp> column_permute(
 
  837    std::unique_ptr<LinOp> inverse_row_permute(
 
  840    std::unique_ptr<LinOp> inverse_column_permute(
 
  860    bool is_sorted_by_column_index() 
const;
 
  /**
   * Returns the values of the matrix.
   *
   * @return the mutable pointer obtained from `values_.get_data()`
   */
  867    value_type* 
get_values() noexcept { 
return values_.get_data(); }
 
  878        return values_.get_const_data();
 
 
  897        return col_idxs_.get_const_data();
 
 
  916        return row_ptrs_.get_const_data();
 
 
  /**
   * Returns the starting rows.
   *
   * @return the mutable pointer obtained from `srow_.get_data()`
   */
  924    index_type* 
get_srow() noexcept { 
return srow_.get_data(); }
 
  935        return srow_.get_const_data();
 
 
  945        return srow_.get_size();
 
 
  955        return values_.get_size();
 
 
  974        strategy_ = std::move(strategy->copy());
 
 
  987        GKO_ASSERT_EQUAL_DIMENSIONS(alpha, 
dim<2>(1, 1));
 
 
 1000        GKO_ASSERT_EQUAL_DIMENSIONS(alpha, 
dim<2>(1, 1));
 
 
 1012    static std::unique_ptr<Csr> 
create(std::shared_ptr<const Executor> exec,
 
 1013                                       std::shared_ptr<strategy_type> strategy);
 
 1027        std::shared_ptr<const Executor> exec, 
const dim<2>& size = {},
 
 1029        std::shared_ptr<strategy_type> strategy = 
nullptr);
 
 1051        std::shared_ptr<const Executor> exec, 
const dim<2>& size,
 
 1054        std::shared_ptr<strategy_type> strategy = 
nullptr);
 
 1060    template <
typename InputValueType, 
typename InputColumnIndexType,
 
 1061              typename InputRowPtrType>
 
 1063        "explicitly construct the gko::array argument instead of passing " 
 1064        "initializer lists")
 
 1067        std::initializer_list<InputValueType> values,
 
 1068        std::initializer_list<InputColumnIndexType> col_idxs,
 
 1069        std::initializer_list<InputRowPtrType> row_ptrs)
 
 
 1092        std::shared_ptr<const Executor> exec, 
const dim<2>& size,
 
 1093        gko::detail::const_array_view<ValueType>&& values,
 
 1094        gko::detail::const_array_view<IndexType>&& col_idxs,
 
 1095        gko::detail::const_array_view<IndexType>&& row_ptrs,
 
 1096        std::shared_ptr<strategy_type> strategy = 
nullptr);
 
 1126        const span& row_span, 
const span& column_span) 
const;
 
 1153    Csr(std::shared_ptr<const Executor> exec, 
const dim<2>& size = {},
 
 1155        std::shared_ptr<strategy_type> strategy = 
nullptr);
 
 1157    Csr(std::shared_ptr<const Executor> exec, 
const dim<2>& size,
 
 1160        std::shared_ptr<strategy_type> strategy = 
nullptr);
 
 1162    void apply_impl(
const LinOp* b, 
LinOp* x) 
const override;
 
 1164    void apply_impl(
const LinOp* alpha, 
const LinOp* b, 
const LinOp* beta,
 
 1165                    LinOp* x) 
const override;
 
 1168    static std::shared_ptr<strategy_type> make_default_strategy(
 
 1169        std::shared_ptr<const Executor> exec)
 
 1171        auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
 
 1172        auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
 
 1173        auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
 
 1174        std::shared_ptr<strategy_type> new_strategy;
 
 1176            new_strategy = std::make_shared<automatical>(cuda_exec);
 
 1177        } 
else if (hip_exec) {
 
 1178            new_strategy = std::make_shared<automatical>(hip_exec);
 
 1179        } 
else if (dpcpp_exec) {
 
 1180            new_strategy = std::make_shared<automatical>(dpcpp_exec);
 
 1182            new_strategy = std::make_shared<classical>();
 
 1184        return new_strategy;
 
 1188    template <
typename CsrType>
 
 1189    void convert_strategy_helper(CsrType* result)
 const 
 1192        std::shared_ptr<typename CsrType::strategy_type> new_strat;
 
 1194            new_strat = std::make_shared<typename CsrType::classical>();
 
 1195        } 
else if (
dynamic_cast<merge_path*
>(strat)) {
 
 1196            new_strat = std::make_shared<typename CsrType::merge_path>();
 
 1197        } 
else if (
dynamic_cast<cusparse*
>(strat)) {
 
 1198            new_strat = std::make_shared<typename CsrType::cusparse>();
 
 1199        } 
else if (
dynamic_cast<sparselib*
>(strat)) {
 
 1200            new_strat = std::make_shared<typename CsrType::sparselib>();
 
 1202            auto rexec = result->get_executor();
 
 1204                std::dynamic_pointer_cast<const CudaExecutor>(rexec);
 
 1205            auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
 
 1207                std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
 
 1212                        std::make_shared<typename CsrType::load_balance>(
 
 1215                    new_strat = std::make_shared<typename CsrType::automatical>(
 
 1218            } 
else if (hip_exec) {
 
 1221                        std::make_shared<typename CsrType::load_balance>(
 
 1224                    new_strat = std::make_shared<typename CsrType::automatical>(
 
 1227            } 
else if (dpcpp_exec) {
 
 1230                        std::make_shared<typename CsrType::load_balance>(
 
 1233                    new_strat = std::make_shared<typename CsrType::automatical>(
 
 1238                auto this_cuda_exec =
 
 1239                    std::dynamic_pointer_cast<const CudaExecutor>(
 
 1241                auto this_hip_exec =
 
 1242                    std::dynamic_pointer_cast<const HipExecutor>(
 
 1244                auto this_dpcpp_exec =
 
 1245                    std::dynamic_pointer_cast<const DpcppExecutor>(
 
 1247                if (this_cuda_exec) {
 
 1250                            std::make_shared<typename CsrType::load_balance>(
 
 1254                            std::make_shared<typename CsrType::automatical>(
 
 1257                } 
else if (this_hip_exec) {
 
 1260                            std::make_shared<typename CsrType::load_balance>(
 
 1264                            std::make_shared<typename CsrType::automatical>(
 
 1267                } 
else if (this_dpcpp_exec) {
 
 1270                            std::make_shared<typename CsrType::load_balance>(
 
 1274                            std::make_shared<typename CsrType::automatical>(
 
 1282                    new_strat = std::make_shared<typename CsrType::classical>();
 
 1286        result->set_strategy(new_strat);
 
 1294        srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
 
 1295        strategy_->process(row_ptrs_, &srow_);
 
 1304    virtual void scale_impl(
const LinOp* alpha);
 
 1312    virtual void inv_scale_impl(
const LinOp* alpha);
 
 1315    std::shared_ptr<strategy_type> strategy_;
 
 1316    array<value_type> values_;
 
 1317    array<index_type> col_idxs_;
 
 1318    array<index_type> row_ptrs_;
 
 1319    array<index_type> srow_;
 
 1321    void add_scaled_identity_impl(
const LinOp* a, 
const LinOp* b) 
override;
 
 
 1334template <
typename ValueType, 
typename IndexType>
 
 1335void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
 
 1337    using load_balance = 
typename Csr<ValueType, IndexType>::load_balance;
 
 1338    using automatical = 
typename Csr<ValueType, IndexType>::automatical;
 
 1339    auto strategy = result->get_strategy();
 
 1340    auto executor = result->get_executor();
 
 1341    if (std::dynamic_pointer_cast<load_balance>(strategy)) {
 
 1343                std::dynamic_pointer_cast<const HipExecutor>(executor)) {
 
 1344            result->set_strategy(std::make_shared<load_balance>(exec));
 
 1345        } 
else if (
auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
 
 1347            result->set_strategy(std::make_shared<load_balance>(exec));
 
 1349    } 
else if (std::dynamic_pointer_cast<automatical>(strategy)) {
 
 1351                std::dynamic_pointer_cast<const HipExecutor>(executor)) {
 
 1352            result->set_strategy(std::make_shared<automatical>(exec));
 
 1353        } 
else if (
auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
 
 1355            result->set_strategy(std::make_shared<automatical>(exec));
 
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition polymorphic_object.hpp:470
This is the Executor subclass which represents the CUDA device.
Definition executor.hpp:1542
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition lin_op.hpp:794
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition lin_op.hpp:879
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition polymorphic_object.hpp:662
The first step in using the Ginkgo library consists of creating an executor.
Definition executor.hpp:615
Definition lin_op.hpp:117
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition executor.hpp:1387
Linear operators which support permutation should implement the Permutable interface.
Definition lin_op.hpp:484
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition polymorphic_object.hpp:234
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition lin_op.hpp:605
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition lin_op.hpp:818
Linear operators which support transposition should implement the Transposable interface.
Definition lin_op.hpp:433
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition lin_op.hpp:660
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition array.hpp:166
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition array.hpp:673
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition array.hpp:689
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition array.hpp:682
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition array.hpp:656
This type is a device-side equivalent to matrix_data.
Definition device_matrix_data.hpp:36
An index set class represents an ordered set of intervals.
Definition index_set.hpp:56
COO stores a matrix in the coordinate matrix format.
Definition coo.hpp:62
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:681
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition csr.hpp:589
automatical()
Creates an automatical strategy.
Definition csr.hpp:544
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:669
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition csr.hpp:554
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:600
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with Dpcpp executor.
Definition csr.hpp:574
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition csr.hpp:563
classical is a strategy_type which uses the same number of threads on each row.
Definition csr.hpp:227
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:234
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:263
classical()
Creates a classical strategy.
Definition csr.hpp:232
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:256
cusparse is a strategy_type which uses the sparselib csr.
Definition csr.hpp:302
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:313
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:315
cusparse()
Creates a cusparse strategy.
Definition csr.hpp:307
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:309
load_balance is a strategy_type which uses the load balance algorithm.
Definition csr.hpp:348
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:411
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:503
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition csr.hpp:375
load_balance()
Creates a load_balance strategy.
Definition csr.hpp:356
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:464
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition csr.hpp:401
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition csr.hpp:366
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition csr.hpp:386
merge_path is a strategy_type which uses the merge_path algorithm.
Definition csr.hpp:277
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:288
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:290
merge_path()
Creates a merge_path strategy.
Definition csr.hpp:282
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:284
sparselib is a strategy_type which uses the sparselib csr.
Definition csr.hpp:326
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:337
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:333
sparselib()
Creates a sparselib strategy.
Definition csr.hpp:331
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:339
strategy_type is to decide how to set the csr algorithm.
Definition csr.hpp:170
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
std::string get_name()
Returns the name of the strategy.
Definition csr.hpp:188
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
strategy_type(std::string name)
Creates a strategy_type.
Definition csr.hpp:179
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition csr.hpp:121
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:914
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const span &row_span, const span &column_span) const
Creates a submatrix from this Csr matrix given row and column spans.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type num_nonzeros={}, std::shared_ptr< strategy_type > strategy=nullptr)
Creates an uninitialized CSR matrix of the specified size.
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition csr.hpp:933
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition csr.hpp:972
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition csr.hpp:997
index_type * get_srow() noexcept
Returns the starting rows.
Definition csr.hpp:924
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
size_type get_num_srow_elements() const noexcept
Returns the number of elements stored in srow (the number of involved warps).
Definition csr.hpp:943
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size, array< value_type > values, array< index_type > col_idxs, array< index_type > row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a CSR matrix from already allocated (and initialized) row pointer, column index and value arr...
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:905
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation.
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
Csr(const Csr &)
Copy-constructs a Csr matrix.
Csr & operator=(Csr &&)
Move-assigns a Csr matrix.
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition csr.hpp:876
void compute_absolute_inplace() override
Computes the absolute value of each element in place.
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition csr.hpp:953
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition csr.hpp:962
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:895
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > row_permutation, ptr_param< const ScaledPermutation< value_type, index_type > > column_permutation, bool invert=false) const
Creates a scaled and permuted copy of this matrix.
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition csr.hpp:984
value_type * get_values() noexcept
Returns the values of the matrix.
Definition csr.hpp:867
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:886
Csr(Csr &&)
Move-constructs a Csr matrix.
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Creates a non-symmetrically permuted copy of this matrix with the given row and column permutations...
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
Dense is a matrix format which explicitly stores all values of the matrix.
Definition dense.hpp:117
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition diagonal.hpp:53
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition ell.hpp:64
Fixed-block compressed sparse row storage matrix format.
Definition fbcsr.hpp:113
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition hybrid.hpp:55
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition permutation.hpp:112
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition scaled_permutation.hpp:38
SELL-P is a matrix format similar to ELL format.
Definition sellp.hpp:55
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition sparsity_csr.hpp:56
This class is used for function parameters in the place of raw pointers.
Definition utils_helper.hpp:41
The matrix namespace.
Definition dense_cache.hpp:15
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition permutation.hpp:42
@ symmetric
The rows and columns will be permuted.
Definition permutation.hpp:53
The Ginkgo namespace.
Definition abstract_factory.hpp:20
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:260
typename detail::next_precision_impl< T >::type next_precision
Obtains the next type in the singly-linked precision list with half.
Definition math.hpp:438
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition math.hpp:279
void write(StreamType &&os, MatrixPtrType &&matrix, layout_type layout=detail::mtx_io_traits< std::remove_cv_t< detail::pointee< MatrixPtrType > > >::default_layout)
Writes a matrix into an output stream in matrix market format.
Definition mtx_io.hpp:295
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition math.hpp:590
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:89
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition math.hpp:719
std::unique_ptr< MatrixType > read(StreamType &&is, MatrixArgs &&... args)
Reads a matrix stored in matrix market format from an input stream.
Definition mtx_io.hpp:159
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition temporary_clone.hpp:208
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:126
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition range.hpp:46