Ginkgo Generated from branch based on main. Ginkgo version 1.9.0
A numerical linear algebra library targeting many-core architectures
Loading...
Searching...
No Matches
csr.hpp
1// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
2//
3// SPDX-License-Identifier: BSD-3-Clause
4
5#ifndef GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
6#define GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
7
8
9#include <ginkgo/core/base/array.hpp>
10#include <ginkgo/core/base/index_set.hpp>
11#include <ginkgo/core/base/lin_op.hpp>
12#include <ginkgo/core/base/math.hpp>
13#include <ginkgo/core/matrix/permutation.hpp>
14#include <ginkgo/core/matrix/scaled_permutation.hpp>
15
16
17namespace gko {
18namespace matrix {
19
20
21template <typename ValueType>
22class Dense;
23
24template <typename ValueType>
25class Diagonal;
26
27template <typename ValueType, typename IndexType>
28class Coo;
29
30template <typename ValueType, typename IndexType>
31class Ell;
32
33template <typename ValueType, typename IndexType>
34class Hybrid;
35
36template <typename ValueType, typename IndexType>
37class Sellp;
38
39template <typename ValueType, typename IndexType>
40class SparsityCsr;
41
42template <typename ValueType, typename IndexType>
43class Csr;
44
45template <typename ValueType, typename IndexType>
46class Fbcsr;
47
48template <typename ValueType, typename IndexType>
50
51
52namespace detail {
53
54
55template <typename ValueType = default_precision, typename IndexType = int32>
56void strategy_rebuild_helper(Csr<ValueType, IndexType>* result);
57
58
59} // namespace detail
60
61
100template <typename ValueType = default_precision, typename IndexType = int32>
101class Csr : public EnableLinOp<Csr<ValueType, IndexType>>,
102 public ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>,
103#if GINKGO_ENABLE_HALF
104 public ConvertibleTo<
105 Csr<next_precision<next_precision<ValueType>>, IndexType>>,
106#endif
107 public ConvertibleTo<Dense<ValueType>>,
108 public ConvertibleTo<Coo<ValueType, IndexType>>,
109 public ConvertibleTo<Ell<ValueType, IndexType>>,
110 public ConvertibleTo<Fbcsr<ValueType, IndexType>>,
111 public ConvertibleTo<Hybrid<ValueType, IndexType>>,
112 public ConvertibleTo<Sellp<ValueType, IndexType>>,
113 public ConvertibleTo<SparsityCsr<ValueType, IndexType>>,
114 public DiagonalExtractable<ValueType>,
115 public ReadableFromMatrixData<ValueType, IndexType>,
116 public WritableToMatrixData<ValueType, IndexType>,
117 public Transposable,
118 public Permutable<IndexType>,
120 remove_complex<Csr<ValueType, IndexType>>>,
121 public ScaledIdentityAddable {
122 friend class EnablePolymorphicObject<Csr, LinOp>;
123 friend class Coo<ValueType, IndexType>;
124 friend class Dense<ValueType>;
125 friend class Diagonal<ValueType>;
126 friend class Ell<ValueType, IndexType>;
127 friend class Hybrid<ValueType, IndexType>;
128 friend class Sellp<ValueType, IndexType>;
129 friend class SparsityCsr<ValueType, IndexType>;
130 friend class Fbcsr<ValueType, IndexType>;
131 friend class CsrBuilder<ValueType, IndexType>;
132 friend class Csr<to_complex<ValueType>, IndexType>;
133
134public:
135 using EnableLinOp<Csr>::convert_to;
136 using EnableLinOp<Csr>::move_to;
137 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::convert_to;
138 using ConvertibleTo<Csr<next_precision<ValueType>, IndexType>>::move_to;
139 using ConvertibleTo<Dense<ValueType>>::convert_to;
140 using ConvertibleTo<Dense<ValueType>>::move_to;
141 using ConvertibleTo<Coo<ValueType, IndexType>>::convert_to;
143 using ConvertibleTo<Ell<ValueType, IndexType>>::convert_to;
153 using ReadableFromMatrixData<ValueType, IndexType>::read;
154
155 using value_type = ValueType;
156 using index_type = IndexType;
157 using transposed_type = Csr<ValueType, IndexType>;
158 using mat_data = matrix_data<ValueType, IndexType>;
159 using device_mat_data = device_matrix_data<ValueType, IndexType>;
160 using absolute_type = remove_complex<Csr>;
161
162 class automatical;
163
171 friend class automatical;
172
173 public:
179 strategy_type(std::string name) : name_(name) {}
180
181 virtual ~strategy_type() = default;
182
188 std::string get_name() { return name_; }
189
196 virtual void process(const array<index_type>& mtx_row_ptrs,
197 array<index_type>* mtx_srow) = 0;
198
206 virtual int64_t clac_size(const int64_t nnz) = 0;
207
212 virtual std::shared_ptr<strategy_type> copy() = 0;
213
214 protected:
215 void set_name(std::string name) { name_ = name; }
216
217 private:
218 std::string name_;
219 };
220
227 class classical : public strategy_type {
228 public:
232 classical() : strategy_type("classical"), max_length_per_row_(0) {}
233
234 void process(const array<index_type>& mtx_row_ptrs,
235 array<index_type>* mtx_srow) override
236 {
237 auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
238 array<index_type> row_ptrs_host(host_mtx_exec);
239 const bool is_mtx_on_host{host_mtx_exec ==
240 mtx_row_ptrs.get_executor()};
241 const index_type* row_ptrs{};
242 if (is_mtx_on_host) {
243 row_ptrs = mtx_row_ptrs.get_const_data();
244 } else {
245 row_ptrs_host = mtx_row_ptrs;
246 row_ptrs = row_ptrs_host.get_const_data();
247 }
248 auto num_rows = mtx_row_ptrs.get_size() - 1;
249 max_length_per_row_ = 0;
250 for (size_type i = 0; i < num_rows; i++) {
251 max_length_per_row_ = std::max(max_length_per_row_,
252 row_ptrs[i + 1] - row_ptrs[i]);
253 }
254 }
255
256 int64_t clac_size(const int64_t nnz) override { return 0; }
257
258 index_type get_max_length_per_row() const noexcept
259 {
260 return max_length_per_row_;
261 }
262
263 std::shared_ptr<strategy_type> copy() override
264 {
265 return std::make_shared<classical>();
266 }
267
268 private:
269 index_type max_length_per_row_;
270 };
271
277 class merge_path : public strategy_type {
278 public:
282 merge_path() : strategy_type("merge_path") {}
283
284 void process(const array<index_type>& mtx_row_ptrs,
285 array<index_type>* mtx_srow) override
286 {}
287
288 int64_t clac_size(const int64_t nnz) override { return 0; }
289
290 std::shared_ptr<strategy_type> copy() override
291 {
292 return std::make_shared<merge_path>();
293 }
294 };
295
302 class cusparse : public strategy_type {
303 public:
307 cusparse() : strategy_type("cusparse") {}
308
309 void process(const array<index_type>& mtx_row_ptrs,
310 array<index_type>* mtx_srow) override
311 {}
312
313 int64_t clac_size(const int64_t nnz) override { return 0; }
314
315 std::shared_ptr<strategy_type> copy() override
316 {
317 return std::make_shared<cusparse>();
318 }
319 };
320
326 class sparselib : public strategy_type {
327 public:
331 sparselib() : strategy_type("sparselib") {}
332
333 void process(const array<index_type>& mtx_row_ptrs,
334 array<index_type>* mtx_srow) override
335 {}
336
337 int64_t clac_size(const int64_t nnz) override { return 0; }
338
339 std::shared_ptr<strategy_type> copy() override
340 {
341 return std::make_shared<sparselib>();
342 }
343 };
344
349 public:
356 [[deprecated]] load_balance()
357 : load_balance(std::move(
359 {}
360
        /**
         * Creates a load_balance strategy with a CUDA executor.
         *
         * @param exec the CUDA executor (supplies warp count and warp size)
         */
        load_balance(std::shared_ptr<const CudaExecutor> exec)
            : load_balance(exec->get_num_warps(), exec->get_warp_size())
        {}

        /**
         * Creates a load_balance strategy with a HIP executor.
         *
         * @param exec the HIP executor; cuda_strategy is set to false so
         *             the AMD tuning is used
         */
        load_balance(std::shared_ptr<const HipExecutor> exec)
            : load_balance(exec->get_num_warps(), exec->get_warp_size(), false)
        {}

        /**
         * Creates a load_balance strategy with a DPC++ executor.
         *
         * @param exec the DPC++ executor; selects the "intel" tuning
         *
         * @note the subgroup size is hard-coded to 32 here — presumably
         *       matching the device subgroup size; verify for new hardware
         */
        load_balance(std::shared_ptr<const DpcppExecutor> exec)
            : load_balance(exec->get_num_subgroups(), 32, false, "intel")
        {}
389
        /**
         * Creates a load_balance strategy with specified parameters.
         *
         * @param nwarps the number of warps in the executor
         * @param warp_size the warp size of the executor
         * @param cuda_strategy whether the CUDA thresholds are used
         *                      (false selects the AMD/HIP tuning)
         * @param strategy_name the tuning name; "intel" selects the Intel
         *                      thresholds
         */
        load_balance(int64_t nwarps, int warp_size = 32,
                     bool cuda_strategy = true,
                     std::string strategy_name = "none")
            : strategy_type("load_balance"),
              nwarps_(nwarps),
              warp_size_(warp_size),
              cuda_strategy_(cuda_strategy),
              strategy_name_(strategy_name)
        {}
410
        /**
         * Computes srow (the warp starting-row lookup) from the row pointers.
         *
         * Builds a histogram of rows over warp-sized work buckets, then
         * prefix-sums it so srow[i] gives the starting offset for warp i.
         * Device-resident arrays are staged through host copies.
         *
         * @param mtx_row_ptrs  the row pointers of the matrix
         * @param mtx_srow  the output srow array; its size determines the
         *                  number of warps (nothing is done if it is 0)
         */
        void process(const array<index_type>& mtx_row_ptrs,
                     array<index_type>* mtx_srow) override
        {
            auto nwarps = mtx_srow->get_size();

            if (nwarps > 0) {
                auto host_srow_exec = mtx_srow->get_executor()->get_master();
                auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
                const bool is_srow_on_host{host_srow_exec ==
                                           mtx_srow->get_executor()};
                const bool is_mtx_on_host{host_mtx_exec ==
                                          mtx_row_ptrs.get_executor()};
                array<index_type> row_ptrs_host(host_mtx_exec);
                array<index_type> srow_host(host_srow_exec);
                const index_type* row_ptrs{};
                index_type* srow{};
                // operate on host copies when either array lives on a device
                if (is_srow_on_host) {
                    srow = mtx_srow->get_data();
                } else {
                    srow_host = *mtx_srow;
                    srow = srow_host.get_data();
                }
                if (is_mtx_on_host) {
                    row_ptrs = mtx_row_ptrs.get_const_data();
                } else {
                    row_ptrs_host = mtx_row_ptrs;
                    row_ptrs = row_ptrs_host.get_const_data();
                }
                for (size_type i = 0; i < nwarps; i++) {
                    srow[i] = 0;
                }
                const auto num_rows = mtx_row_ptrs.get_size() - 1;
                const auto num_elems = row_ptrs[num_rows];
                // avoid a division by zero for an empty matrix
                const auto bucket_divider =
                    num_elems > 0 ? ceildiv(num_elems, warp_size_) : 1;
                // histogram: count the rows whose end falls in each bucket
                for (size_type i = 0; i < num_rows; i++) {
                    auto bucket =
                        ceildiv((ceildiv(row_ptrs[i + 1], warp_size_) * nwarps),
                                bucket_divider);
                    if (bucket < nwarps) {
                        srow[bucket]++;
                    }
                }
                // find starting row for thread i
                for (size_type i = 1; i < nwarps; i++) {
                    srow[i] += srow[i - 1];
                }
                // copy the result back to the device if necessary
                if (!is_srow_on_host) {
                    *mtx_srow = srow_host;
                }
            }
        }
463
        /**
         * Computes the srow size according to the number of nonzeros.
         *
         * Scales a per-warp multiple with the nonzero count using tunings
         * for CUDA (default), Intel ("intel") or AMD (HIP/HCC build with
         * cuda_strategy_ == false), then caps the result by the number of
         * warp-sized work chunks.
         *
         * @param nnz the number of stored elements
         * @return the size of srow, or 0 if warp_size_ is not positive
         */
        int64_t clac_size(const int64_t nnz) override
        {
            if (warp_size_ > 0) {
                int multiple = 8;
                // CUDA tuning: grow the multiple with the nonzero count
                if (nnz >= static_cast<int64_t>(2e8)) {
                    multiple = 2048;
                } else if (nnz >= static_cast<int64_t>(2e7)) {
                    multiple = 512;
                } else if (nnz >= static_cast<int64_t>(2e6)) {
                    multiple = 128;
                } else if (nnz >= static_cast<int64_t>(2e5)) {
                    multiple = 32;
                }
                // Intel tuning overrides the CUDA thresholds
                if (strategy_name_ == "intel") {
                    multiple = 8;
                    if (nnz >= static_cast<int64_t>(2e8)) {
                        multiple = 256;
                    } else if (nnz >= static_cast<int64_t>(2e7)) {
                        multiple = 32;
                    }
                }
#if GINKGO_HIP_PLATFORM_HCC
                // AMD tuning, only compiled on the HIP/HCC platform
                if (!cuda_strategy_) {
                    multiple = 8;
                    if (nnz >= static_cast<int64_t>(1e7)) {
                        multiple = 64;
                    } else if (nnz >= static_cast<int64_t>(1e6)) {
                        multiple = 16;
                    }
                }
#endif  // GINKGO_HIP_PLATFORM_HCC

                auto nwarps = nwarps_ * multiple;
                // never use more warps than there are warp-sized work chunks
                return min(ceildiv(nnz, warp_size_), nwarps);
            } else {
                return 0;
            }
        }
502
503 std::shared_ptr<strategy_type> copy() override
504 {
505 return std::make_shared<load_balance>(
506 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
507 }
508
509 private:
510 int64_t nwarps_;
511 int warp_size_;
512 bool cuda_strategy_;
513 std::string strategy_name_;
514 };
515
516 class automatical : public strategy_type {
517 public:
518 /* Use imbalance strategy when the maximum number of nonzero per row is
519 * more than 1024 on NVIDIA hardware */
520 const index_type nvidia_row_len_limit = 1024;
        /* Use imbalance strategy when the matrix has more than 1e6 stored
         * elements on NVIDIA hardware */
523 const index_type nvidia_nnz_limit{static_cast<index_type>(1e6)};
524 /* Use imbalance strategy when the maximum number of nonzero per row is
525 * more than 768 on AMD hardware */
526 const index_type amd_row_len_limit = 768;
        /* Use imbalance strategy when the matrix has more than 1e8 stored
         * elements on AMD hardware */
529 const index_type amd_nnz_limit{static_cast<index_type>(1e8)};
530 /* Use imbalance strategy when the maximum number of nonzero per row is
531 * more than 25600 on Intel hardware */
532 const index_type intel_row_len_limit = 25600;
        /* Use imbalance strategy when the matrix has more than 3e8 stored
         * elements on Intel hardware */
535 const index_type intel_nnz_limit{static_cast<index_type>(3e8)};
536
537 public:
544 [[deprecated]] automatical()
545 : automatical(std::move(
547 {}
548
        /**
         * Creates an automatical strategy with a CUDA executor.
         *
         * @param exec the CUDA executor (supplies warp count and warp size)
         */
        automatical(std::shared_ptr<const CudaExecutor> exec)
            : automatical(exec->get_num_warps(), exec->get_warp_size())
        {}

        /**
         * Creates an automatical strategy with a HIP executor.
         *
         * @param exec the HIP executor; cuda_strategy is set to false so
         *             the AMD limits are used
         */
        automatical(std::shared_ptr<const HipExecutor> exec)
            : automatical(exec->get_num_warps(), exec->get_warp_size(), false)
        {}

        /**
         * Creates an automatical strategy with a DPC++ executor.
         *
         * @param exec the DPC++ executor; selects the "intel" limits
         *
         * @note the subgroup size is hard-coded to 32 here — presumably
         *       matching the device subgroup size; verify for new hardware
         */
        automatical(std::shared_ptr<const DpcppExecutor> exec)
            : automatical(exec->get_num_subgroups(), 32, false, "intel")
        {}
577
        /**
         * Creates an automatical strategy with specified parameters.
         *
         * @param nwarps the number of warps in the executor
         * @param warp_size the warp size of the executor
         * @param cuda_strategy whether the CUDA limits are used
         *                      (false selects the AMD limits on HIP builds)
         * @param strategy_name the tuning name; "intel" selects the Intel
         *                      limits
         */
        automatical(int64_t nwarps, int warp_size = 32,
                    bool cuda_strategy = true,
                    std::string strategy_name = "none")
            : strategy_type("automatical"),
              nwarps_(nwarps),
              warp_size_(warp_size),
              cuda_strategy_(cuda_strategy),
              strategy_name_(strategy_name),
              max_length_per_row_(0)
        {}
599
        /**
         * Computes srow according to row pointers, delegating to either
         * load_balance or classical.
         *
         * load_balance is chosen when the matrix has more stored elements
         * than the hardware-specific nnz limit, or when some row is longer
         * than the row-length limit; otherwise classical is used and its
         * maximum row length is recorded. The chosen strategy's name is
         * stored via set_name().
         *
         * @param mtx_row_ptrs  the row pointers of the matrix
         * @param mtx_srow  the output srow array
         */
        void process(const array<index_type>& mtx_row_ptrs,
                     array<index_type>* mtx_srow) override
        {
            // if the number of stored elements is larger than <nnz_limit> or
            // the maximum number of stored elements per row is larger than
            // <row_len_limit>, use load_balance otherwise use classical
            index_type nnz_limit = nvidia_nnz_limit;
            index_type row_len_limit = nvidia_row_len_limit;
            if (strategy_name_ == "intel") {
                nnz_limit = intel_nnz_limit;
                row_len_limit = intel_row_len_limit;
            }
#if GINKGO_HIP_PLATFORM_HCC
            if (!cuda_strategy_) {
                nnz_limit = amd_nnz_limit;
                row_len_limit = amd_row_len_limit;
            }
#endif  // GINKGO_HIP_PLATFORM_HCC
            auto host_mtx_exec = mtx_row_ptrs.get_executor()->get_master();
            const bool is_mtx_on_host{host_mtx_exec ==
                                      mtx_row_ptrs.get_executor()};
            array<index_type> row_ptrs_host(host_mtx_exec);
            const index_type* row_ptrs{};
            if (is_mtx_on_host) {
                row_ptrs = mtx_row_ptrs.get_const_data();
            } else {
                // bring the row pointers to the host for inspection
                row_ptrs_host = mtx_row_ptrs;
                row_ptrs = row_ptrs_host.get_const_data();
            }
            const auto num_rows = mtx_row_ptrs.get_size() - 1;
            if (row_ptrs[num_rows] > nnz_limit) {
                // too many stored elements overall -> load_balance
                load_balance actual_strategy(nwarps_, warp_size_,
                                             cuda_strategy_, strategy_name_);
                if (is_mtx_on_host) {
                    actual_strategy.process(mtx_row_ptrs, mtx_srow);
                } else {
                    actual_strategy.process(row_ptrs_host, mtx_srow);
                }
                this->set_name(actual_strategy.get_name());
            } else {
                index_type maxnum = 0;
                for (size_type i = 0; i < num_rows; i++) {
                    maxnum = std::max(maxnum, row_ptrs[i + 1] - row_ptrs[i]);
                }
                if (maxnum > row_len_limit) {
                    // at least one overly long row -> load_balance
                    load_balance actual_strategy(
                        nwarps_, warp_size_, cuda_strategy_, strategy_name_);
                    if (is_mtx_on_host) {
                        actual_strategy.process(mtx_row_ptrs, mtx_srow);
                    } else {
                        actual_strategy.process(row_ptrs_host, mtx_srow);
                    }
                    this->set_name(actual_strategy.get_name());
                } else {
                    // regular, moderate-size matrix -> classical
                    classical actual_strategy;
                    if (is_mtx_on_host) {
                        actual_strategy.process(mtx_row_ptrs, mtx_srow);
                        max_length_per_row_ =
                            actual_strategy.get_max_length_per_row();
                    } else {
                        actual_strategy.process(row_ptrs_host, mtx_srow);
                        max_length_per_row_ =
                            actual_strategy.get_max_length_per_row();
                    }
                    this->set_name(actual_strategy.get_name());
                }
            }
        }
668
669 int64_t clac_size(const int64_t nnz) override
670 {
671 return std::make_shared<load_balance>(
672 nwarps_, warp_size_, cuda_strategy_, strategy_name_)
673 ->clac_size(nnz);
674 }
675
        /**
         * Returns the maximum number of stored elements per row, as recorded
         * by the last process() call that fell back to the classical
         * strategy (0 otherwise).
         */
        index_type get_max_length_per_row() const noexcept
        {
            return max_length_per_row_;
        }
680
681 std::shared_ptr<strategy_type> copy() override
682 {
683 return std::make_shared<automatical>(
684 nwarps_, warp_size_, cuda_strategy_, strategy_name_);
685 }
686
687 private:
688 int64_t nwarps_;
689 int warp_size_;
690 bool cuda_strategy_;
691 std::string strategy_name_;
692 index_type max_length_per_row_;
693 };
694
695 friend class Csr<previous_precision<ValueType>, IndexType>;
696
697 void convert_to(
698 Csr<next_precision<ValueType>, IndexType>* result) const override;
699
700 void move_to(Csr<next_precision<ValueType>, IndexType>* result) override;
701
702#if GINKGO_ENABLE_HALF
703 friend class Csr<previous_precision<previous_precision<ValueType>>,
704 IndexType>;
705 using ConvertibleTo<
706 Csr<next_precision<next_precision<ValueType>>, IndexType>>::convert_to;
707 using ConvertibleTo<
709
710 void convert_to(Csr<next_precision<next_precision<ValueType>>, IndexType>*
711 result) const override;
712
713 void move_to(Csr<next_precision<next_precision<ValueType>>, IndexType>*
714 result) override;
715#endif
716
717 void convert_to(Dense<ValueType>* other) const override;
718
719 void move_to(Dense<ValueType>* other) override;
720
721 void convert_to(Coo<ValueType, IndexType>* result) const override;
722
723 void move_to(Coo<ValueType, IndexType>* result) override;
724
725 void convert_to(Ell<ValueType, IndexType>* result) const override;
726
727 void move_to(Ell<ValueType, IndexType>* result) override;
728
729 void convert_to(Fbcsr<ValueType, IndexType>* result) const override;
730
731 void move_to(Fbcsr<ValueType, IndexType>* result) override;
732
733 void convert_to(Hybrid<ValueType, IndexType>* result) const override;
734
735 void move_to(Hybrid<ValueType, IndexType>* result) override;
736
737 void convert_to(Sellp<ValueType, IndexType>* result) const override;
738
739 void move_to(Sellp<ValueType, IndexType>* result) override;
740
741 void convert_to(SparsityCsr<ValueType, IndexType>* result) const override;
742
743 void move_to(SparsityCsr<ValueType, IndexType>* result) override;
744
745 void read(const mat_data& data) override;
746
747 void read(const device_mat_data& data) override;
748
749 void read(device_mat_data&& data) override;
750
751 void write(mat_data& data) const override;
752
753 std::unique_ptr<LinOp> transpose() const override;
754
755 std::unique_ptr<LinOp> conj_transpose() const override;
756
771 std::unique_ptr<Csr> permute(
772 ptr_param<const Permutation<index_type>> permutation,
774
788 std::unique_ptr<Csr> permute(
789 ptr_param<const Permutation<index_type>> row_permutation,
790 ptr_param<const Permutation<index_type>> column_permutation,
791 bool invert = false) const;
792
802 std::unique_ptr<Csr> scale_permute(
805
818 std::unique_ptr<Csr> scale_permute(
820 row_permutation,
822 column_permutation,
823 bool invert = false) const;
824
825 std::unique_ptr<LinOp> permute(
826 const array<IndexType>* permutation_indices) const override;
827
828 std::unique_ptr<LinOp> inverse_permute(
829 const array<IndexType>* inverse_permutation_indices) const override;
830
831 std::unique_ptr<LinOp> row_permute(
832 const array<IndexType>* permutation_indices) const override;
833
834 std::unique_ptr<LinOp> column_permute(
835 const array<IndexType>* permutation_indices) const override;
836
837 std::unique_ptr<LinOp> inverse_row_permute(
838 const array<IndexType>* inverse_permutation_indices) const override;
839
840 std::unique_ptr<LinOp> inverse_column_permute(
841 const array<IndexType>* inverse_permutation_indices) const override;
842
843 std::unique_ptr<Diagonal<ValueType>> extract_diagonal() const override;
844
845 std::unique_ptr<absolute_type> compute_absolute() const override;
846
848
853
854 /*
855 * Tests if all row entry pairs (value, col_idx) are sorted by column index
856 *
857 * @returns True if all row entry pairs (value, col_idx) are sorted by
858 * column index
859 */
860 bool is_sorted_by_column_index() const;
861
    /**
     * Returns a pointer to the array of values of the matrix.
     */
    value_type* get_values() noexcept { return values_.get_data(); }

    /**
     * @copydoc get_values()
     *
     * @note Constant version; prefer it whenever the values are not
     *       modified, as it avoids triggering copy-back of device data.
     */
    const value_type* get_const_values() const noexcept
    {
        return values_.get_const_data();
    }

    /**
     * Returns a pointer to the array of column indexes of the matrix.
     */
    index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); }

    /**
     * @copydoc get_col_idxs()
     */
    const index_type* get_const_col_idxs() const noexcept
    {
        return col_idxs_.get_const_data();
    }

    /**
     * Returns a pointer to the array of row pointers of the matrix.
     */
    index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); }

    /**
     * @copydoc get_row_ptrs()
     */
    const index_type* get_const_row_ptrs() const noexcept
    {
        return row_ptrs_.get_const_data();
    }

    /**
     * Returns a pointer to the srow array (the strategy-specific starting
     * rows computed by make_srow()).
     */
    index_type* get_srow() noexcept { return srow_.get_data(); }

    /**
     * @copydoc get_srow()
     */
    const index_type* get_const_srow() const noexcept
    {
        return srow_.get_const_data();
    }
937
944 {
945 return srow_.get_size();
946 }
947
954 {
955 return values_.get_size();
956 }
957
    /**
     * Returns the current strategy of the matrix.
     */
    std::shared_ptr<strategy_type> get_strategy() const noexcept
    {
        return strategy_;
    }

    /**
     * Sets the strategy (a private copy is taken via strategy->copy())
     * and rebuilds the srow array for the new strategy.
     *
     * @param strategy the strategy to install
     */
    void set_strategy(std::shared_ptr<strategy_type> strategy)
    {
        // NOTE(review): std::move on the rvalue returned by copy() is
        // redundant but harmless
        strategy_ = std::move(strategy->copy());
        this->make_srow();
    }
977
985 {
986 auto exec = this->get_executor();
987 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
988 this->scale_impl(make_temporary_clone(exec, alpha).get());
989 }
990
998 {
999 auto exec = this->get_executor();
1000 GKO_ASSERT_EQUAL_DIMENSIONS(alpha, dim<2>(1, 1));
1001 this->inv_scale_impl(make_temporary_clone(exec, alpha).get());
1002 }
1003
1012 static std::unique_ptr<Csr> create(std::shared_ptr<const Executor> exec,
1013 std::shared_ptr<strategy_type> strategy);
1014
1026 static std::unique_ptr<Csr> create(
1027 std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1028 size_type num_nonzeros = {},
1029 std::shared_ptr<strategy_type> strategy = nullptr);
1030
1050 static std::unique_ptr<Csr> create(
1051 std::shared_ptr<const Executor> exec, const dim<2>& size,
1052 array<value_type> values, array<index_type> col_idxs,
1053 array<index_type> row_ptrs,
1054 std::shared_ptr<strategy_type> strategy = nullptr);
1055
    /**
     * Creates a CSR matrix from initializer lists for values, column
     * indexes and row pointers.
     *
     * @deprecated explicitly construct the gko::array arguments instead of
     *             passing initializer lists
     */
    template <typename InputValueType, typename InputColumnIndexType,
              typename InputRowPtrType>
    GKO_DEPRECATED(
        "explicitly construct the gko::array argument instead of passing "
        "initializer lists")
    static std::unique_ptr<Csr> create(
        std::shared_ptr<const Executor> exec, const dim<2>& size,
        std::initializer_list<InputValueType> values,
        std::initializer_list<InputColumnIndexType> col_idxs,
        std::initializer_list<InputRowPtrType> row_ptrs)
    {
        // forward the initializer lists into owning gko::array objects
        return create(exec, size, array<value_type>{exec, std::move(values)},
                      array<index_type>{exec, std::move(col_idxs)},
                      array<index_type>{exec, std::move(row_ptrs)});
    }
1075
1091 static std::unique_ptr<const Csr> create_const(
1092 std::shared_ptr<const Executor> exec, const dim<2>& size,
1093 gko::detail::const_array_view<ValueType>&& values,
1094 gko::detail::const_array_view<IndexType>&& col_idxs,
1095 gko::detail::const_array_view<IndexType>&& row_ptrs,
1096 std::shared_ptr<strategy_type> strategy = nullptr);
1097
1110 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1111 const index_set<IndexType>& row_index_set,
1112 const index_set<IndexType>& column_index_set) const;
1113
1125 std::unique_ptr<Csr<ValueType, IndexType>> create_submatrix(
1126 const span& row_span, const span& column_span) const;
1127
1132
1139
1143 Csr(const Csr&);
1144
1151
1152protected:
1153 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size = {},
1154 size_type num_nonzeros = {},
1155 std::shared_ptr<strategy_type> strategy = nullptr);
1156
1157 Csr(std::shared_ptr<const Executor> exec, const dim<2>& size,
1158 array<value_type> values, array<index_type> col_idxs,
1159 array<index_type> row_ptrs,
1160 std::shared_ptr<strategy_type> strategy = nullptr);
1161
1162 void apply_impl(const LinOp* b, LinOp* x) const override;
1163
1164 void apply_impl(const LinOp* alpha, const LinOp* b, const LinOp* beta,
1165 LinOp* x) const override;
1166
1167 // TODO: This provides some more sane settings. Please fix this!
1168 static std::shared_ptr<strategy_type> make_default_strategy(
1169 std::shared_ptr<const Executor> exec)
1170 {
1171 auto cuda_exec = std::dynamic_pointer_cast<const CudaExecutor>(exec);
1172 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(exec);
1173 auto dpcpp_exec = std::dynamic_pointer_cast<const DpcppExecutor>(exec);
1174 std::shared_ptr<strategy_type> new_strategy;
1175 if (cuda_exec) {
1176 new_strategy = std::make_shared<automatical>(cuda_exec);
1177 } else if (hip_exec) {
1178 new_strategy = std::make_shared<automatical>(hip_exec);
1179 } else if (dpcpp_exec) {
1180 new_strategy = std::make_shared<automatical>(dpcpp_exec);
1181 } else {
1182 new_strategy = std::make_shared<classical>();
1183 }
1184 return new_strategy;
1185 }
1186
1187 // TODO clean this up as soon as we improve strategy_type
1188 template <typename CsrType>
1189 void convert_strategy_helper(CsrType* result) const
1190 {
1191 auto strat = this->get_strategy().get();
1192 std::shared_ptr<typename CsrType::strategy_type> new_strat;
1193 if (dynamic_cast<classical*>(strat)) {
1194 new_strat = std::make_shared<typename CsrType::classical>();
1195 } else if (dynamic_cast<merge_path*>(strat)) {
1196 new_strat = std::make_shared<typename CsrType::merge_path>();
1197 } else if (dynamic_cast<cusparse*>(strat)) {
1198 new_strat = std::make_shared<typename CsrType::cusparse>();
1199 } else if (dynamic_cast<sparselib*>(strat)) {
1200 new_strat = std::make_shared<typename CsrType::sparselib>();
1201 } else {
1202 auto rexec = result->get_executor();
1203 auto cuda_exec =
1204 std::dynamic_pointer_cast<const CudaExecutor>(rexec);
1205 auto hip_exec = std::dynamic_pointer_cast<const HipExecutor>(rexec);
1206 auto dpcpp_exec =
1207 std::dynamic_pointer_cast<const DpcppExecutor>(rexec);
1208 auto lb = dynamic_cast<load_balance*>(strat);
1209 if (cuda_exec) {
1210 if (lb) {
1211 new_strat =
1212 std::make_shared<typename CsrType::load_balance>(
1213 cuda_exec);
1214 } else {
1215 new_strat = std::make_shared<typename CsrType::automatical>(
1216 cuda_exec);
1217 }
1218 } else if (hip_exec) {
1219 if (lb) {
1220 new_strat =
1221 std::make_shared<typename CsrType::load_balance>(
1222 hip_exec);
1223 } else {
1224 new_strat = std::make_shared<typename CsrType::automatical>(
1225 hip_exec);
1226 }
1227 } else if (dpcpp_exec) {
1228 if (lb) {
1229 new_strat =
1230 std::make_shared<typename CsrType::load_balance>(
1231 dpcpp_exec);
1232 } else {
1233 new_strat = std::make_shared<typename CsrType::automatical>(
1234 dpcpp_exec);
1235 }
1236 } else {
1237 // Try to preserve this executor's configuration
1238 auto this_cuda_exec =
1239 std::dynamic_pointer_cast<const CudaExecutor>(
1240 this->get_executor());
1241 auto this_hip_exec =
1242 std::dynamic_pointer_cast<const HipExecutor>(
1243 this->get_executor());
1244 auto this_dpcpp_exec =
1245 std::dynamic_pointer_cast<const DpcppExecutor>(
1246 this->get_executor());
1247 if (this_cuda_exec) {
1248 if (lb) {
1249 new_strat =
1250 std::make_shared<typename CsrType::load_balance>(
1251 this_cuda_exec);
1252 } else {
1253 new_strat =
1254 std::make_shared<typename CsrType::automatical>(
1255 this_cuda_exec);
1256 }
1257 } else if (this_hip_exec) {
1258 if (lb) {
1259 new_strat =
1260 std::make_shared<typename CsrType::load_balance>(
1261 this_hip_exec);
1262 } else {
1263 new_strat =
1264 std::make_shared<typename CsrType::automatical>(
1265 this_hip_exec);
1266 }
1267 } else if (this_dpcpp_exec) {
1268 if (lb) {
1269 new_strat =
1270 std::make_shared<typename CsrType::load_balance>(
1271 this_dpcpp_exec);
1272 } else {
1273 new_strat =
1274 std::make_shared<typename CsrType::automatical>(
1275 this_dpcpp_exec);
1276 }
1277 } else {
1278 // FIXME: this changes strategies.
1279 // We had a load balance or automatical strategy from a non
1280 // HIP or Cuda executor and are moving to a non HIP or Cuda
1281 // executor.
1282 new_strat = std::make_shared<typename CsrType::classical>();
1283 }
1284 }
1285 }
1286 result->set_strategy(new_strat);
1287 }
1288
    /**
     * Recomputes srow for the current strategy. Must be run after any
     * change to row_ptrs_ or to the strategy.
     */
    void make_srow()
    {
        // the srow size depends on the strategy and the nonzero count
        // (values_.get_size() equals the number of stored elements)
        srow_.resize_and_reset(strategy_->clac_size(values_.get_size()));
        strategy_->process(row_ptrs_, &srow_);
    }
1297
1304 virtual void scale_impl(const LinOp* alpha);
1305
1312 virtual void inv_scale_impl(const LinOp* alpha);
1313
1314private:
1315 std::shared_ptr<strategy_type> strategy_;
1316 array<value_type> values_;
1317 array<index_type> col_idxs_;
1318 array<index_type> row_ptrs_;
1319 array<index_type> srow_;
1320
1321 void add_scaled_identity_impl(const LinOp* a, const LinOp* b) override;
1322};
1323
1324
1325namespace detail {
1326
1327
1334template <typename ValueType, typename IndexType>
1335void strategy_rebuild_helper(Csr<ValueType, IndexType>* result)
1336{
1337 using load_balance = typename Csr<ValueType, IndexType>::load_balance;
1338 using automatical = typename Csr<ValueType, IndexType>::automatical;
1339 auto strategy = result->get_strategy();
1340 auto executor = result->get_executor();
1341 if (std::dynamic_pointer_cast<load_balance>(strategy)) {
1342 if (auto exec =
1343 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1344 result->set_strategy(std::make_shared<load_balance>(exec));
1345 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1346 executor)) {
1347 result->set_strategy(std::make_shared<load_balance>(exec));
1348 }
1349 } else if (std::dynamic_pointer_cast<automatical>(strategy)) {
1350 if (auto exec =
1351 std::dynamic_pointer_cast<const HipExecutor>(executor)) {
1352 result->set_strategy(std::make_shared<automatical>(exec));
1353 } else if (auto exec = std::dynamic_pointer_cast<const CudaExecutor>(
1354 executor)) {
1355 result->set_strategy(std::make_shared<automatical>(exec));
1356 }
1357 }
1358}
1359
1360
1361} // namespace detail
1362} // namespace matrix
1363} // namespace gko
1364
1365
1366#endif // GKO_PUBLIC_CORE_MATRIX_CSR_HPP_
ConvertibleTo interface is used to mark that the implementer can be converted to the object of Result...
Definition polymorphic_object.hpp:470
This is the Executor subclass which represents the CUDA device.
Definition executor.hpp:1542
The diagonal of a LinOp implementing this interface can be extracted.
Definition lin_op.hpp:743
The EnableAbsoluteComputation mixin provides the default implementations of compute_absolute_linop an...
Definition lin_op.hpp:794
The EnableLinOp mixin can be used to provide sensible default implementations of the majority of the ...
Definition lin_op.hpp:879
This mixin inherits from (a subclass of) PolymorphicObject and provides a base implementation of a ne...
Definition polymorphic_object.hpp:662
The first step in using the Ginkgo library consists of creating an executor.
Definition executor.hpp:615
Definition lin_op.hpp:117
LinOp(const LinOp &)=default
Copy-constructs a LinOp.
This is the Executor subclass which represents the OpenMP device (typically CPU).
Definition executor.hpp:1387
Linear operators which support permutation should implement the Permutable interface.
Definition lin_op.hpp:484
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor of the object.
Definition polymorphic_object.hpp:234
A LinOp implementing this interface can read its data from a matrix_data structure.
Definition lin_op.hpp:605
Adds the operation M <- a I + b M for matrix M, identity operator I and scalars a and b,...
Definition lin_op.hpp:818
Linear operators which support transposition should implement the Transposable interface.
Definition lin_op.hpp:433
A LinOp implementing this interface can write its data to a matrix_data structure.
Definition lin_op.hpp:660
An array is a container which encapsulates fixed-sized arrays, stored on the Executor tied to the arr...
Definition logger.hpp:25
void resize_and_reset(size_type size)
Resizes the array so it is able to hold the specified number of elements.
Definition array.hpp:622
value_type * get_data() noexcept
Returns a pointer to the block of memory used to store the elements of the array.
Definition array.hpp:673
std::shared_ptr< const Executor > get_executor() const noexcept
Returns the Executor associated with the array.
Definition array.hpp:689
const value_type * get_const_data() const noexcept
Returns a constant pointer to the block of memory used to store the elements of the array.
Definition array.hpp:682
size_type get_size() const noexcept
Returns the number of elements in the array.
Definition array.hpp:656
This type is a device-side equivalent to matrix_data.
Definition device_matrix_data.hpp:36
An index set class represents an ordered set of intervals.
Definition index_set.hpp:56
COO stores a matrix in the coordinate matrix format.
Definition ell.hpp:21
Definition csr.hpp:49
Definition csr.hpp:516
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:681
automatical(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates an automatical strategy with specified parameters.
Definition csr.hpp:589
automatical()
Creates an automatical strategy.
Definition csr.hpp:544
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:669
automatical(std::shared_ptr< const CudaExecutor > exec)
Creates an automatical strategy with CUDA executor.
Definition csr.hpp:554
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:600
automatical(std::shared_ptr< const DpcppExecutor > exec)
Creates an automatical strategy with Dpcpp executor.
Definition csr.hpp:574
automatical(std::shared_ptr< const HipExecutor > exec)
Creates an automatical strategy with HIP executor.
Definition csr.hpp:563
classical is a strategy_type which uses the same number of threads on each row.
Definition csr.hpp:227
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:234
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:263
classical()
Creates a classical strategy.
Definition csr.hpp:232
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:256
cusparse is a strategy_type which uses the sparselib csr.
Definition csr.hpp:302
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:313
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:315
cusparse()
Creates a cusparse strategy.
Definition csr.hpp:307
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:309
load_balance is a strategy_type which uses the load balance algorithm.
Definition csr.hpp:348
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:411
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:503
load_balance(std::shared_ptr< const HipExecutor > exec)
Creates a load_balance strategy with HIP executor.
Definition csr.hpp:375
load_balance()
Creates a load_balance strategy.
Definition csr.hpp:356
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:464
load_balance(int64_t nwarps, int warp_size=32, bool cuda_strategy=true, std::string strategy_name="none")
Creates a load_balance strategy with specified parameters.
Definition csr.hpp:401
load_balance(std::shared_ptr< const CudaExecutor > exec)
Creates a load_balance strategy with CUDA executor.
Definition csr.hpp:366
load_balance(std::shared_ptr< const DpcppExecutor > exec)
Creates a load_balance strategy with DPCPP executor.
Definition csr.hpp:386
merge_path is a strategy_type which uses the merge_path algorithm.
Definition csr.hpp:277
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:288
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:290
merge_path()
Creates a merge_path strategy.
Definition csr.hpp:282
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:284
sparselib is a strategy_type which uses the sparselib csr.
Definition csr.hpp:326
int64_t clac_size(const int64_t nnz) override
Computes the srow size according to the number of nonzeros.
Definition csr.hpp:337
void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow) override
Computes srow according to row pointers.
Definition csr.hpp:333
sparselib()
Creates a sparselib strategy.
Definition csr.hpp:331
std::shared_ptr< strategy_type > copy() override
Copy a strategy.
Definition csr.hpp:339
strategy_type decides which csr algorithm to use.
Definition csr.hpp:170
virtual int64_t clac_size(const int64_t nnz)=0
Computes the srow size according to the number of nonzeros.
std::string get_name()
Returns the name of strategy.
Definition csr.hpp:188
virtual std::shared_ptr< strategy_type > copy()=0
Copy a strategy.
virtual void process(const array< index_type > &mtx_row_ptrs, array< index_type > *mtx_srow)=0
Computes srow according to row pointers.
strategy_type(std::string name)
Creates a strategy_type.
Definition csr.hpp:179
CSR is a matrix format which stores only the nonzero coefficients by compressing each row of the matr...
Definition sparsity_csr.hpp:21
std::unique_ptr< LinOp > column_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the column permutation of the Permutable object.
Csr & operator=(const Csr &)
Copy-assigns a Csr matrix.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > permutation, permute_mode=permute_mode::symmetric) const
Creates a scaled and permuted copy of this matrix.
void write(mat_data &data) const override
Writes a matrix to a matrix_data structure.
std::unique_ptr< absolute_type > compute_absolute() const override
Gets the AbsoluteLinOp.
const index_type * get_const_row_ptrs() const noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:914
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const span &row_span, const span &column_span) const
Creates a submatrix from this Csr matrix given row and column spans.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size={}, size_type num_nonzeros={}, std::shared_ptr< strategy_type > strategy=nullptr)
Creates an uninitialized CSR matrix of the specified size.
void read(device_mat_data &&data) override
Reads a matrix from a device_matrix_data structure.
const index_type * get_const_srow() const noexcept
Returns the starting rows.
Definition csr.hpp:933
void set_strategy(std::shared_ptr< strategy_type > strategy)
Set the strategy.
Definition csr.hpp:972
void inv_scale(ptr_param< const LinOp > alpha)
Scales the matrix with the inverse of a scalar.
Definition csr.hpp:997
void read(const device_mat_data &data) override
Reads a matrix from a device_matrix_data structure.
index_type * get_srow() noexcept
Returns the starting rows.
Definition csr.hpp:924
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, std::shared_ptr< strategy_type > strategy)
Creates an uninitialized CSR matrix of the specified size.
size_type get_num_srow_elements() const noexcept
Returns the number of the srow stored elements (involved warps)
Definition csr.hpp:943
std::unique_ptr< LinOp > inverse_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the symmetric inverse row and column permutation of the Permutable objec...
std::unique_ptr< LinOp > row_permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the row permutation of the Permutable object.
std::unique_ptr< Csr< ValueType, IndexType > > create_submatrix(const index_set< IndexType > &row_index_set, const index_set< IndexType > &column_index_set) const
Creates a submatrix from this Csr matrix given row and column index_set objects.
std::unique_ptr< Diagonal< ValueType > > extract_diagonal() const override
Extracts the diagonal entries of the matrix into a vector.
static std::unique_ptr< Csr > create(std::shared_ptr< const Executor > exec, const dim< 2 > &size, array< value_type > values, array< index_type > col_idxs, array< index_type > row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a CSR matrix from already allocated (and initialized) row pointer, column index and value arr...
index_type * get_row_ptrs() noexcept
Returns the row pointers of the matrix.
Definition csr.hpp:905
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > permutation, permute_mode mode=permute_mode::symmetric) const
Creates a permuted copy of this matrix with the given permutation .
static std::unique_ptr< const Csr > create_const(std::shared_ptr< const Executor > exec, const dim< 2 > &size, gko::detail::const_array_view< ValueType > &&values, gko::detail::const_array_view< IndexType > &&col_idxs, gko::detail::const_array_view< IndexType > &&row_ptrs, std::shared_ptr< strategy_type > strategy=nullptr)
Creates a constant (immutable) Csr matrix from a set of constant arrays.
Csr(const Csr &)
Copy-constructs a Csr matrix.
Csr & operator=(Csr &&)
Move-assigns a Csr matrix.
std::unique_ptr< LinOp > transpose() const override
Returns a LinOp representing the transpose of the Transposable object.
const value_type * get_const_values() const noexcept
Returns the values of the matrix.
Definition csr.hpp:876
std::unique_ptr< LinOp > inverse_column_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the row permutation of the inverse permuted object.
std::unique_ptr< LinOp > inverse_row_permute(const array< IndexType > *inverse_permutation_indices) const override
Returns a LinOp representing the row permutation of the inverse permuted object.
void compute_absolute_inplace() override
Compute absolute inplace on each element.
size_type get_num_stored_elements() const noexcept
Returns the number of elements explicitly stored in the matrix.
Definition csr.hpp:953
std::shared_ptr< strategy_type > get_strategy() const noexcept
Returns the strategy.
Definition csr.hpp:962
std::unique_ptr< LinOp > permute(const array< IndexType > *permutation_indices) const override
Returns a LinOp representing the symmetric row and column permutation of the Permutable object.
const index_type * get_const_col_idxs() const noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:895
void read(const mat_data &data) override
Reads a matrix from a matrix_data structure.
void sort_by_column_index()
Sorts all (value, col_idx) pairs in each row by column index.
std::unique_ptr< Csr > scale_permute(ptr_param< const ScaledPermutation< value_type, index_type > > row_permutation, ptr_param< const ScaledPermutation< value_type, index_type > > column_permutation, bool invert=false) const
Creates a scaled and permuted copy of this matrix.
void scale(ptr_param< const LinOp > alpha)
Scales the matrix with a scalar.
Definition csr.hpp:984
value_type * get_values() noexcept
Returns the values of the matrix.
Definition csr.hpp:867
index_type * get_col_idxs() noexcept
Returns the column indexes of the matrix.
Definition csr.hpp:886
Csr(Csr &&)
Move-constructs a Csr matrix.
std::unique_ptr< Csr > permute(ptr_param< const Permutation< index_type > > row_permutation, ptr_param< const Permutation< index_type > > column_permutation, bool invert=false) const
Creates a non-symmetrically permuted copy of this matrix with the given row and column permutations...
std::unique_ptr< LinOp > conj_transpose() const override
Returns a LinOp representing the conjugate transpose of the Transposable object.
Dense is a matrix format which explicitly stores all values of the matrix.
Definition sparsity_csr.hpp:25
This class is a utility which efficiently implements the diagonal matrix (a linear operator which sca...
Definition diagonal.hpp:53
ELL is a matrix format where stride with explicit zeros is used such that all rows have the same numb...
Definition ell.hpp:64
Fixed-block compressed sparse row storage matrix format.
Definition sparsity_csr.hpp:29
HYBRID is a matrix format which splits the matrix into ELLPACK and COO format.
Definition hybrid.hpp:55
Permutation is a matrix format that represents a permutation matrix, i.e.
Definition permutation.hpp:112
ScaledPermutation is a matrix combining a permutation with scaling factors.
Definition scaled_permutation.hpp:38
SELL-P is a matrix format similar to ELL format.
Definition sellp.hpp:55
SparsityCsr is a matrix format which stores only the sparsity pattern of a sparse matrix by compressi...
Definition sparsity_csr.hpp:56
This class is used for function parameters in the place of raw pointers.
Definition utils_helper.hpp:41
permute_mode
Specifies how a permutation will be applied to a matrix.
Definition permutation.hpp:42
@ symmetric
The rows and columns will be permuted.
The Ginkgo namespace.
Definition abstract_factory.hpp:20
typename detail::remove_complex_s< T >::type remove_complex
Obtain the type which removed the complex of complex/scalar type or the template parameter of class b...
Definition math.hpp:260
typename detail::next_precision_impl< T >::type next_precision
Obtains the next type in the singly-linked precision list with half.
Definition math.hpp:438
typename detail::to_complex_s< T >::type to_complex
Obtain the type which adds the complex of complex/scalar type or the template parameter of class by a...
Definition math.hpp:279
constexpr int64 ceildiv(int64 num, int64 den)
Performs integer division with rounding up.
Definition math.hpp:590
std::size_t size_type
Integral type used for allocation quantities.
Definition types.hpp:89
constexpr T min(const T &x, const T &y)
Returns the smaller of the arguments.
Definition math.hpp:719
detail::temporary_clone< detail::pointee< Ptr > > make_temporary_clone(std::shared_ptr< const Executor > exec, Ptr &&ptr)
Creates a temporary_clone.
Definition temporary_clone.hpp:208
STL namespace.
A type representing the dimensions of a multidimensional object.
Definition dim.hpp:26
This structure is used as an intermediate data type to store a sparse matrix.
Definition matrix_data.hpp:126
A span is a lightweight structure used to create sub-ranges from other ranges.
Definition range.hpp:46