SmallMatrix: Matrix class with compile time size #4176

Merged 2 commits on Oct 3, 2024
33 changes: 11 additions & 22 deletions Src/Base/AMReX_Array.H
@@ -10,6 +10,7 @@
#include <AMReX_REAL.H>
#include <AMReX_Algorithm.H>
#include <AMReX_Dim3.H>
+#include <AMReX_SmallMatrix.H>

#include <array>
#include <memory>
@@ -148,10 +149,6 @@ namespace amrex {
* order (last index moving fastest). If not specified, Fortran order is
* assumed.
*/
-namespace Order {
-    struct C {};
-    struct F {};
-}

/**
* A GPU-compatible one-dimensional array.
@@ -280,7 +277,7 @@
* default if not given)
*/
template <class T, int XLO, int XHI, int YLO, int YHI,
-          class ORDER=Order::F>
+          Order ORDER = Order::F>
struct Array2D
{
/**
@@ -370,8 +367,7 @@
* If the order is not specified, Fortran column-major order is assumed
* (the index \c i moves the fastest)
*/
-template <typename O=ORDER,
-          std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
+template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
const T& operator() (int i, int j) const noexcept {
AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
@@ -384,8 +380,7 @@
* If the order is not specified, Fortran column-major order is assumed
* (the index \c i moves the fastest)
*/
-template <typename O=ORDER,
-          std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
+template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T& operator() (int i, int j) noexcept {
AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
@@ -398,8 +393,7 @@
* When the order is manually specified as Order::C, row-major order
* is used (the index \c j moves the fastest).
*/
-template <typename O=ORDER,
-          std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
+template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
const T& operator() (int i, int j) const noexcept {
AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
@@ -412,8 +406,7 @@
* When the order is manually specified as Order::C, row-major order
* is used (the index \c j moves the fastest).
*/
-template <typename O=ORDER,
-          std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
+template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T& operator() (int i, int j) noexcept {
AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
@@ -551,7 +544,7 @@
* default if not given)
*/
template <class T, int XLO, int XHI, int YLO, int YHI, int ZLO, int ZHI,
-          class ORDER=Order::F>
+          Order ORDER=Order::F>
struct Array3D
{
/**
@@ -662,8 +655,7 @@
* If the order is not specified, Fortran column-major order is assumed
* (the index \c i moves the fastest)
*/
-template <typename O=ORDER,
-          std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
+template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
const T& operator() (int i, int j, int k) const noexcept {
return arr[i+j*(XHI-XLO+1)+k*((XHI-XLO+1)*(YHI-YLO+1))
@@ -676,8 +668,7 @@
* If the order is not specified, Fortran column-major order is assumed
* (the index \c i moves the fastest)
*/
-template <typename O=ORDER,
-          std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
+template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T& operator() (int i, int j, int k) noexcept {
return arr[i+j*(XHI-XLO+1)+k*((XHI-XLO+1)*(YHI-YLO+1))
@@ -690,8 +681,7 @@
* When the order is manually specified as Order::C, row-major order
* is used (the index \c k moves the fastest).
*/
-template <typename O=ORDER,
-          std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
+template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
const T& operator() (int i, int j, int k) const noexcept {
return arr[k+j*(ZHI-ZLO+1)+i*((ZHI-ZLO+1)*(YHI-YLO+1))
@@ -704,8 +694,7 @@
* When the order is manually specified as Order::C, row-major order
* is used (the index \c k moves the fastest).
*/
-template <typename O=ORDER,
-          std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
+template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T& operator() (int i, int j, int k) noexcept {
return arr[k+j*(ZHI-ZLO+1)+i*((ZHI-ZLO+1)*(YHI-YLO+1))
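Note on the Array2D/Array3D changes above: Order used to be a namespace containing the tag structs Order::C and Order::F, and the memory ordering was selected by matching those tag types with std::is_same_v. After this PR the ordering is a value of an Order enum (provided by the newly included AMReX_SmallMatrix.H), so it becomes a non-type template parameter and the operator() overloads are selected by comparing enum values directly. The following self-contained sketch illustrates the new pattern; it is not AMReX code, and since the exact definition of amrex::Order in AMReX_SmallMatrix.H is not shown in this diff, the enum below is an assumed stand-in.

#include <cstdio>
#include <type_traits>

// Assumed stand-in for the ordering enum that AMReX_SmallMatrix.H now
// provides (its exact definition is not part of this diff).
enum class Order { C, F };

// Minimal 2D container mimicking the new Array2D constraint style: the
// operator() overloads are enabled by comparing the enum value
// (Ord == Order::F) instead of matching tag types with std::is_same_v.
template <class T, int NX, int NY, Order ORDER = Order::F>
struct Demo2D
{
    T arr[NX*NY];

    template <Order Ord = ORDER, std::enable_if_t<Ord == Order::F, int> = 0>
    T& operator() (int i, int j) noexcept {   // column-major: i moves fastest
        return arr[i + j*NX];
    }

    template <Order Ord = ORDER, std::enable_if_t<Ord == Order::C, int> = 0>
    T& operator() (int i, int j) noexcept {   // row-major: j moves fastest
        return arr[j + i*NY];
    }
};

int main ()
{
    Demo2D<double, 3, 2> f;            // Fortran (column-major) order by default
    Demo2D<double, 3, 2, Order::C> c;  // explicitly request C (row-major) order
    f(0,1) = 1.0;
    c(0,1) = 2.0;
    std::printf("%g %g\n", f(0,1), c(0,1));
    return 0;
}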
38 changes: 38 additions & 0 deletions Src/Base/AMReX_ConstexprFor.H
@@ -0,0 +1,38 @@
#ifndef AMREX_CONSTEXPR_FOR_H_
#define AMREX_CONSTEXPR_FOR_H_
#include <AMReX_Config.H>

#include <AMReX_GpuQualifiers.H>
#include <AMReX_Extension.H>

#include <type_traits>

namespace amrex {

// Implementation of "constexpr for" based on
// https://artificial-mind.net/blog/2020/10/31/constexpr-for
//
// Approximates what one would get from a compile-time
// unrolling of the loop
// for (int i = 0; i < N; ++i) {
// f(i);
// }
//
// The mechanism is recursive: we evaluate f(i) at the current
// i and then call the for loop at i+1. f() is a lambda function
// that provides the body of the loop and takes only an integer
// i as its argument.

template<auto I, auto N, class F>
AMREX_GPU_HOST_DEVICE AMREX_INLINE
constexpr void constexpr_for (F const& f)
{
if constexpr (I < N) {
f(std::integral_constant<decltype(I), I>());
constexpr_for<I+1, N>(f);
}
}

}

#endif
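The new header simply relocates constexpr_for out of AMReX_Loop.H, so existing callers only need the extra include. As a quick illustration of why the loop index is delivered as std::integral_constant rather than a plain int, here is a small usage sketch (not part of this PR); it copies the function locally, without the AMReX GPU/inline macros, so it compiles on its own:

#include <cstddef>
#include <iostream>
#include <tuple>
#include <type_traits>

// Local copy of the constexpr_for shown above, minus the AMReX macros,
// so this example has no AMReX dependency.
template<auto I, auto N, class F>
constexpr void constexpr_for (F const& f)
{
    if constexpr (I < N) {
        f(std::integral_constant<decltype(I), I>());
        constexpr_for<I+1, N>(f);
    }
}

int main ()
{
    auto t = std::make_tuple(1, 2.5, 'x');
    // The body receives the index as a std::integral_constant, so
    // decltype(i)::value is a compile-time constant and can be used as a
    // template argument, which a plain runtime loop index could not be.
    constexpr_for<std::size_t{0}, std::tuple_size_v<decltype(t)>>([&] (auto i) {
        constexpr auto n = decltype(i)::value;
        std::cout << "element " << n << ": " << std::get<n>(t) << '\n';
    });
    return 0;
}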
25 changes: 1 addition & 24 deletions Src/Base/AMReX_Loop.H
@@ -3,6 +3,7 @@
#include <AMReX_Config.H>

#include <AMReX_Box.H>
+#include <AMReX_ConstexprFor.H>
#include <AMReX_Extension.H>

namespace amrex {
@@ -567,30 +568,6 @@ void LoopConcurrentOnCpu (BoxND<dim> const& bx, int ncomp, F const& f) noexcept
}
}

-// Implementation of "constexpr for" based on
-// https://artificial-mind.net/blog/2020/10/31/constexpr-for
-//
-// Approximates what one would get from a compile-time
-// unrolling of the loop
-// for (int i = 0; i < N; ++i) {
-//     f(i);
-// }
-//
-// The mechanism is recursive: we evaluate f(i) at the current
-// i and then call the for loop at i+1. f() is a lambda function
-// that provides the body of the loop and takes only an integer
-// i as its argument.
-
-template<auto I, auto N, class F>
-AMREX_GPU_HOST_DEVICE AMREX_INLINE
-constexpr void constexpr_for (F const& f)
-{
-    if constexpr (I < N) {
-        f(std::integral_constant<decltype(I), I>());
-        constexpr_for<I+1, N>(f);
-    }
-}
-
#include <AMReX_Loop.nolint.H>

}