diff --git a/libmpdata++-config.cmake b/libmpdata++-config.cmake index 32c4e83a..0415f1ec 100644 --- a/libmpdata++-config.cmake +++ b/libmpdata++-config.cmake @@ -32,7 +32,7 @@ set(libmpdataxx_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/../../include/") ############################################################################################ # debug mode compiler flags -set(libmpdataxx_CXX_FLAGS_DEBUG "${libmpdataxx_CXX_FLAGS_DEBUG} -std=c++14 -DBZ_DEBUG -g -Wno-enum-compare") #TODO: -Og if compiler supports it? +set(libmpdataxx_CXX_FLAGS_DEBUG "${libmpdataxx_CXX_FLAGS_DEBUG} -std=c++17 -DBZ_DEBUG -g -Wno-enum-compare -Wfatal-errors") #TODO: -Og if compiler supports it? ############################################################################################ @@ -42,7 +42,7 @@ if( CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" ) - set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=c++14 -DNDEBUG -Ofast -march=native -Wno-enum-compare") + set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=c++17 -DNDEBUG -Ofast -march=native -Wno-enum-compare") # preventing Kahan summation from being optimised out if ( @@ -58,7 +58,7 @@ if( CMAKE_CXX_COMPILER_ID STREQUAL "Intel" ) # flags taken from -fast but without -static - set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=gnu++14 -DNDEBUG -xHOST -O3 -ipo -no-prec-div -fp-model fast=2") + set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=gnu++17 -DNDEBUG -xHOST -O3 -ipo -no-prec-div -fp-model fast=2") endif() diff --git a/libmpdata++/blitz.hpp b/libmpdata++/blitz.hpp index 53273bde..8a54410a 100644 --- a/libmpdata++/blitz.hpp +++ b/libmpdata++/blitz.hpp @@ -51,6 +51,7 @@ namespace libmpdataxx { template using idx_t = blitz::RectDomain; + template using idxs_t = blitz::StridedDomain; using rng_t = blitz::Range; // non-int ix_t means either rng_t or idx_t diff --git a/libmpdata++/concurr/boost_thread.hpp b/libmpdata++/concurr/boost_thread.hpp index f1d7c7a6..c804e9bd 100644 --- a/libmpdata++/concurr/boost_thread.hpp +++ b/libmpdata++/concurr/boost_thread.hpp @@ -51,10 +51,8 @@ namespace libmpdataxx // ctor - mem_t(const std::array &grid_size) : - b(size(grid_size[0])), - parent_t::mem_t(grid_size, size(grid_size[0])) - {}; + mem_t(const std::array &grid_size, const int n_ref) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {}; + mem_t(const std::array &grid_size) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0])) {}; void barrier() { @@ -81,7 +79,7 @@ namespace libmpdataxx // ctor boost_thread(const typename solver_t::rt_params_t &p) : - parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction + parent_t(p, detail::mem_factory(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction {} }; diff --git a/libmpdata++/concurr/cxx11_thread.hpp b/libmpdata++/concurr/cxx11_thread.hpp index 77dc47d9..408b7f3b 100644 --- a/libmpdata++/concurr/cxx11_thread.hpp +++ b/libmpdata++/concurr/cxx11_thread.hpp @@ -92,10 +92,8 @@ namespace libmpdataxx } // ctor - mem_t(const std::array &grid_size) : - b(size(grid_size[0])), - parent_t::mem_t(grid_size, size(grid_size[0])) - {}; + mem_t(const std::array &grid_size, const int n_ref) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {}; + mem_t(const std::array &grid_size) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0])) {}; void barrier() { @@ -119,7 +117,7 @@ namespace libmpdataxx // ctor cxx11_thread(const typename solver_t::rt_params_t &p) : - parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction + parent_t(p, detail::mem_factory(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction {} }; diff --git a/libmpdata++/concurr/detail/concurr_common.hpp b/libmpdata++/concurr/detail/concurr_common.hpp index 49e53eeb..5981803a 100644 --- a/libmpdata++/concurr/detail/concurr_common.hpp +++ b/libmpdata++/concurr/detail/concurr_common.hpp @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include @@ -34,6 +34,8 @@ #include #include +#include + namespace libmpdataxx { namespace concurr @@ -149,12 +151,7 @@ namespace libmpdataxx protected: - // (cannot be nested due to templates) - typedef sharedmem< - typename solver_t::real_t, - solver_t::n_dims, - solver_t::n_tlev - > mem_t; + using mem_t = typename solver_t::mem_t; // member fields boost::ptr_vector algos; @@ -452,6 +449,15 @@ namespace libmpdataxx return mem->max(mem->advectee(e)); } }; + + template< class mem_t, class solver_t> + mem_t* mem_factory(const typename solver_t::rt_params_t &p) + { + if constexpr (solvers::detail::slvr_with_frac_recn()) + return new mem_t(p.grid_size, pow(2, p.n_fra_iter)); + else + return new mem_t(p.grid_size); + } } // namespace detail } // namespace concurr } // namespace libmpdataxx diff --git a/libmpdata++/concurr/detail/distmem.hpp b/libmpdata++/concurr/detail/distmem.hpp index d17503f0..a4d50357 100644 --- a/libmpdata++/concurr/detail/distmem.hpp +++ b/libmpdata++/concurr/detail/distmem.hpp @@ -54,6 +54,7 @@ namespace libmpdataxx public: std::array grid_size; + std::array grid_size_ref; int rank() { diff --git a/libmpdata++/concurr/detail/sharedmem.hpp b/libmpdata++/concurr/detail/sharedmem.hpp index f6ae1c5e..9e888025 100644 --- a/libmpdata++/concurr/detail/sharedmem.hpp +++ b/libmpdata++/concurr/detail/sharedmem.hpp @@ -97,8 +97,6 @@ namespace libmpdataxx rng_t(0, grid_size[d]-1), d == 0 ? distmem.rank() : 0, // decomposition along x, because that's MPI decomposition d == 0 ? distmem.size() : 1 - // d == shmem_decomp_dim ? distmem.rank() : 0, - // d == shmem_decomp_dim ? distmem.size() : 1 ); origin[d] = this->grid_size[d].first(); } @@ -294,6 +292,7 @@ namespace libmpdataxx } virtual arr_t advectee(int e = 0) = 0; + virtual const arr_t advectee_global(int e = 0) = 0; void advectee_global_set(const arr_t arr, int e = 0) { @@ -507,9 +506,11 @@ namespace libmpdataxx class sharedmem : public sharedmem_common { using parent_t = sharedmem_common; - using arr_t = typename parent_t::arr_t; using parent_t::parent_t; // inheriting ctors + protected: + using arr_t = typename parent_t::arr_t; + public: virtual arr_t *never_delete(arr_t *arg) override diff --git a/libmpdata++/concurr/detail/sharedmem_refined.hpp b/libmpdata++/concurr/detail/sharedmem_refined.hpp new file mode 100644 index 00000000..6c393802 --- /dev/null +++ b/libmpdata++/concurr/detail/sharedmem_refined.hpp @@ -0,0 +1,122 @@ +/** @file + * @copyright University of Warsaw + * @section LICENSE + * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/) + */ + +// memory management with (fractal) grid refinement + +#pragma once + +#include "sharedmem.hpp" + +namespace libmpdataxx +{ + namespace concurr + { + namespace detail + { + template < + typename real_t, + int n_dims, + int n_tlev + > + class sharedmem_refined_common : public sharedmem + { + using parent_t = sharedmem; + + protected: + + using arr_t = typename parent_t::arr_t; + + blitz::TinyVector origin_ref; + + public: + + const int n_ref; // number of equal divisions of the large cell (in each direction), refined resolution is dx / n_ref; + // every n_ref scalar of the refined grid is at the same position as a scalar of the normal grid + // no refinement done in the halo, because there are no SD in the halo (it's not real space) + // what about MPI boundaries? there are refined points exactly at the boundary (since n_ref has to be even) + // note: if there is a refined cell that is divided by the MPI boundary, o we need to add contributions from microphysics to both processes on both sides? + // maybe not, because microphysics contrbutions will affect the large cells, which are not divided by the MPI boundary... + + std::array grid_size_ref; + // TODO: these are public because used from outside in alloc - could friendship help? + //arrvec_t GC_ref, psi_ref; + arrvec_t psi_ref; + + // ctors + sharedmem_refined_common(const std::array &grid_size, const int &size, const int &n_ref) + : parent_t(grid_size, size), n_ref(n_ref) + { + assert(n_ref % 2 == 0); // only division into even number of cells, because we assume that one of the refined scalar points is at the MPI boundary, which is in the middle between normal grid scalars + + // for now, require a grid_size that is convenient for fractal reconstruction (which calculates 2 points based on 3 points) + // NOTE: fix this with proper halos (cyclic is easy, but what about rigid?) + // NOTE2: this is actually a requirement for fractal reconstruction, not for any grid refinement, so move this somewhere else + for (int d = 0; d < n_dims; ++d) + if((grid_size[d] - 3) % 2 != 0) throw std::runtime_error("Fractal grid refinement requires nx/ny/nz = 3 + 2 * i, where i = 0,1,2,3,..."); + + for (int d = 0; d < n_dims; ++d) + { + grid_size_ref[d] = refine_grid_size( + this->grid_size[d], + n_ref, + d == 0 ? this->distmem.rank() : 0, + d == 0 ? this->distmem.size() : 1 + ); + origin_ref[d] = grid_size_ref[d].first(); + + this->distmem.grid_size_ref[d] = refine_grid_size(rng_t(0,grid_size[d]-1), n_ref, 0, 1).length(); + } + } + + // NOTE: not all advectees are refined, so e (numbering) in refinee is different than in advectee + virtual arr_t refinee(int e = 0) = 0; + // virtual const arr_t refinee_global_ref(int e = 0) = 0; + + public: + static rng_t refine_grid_size( + const rng_t &grid_size, + const int &n_ref, + const int &mpi_rank, + const int &mpi_size + ) { + assert(n_ref % 2 == 0); + // NOTE: overlapping points inbetween MPI domains + return rng_t( + mpi_rank == 0 ? grid_size.first() * n_ref : grid_size.first() * n_ref - n_ref / 2, + mpi_rank == mpi_size-1 ? grid_size.last() * n_ref : grid_size.last() * n_ref + n_ref / 2 // refined points between MPI domains are evenly divided between MPI processes + ); + } + }; + + + + template + class sharedmem_refined + {}; + + template + class sharedmem_refined : public sharedmem_refined_common + { + using parent_t = sharedmem_refined_common; + using parent_t::parent_t; // inheriting ctors + + protected: + using arr_t = typename parent_t::arr_t; + + public: + arr_t refinee(int e = 0) override + { + return this->psi_ref[e]( + this->grid_size_ref[0], + this->grid_size_ref[1], + this->grid_size_ref[2] + ).reindex(this->origin_ref); + } + }; + + } // namespace detail + } // namespace concurr +} // namespace libmpdataxx diff --git a/libmpdata++/concurr/openmp.hpp b/libmpdata++/concurr/openmp.hpp index d33cda98..96362754 100644 --- a/libmpdata++/concurr/openmp.hpp +++ b/libmpdata++/concurr/openmp.hpp @@ -31,7 +31,6 @@ namespace libmpdataxx { using parent_t = detail::concurr_common; - struct mem_t : parent_t::mem_t { static int size(const unsigned max_threads = std::numeric_limits::max()) @@ -58,7 +57,8 @@ namespace libmpdataxx } // ctors - mem_t(const std::array &grid_size) : parent_t::mem_t(grid_size, size(grid_size[0])) {}; + mem_t(const std::array &grid_size, const int n_ref) : parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {}; + mem_t(const std::array &grid_size) : parent_t::mem_t(grid_size, size(grid_size[0])) {}; }; void solve(typename parent_t::advance_arg_t nt) @@ -75,9 +75,8 @@ namespace libmpdataxx public: - // ctor openmp(const typename solver_t::rt_params_t &p) : - parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction + parent_t(p, detail::mem_factory(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction {} }; diff --git a/libmpdata++/concurr/serial.hpp b/libmpdata++/concurr/serial.hpp index f6abc1ea..1a5fe18b 100644 --- a/libmpdata++/concurr/serial.hpp +++ b/libmpdata++/concurr/serial.hpp @@ -33,9 +33,8 @@ namespace libmpdataxx void barrier() { } // ctors - mem_t(const std::array &grid_size) - : parent_t::mem_t(grid_size, size()) - {}; + mem_t(const std::array &grid_size, const int n_ref) : parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {}; + mem_t(const std::array &grid_size) : parent_t::mem_t(grid_size, size(grid_size[0])) {}; }; void solve(typename parent_t::advance_arg_t nt) @@ -47,7 +46,7 @@ namespace libmpdataxx // ctor serial(const typename solver_t::rt_params_t &p) : - parent_t(p, new mem_t(p.grid_size), mem_t::size()) + parent_t(p, detail::mem_factory(p), mem_t::size()) {} }; diff --git a/libmpdata++/formulae/fractal_reconstruction.hpp b/libmpdata++/formulae/fractal_reconstruction.hpp new file mode 100644 index 00000000..15fbacc6 --- /dev/null +++ b/libmpdata++/formulae/fractal_reconstruction.hpp @@ -0,0 +1,181 @@ +/** @file +* @copyright University of Warsaw +* @section LICENSE +* GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/) +*/ + +#pragma once + +#include + +namespace libmpdataxx +{ + namespace formulae + { + namespace fractal + { + // CDF of stretching parameter d; assuming CDF = A d^B + C under condition that CDF(0.5)=0 and CDF(1)=1 + template + static real_t CDF_of_d(const real_t &d) + { + const real_t B = -0.39091161; // comes from a fit to the E. Akinlabi data + return (pow(d,B) - pow(0.5,B)) / (1. - pow(0.5,B)); + } + // inverse function: d of a CDF + template + static real_t d_of_CDF(const real_t &CDF) + { + const real_t B = -0.39091161; // comes from a fit to the E. Akinlabi data + return pow((1.-pow(0.5,B))*CDF + pow(0.5,B), 1./B); + } + + template + struct d_of_CDF_fctr + { + real_t operator()(const real_t &CDF) const + { + return CDF < 0 ? -d_of_CDF(-CDF) : d_of_CDF(CDF); + } + BZ_DECLARE_FUNCTOR(d_of_CDF_fctr); + }; + + // (tri-)linear interpolation + // 3d version + template + void intrp( + arr_t arr, // array to be interpolated + const rng_t &i, // index of the point to be interpolated to + const rng_t &j, // ditto + const rng_t &k, // ditto + const int &dist // number of indices between an interpolated point and a known point + ) + { + using idxperm::pis; + + // debug output + // std::cerr << "ranges in interpolation" << std::endl; + // if(d==0) + // std::cerr << "range<" << d << ">: " << i << " " << j << " " << k << std::endl; + // if(d==1) + // std::cerr << "range<" << d << ">: " << k << " " << i << " " << j << std::endl; + // if(d==2) + // std::cerr << "range<" << d << ">: " << j << " " << k << " " << i << std::endl; + + arr(pis(i, j, k)) = real_t(.5) * ( + arr(pis(i - dist, j, k)) + + arr(pis(i + dist, j, k)) + ); + } + + // fractral reconstruction (calculates 2 mid-points based on 3) + // following Akinlabi et al. "Fractal reconstruciton..." 2019 + // 3d version + template + void rcnstrct( + arr_t arr, // array to be reconstructed + const rng_t &i, // index of the first point from the pair to be reconstructed + const rng_t &j, // ditto + const rng_t &k, // ditto + const int &dist, // number of indices between a reconstructed point and a known point + arr_t c_j, // parameter c + const arr_t d_j, // parameter d (stretching) - needs to be calculated before + arr_t f_j // parameter f + ) + { + // debug output +// std::cerr << "ranges in rcnstrct" << std::endl; +// if(d==0) +// std::cerr << "range<" << d << ">: " << i << " " << j << " " << k << std::endl; +// if(d==1) +// std::cerr << "range<" << d << ">: " << k << " " << i << " " << j << std::endl; +// if(d==2) +// std::cerr << "range<" << d << ">: " << j << " " << k << " " << i << std::endl; + + using idxperm::pis; + + // second reconstructed position (j=2, between i=1 and i=2) + const rng_t i_next = i + 2*dist; + + const int x[3] = {0, 1, 2}; + + // helper references + const arr_t u_0 = arr(pis(i - dist, j, k)), + u_1 = arr(pis(i + dist, j, k)), + u_2 = arr(pis(i_next + dist, j, k)); + + arr_t c_1 = c_j(pis(i, j, k)), + c_2 = c_j(pis(i_next, j, k)), + d_1 = d_j(pis(i, j, k)), + d_2 = d_j(pis(i_next, j, k)), + f_1 = f_j(pis(i, j, k)), + f_2 = f_j(pis(i_next, j, k)); + + // Eqs. (5) and (6) Akinlabi et al. + c_1 = (u_1 - u_0) / (2.) - d_1 * (u_2 - u_0) / (2.); + c_2 = (u_2 - u_1) / (2.) - d_2 * (u_2 - u_0) / (2.); + f_1 = (x[2] * u_0 - x[0] * u_1) / (2.) - d_1 * (x[2] * u_0 - x[0] * u_2) / (2.); + f_2 = (x[2] * u_1 - x[0] * u_2) / (2.) - d_2 * (x[2] * u_0 - x[0] * u_2) / (2.); + + // new u, at j=1, between i=0 and i=1, result of w_j=1(u_i=1), Eq. (1) in Akinlabi et al. + arr(pis(i , j, k)) = c_1 * 1 + d_1 * u_1 + f_1; + // new u, at j=2, between i=1 and i=2, result of w_j=2(u_i=1), Eq. (1) in Akinlabi et al. + arr(pis(i_next, j, k)) = c_2 * 1 + d_2 * u_1 + f_2; + + // DEBUG: do interpolation, useful for testing if ranges are correct +// arr(pis(i, j, k)) = real_t(.5) * (u_0 + u_1); +// arr(pis(i_next, j, k)) = real_t(.5) * (u_1 + u_2); + + +// alternative solution following Scotti and Meneveau Physica D 1999 +/* + + // helper references + // NOTE: these are actually the resolved values only in the first iteration, in the subsequent iterations some of these are reconstructed values + // should we point to the resolved values in all iterations?? + const arr_t u_im1 = arr(pis(i - dist, j, k)), // resolved u_(i-1) + u_i = arr(pis(i + dist, j, k)), // resolved u_(i) + u_ip1 = arr(pis(i_next + dist, j, k)); // resolved u_(i+1) + + // c_j and f_j naming comes from the Akinlabi paper, TODO: rename to a_i b_i + // NOTE: in multiple iterations of reconstruction, they only change due to random stretching parameters? + // or the stretching parameter should be drawn from the distro once? then no need for c_j and f_j to have the same size as reconstructed fields... + arr_t a_1 = this->c_j(pis(i, j, k)), + a_2 = this->c_j(pis(i_next, j, k)), + b_1 = this->f_j(pis(i, j, k)), + b_2 = this->f_j(pis(i_next, j, k)); + + // Eqs. (13) and (14) from Scotti Meneveau 1999 + a_1 = u_i - u_im1 - d_1*(u_ip1 - u_im1); + a_2 = u_ip1 - u_i - d_2*(u_ip1 - u_im1); + b_1 = u_im1*(real_t(1) - d_1); + b_2 = u_i - d_2*u_im1; + + // W(u_0(xi)) at xi=1/4, between u_i-1 and u_i + // u_0(2*xi) = u_i-1 + 1/2 * (u_i+1 - u_i-1) + // Eq. (6) and caption of fig. 1 Scotti 1999 + // NOTE: in multiple iterations, should xi be between the 3 resolved points? or should we consider the reconstructed points as new resolved points? + arr(pis(i, j, k)) = d_1 * +// (u_im1 + 0.5 * (u_ip1 - u_im1)) + // u_0(2*xi) + u_i + + 0.5 * a_1 + b_1; // q_1(2*xi) + // W(u_0(xi)) at xi=3/4, between u_i and u_i+1 + // u_0(2*xi-1) = u_i-1 + 1/2 * (u_i+1 - u_i-1) + // NOTE: u_0(2*3/4-1) == u_0(2*1/4), so calculate it once in the previous step and store it... + arr(pis(i_next, j, k)) = d_2 * +// (u_im1 + 0.5 * (u_ip1 - u_im1)) + // u_0(2*xi-1) + u_i + + 0.5 * a_2 + b_2; // q_2(2*xi-1) +*/ + // Eq. (5) Scotti and Meneveau PRL 1997, a_1 and a_2 differ from Scotti Meneveau 1999 ! + // NOTE: in the 1999 paper, a_1 and a_2 are multipled by 2*xi and 2*xi-1, respectively + // in the 1997 paper, the are both multipled by xi + /* + a_1 = 2 * (u_im1 - u_i - d_1*(u_ip1 - u_im1)); + a_2 = 2 * (u_ip1 - u_i - d_2*(u_ip1 - u_im1)); + b_1 = u_im1*(real_t(1) - d_1); + b_2 = u_i - d_2*u_im1; + */ + } + }; + }; +}; diff --git a/libmpdata++/formulae/idxperm.hpp b/libmpdata++/formulae/idxperm.hpp index 5c5c1c74..1d58574d 100644 --- a/libmpdata++/formulae/idxperm.hpp +++ b/libmpdata++/formulae/idxperm.hpp @@ -69,5 +69,18 @@ namespace libmpdataxx template<> inline int_idx_t<3> pi<2>(const int k, const int i, const int j) { return int_idx_t<3>({i,j,k}); } + + // 3D - strided ranges + template + inline idxs_t<3> pis(const rng_t &i, const rng_t &j, const rng_t &k); + + template<> + inline idxs_t<3> pis<0>(const rng_t &i, const rng_t &j, const rng_t &k) { return idxs_t<3>{{i.first(), j.first(), k.first()}, {i.last(), j.last(), k.last()}, {i.stride(), j.stride(), k.stride()}}; } + + template<> + inline idxs_t<3> pis<1>(const rng_t &j, const rng_t &k, const rng_t &i) { return idxs_t<3>{{i.first(), j.first(), k.first()}, {i.last(), j.last(), k.last()}, {i.stride(), j.stride(), k.stride()}}; } + + template<> + inline idxs_t<3> pis<2>(const rng_t &k, const rng_t &i, const rng_t &j) { return idxs_t<3>{{i.first(), j.first(), k.first()}, {i.last(), j.last(), k.last()}, {i.stride(), j.stride(), k.stride()}}; } } // namespace idxperm } // namespace libmpdataxx diff --git a/libmpdata++/formulae/refined_grid.hpp b/libmpdata++/formulae/refined_grid.hpp new file mode 100644 index 00000000..4093b1d5 --- /dev/null +++ b/libmpdata++/formulae/refined_grid.hpp @@ -0,0 +1,61 @@ +/** @file +* @copyright University of Warsaw +* @section LICENSE +* GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/) +*/ + +#pragma once + +//#include + +namespace libmpdataxx +{ + namespace formulae + { + namespace refined + { + // calculate regular grid scalar as an average from the refined grid + // modifies the refined grid scalar! + // 3d version + template + void spatial_average_ref2reg( + arr_t arr, + const ijk_t &ijk, // index of the refined point that overlaps with the regular point + const int &dist, // average calculate from the box (i-dist, i+dist)(j-dist,j+dist)(k-dist.k+dist) + const std::array grid_size, // number of points in each direction (total, not for this MPI process), needed for averaging on edges so that it does not go out of the domain; refined arrays have no halos on domain edgs, only halos between MPI subdomains + const bool volume_avg // should averaging account weights proportional to cell volume (refined cells at box edges are only partially within the regular cell) + ) + { + // WARNING: very bad LOOPS + for(int i = ijk[0].first(); i <= ijk[0].last(); i+= ijk[0].stride()) + for(int j = ijk[1].first(); j <= ijk[1].last(); j+= ijk[1].stride()) + for(int k = ijk[2].first(); k <= ijk[2].last(); k+= ijk[2].stride()) + { + real_t norm = 0; + for(int id=-dist; id<=dist; ++id) + for(int jd=-dist; jd<=dist; ++jd) + for(int kd=-dist; kd<=dist; ++kd) + { + if(id==0 && jd==0 && kd==0) continue; // dont add the overlapping point + // dont add points outside of the domain + if(i+id < 0 || i+id > grid_size[0]-1 || + j+jd < 0 || j+jd > grid_size[1]-1 || + k+kd < 0 || k+kd > grid_size[2]-1) + continue; + + real_t w = 1; // weight + if(volume_avg) + { + if(id==-dist || id == dist) w/=2.; + if(jd==-dist || jd == dist) w/=2.; + if(kd==-dist || kd == dist) w/=2.; + } + arr(i,j,k) += w * arr(i+id,j+jd,k+kd); + norm += w; + } + arr(i,j,k) /= norm+1; // +1 because the center cell was not added to norm + } + } + }; + }; +}; diff --git a/libmpdata++/opts.hpp b/libmpdata++/opts.hpp index 35a89ea2..33f24dc5 100644 --- a/libmpdata++/opts.hpp +++ b/libmpdata++/opts.hpp @@ -31,6 +31,17 @@ namespace libmpdataxx return x == 0 ? 0. : int(log(x)/log(2)) + 1; } + // https://www.geeksforgeeks.org/count-set-bits-in-an-integer/ + constexpr unsigned int nset(opts_t x) + { + unsigned int count = 0; + while (x) { + count += x & 1; + x >>= 1; + } + return count; + } + enum { fct = opts::bit(0), // flux-corrected transport @@ -83,6 +94,7 @@ namespace libmpdataxx enum { opts = opts::iga | opts::fct }; enum { hint_norhs = 0 }; enum { delayed_step = 0 }; + enum { fractal_recon = 0 }; struct ix {}; static constexpr int hint_scale(const int &e) { return 0; } // base-2 logarithm enum { var_dt = false}; diff --git a/libmpdata++/output/detail/output_common.hpp b/libmpdata++/output/detail/output_common.hpp index 47920259..430306d5 100644 --- a/libmpdata++/output/detail/output_common.hpp +++ b/libmpdata++/output/detail/output_common.hpp @@ -38,6 +38,7 @@ namespace libmpdataxx virtual void record(const int var) {} virtual void start(const typename parent_t::advance_arg_t nt) {} + virtual void hook_ante_record_all() {} typename parent_t::arr_t out_data(const int var) { @@ -55,6 +56,7 @@ namespace libmpdataxx this->mem->barrier(); } + hook_ante_record_all(); if (this->rank == 0) { record_time = this->time; @@ -145,20 +147,23 @@ namespace libmpdataxx this->mem->barrier(); } - if (this->rank == 0) + if (this->var_dt && do_record_cnt == 1) { - //TODO: output of solver statistics every timesteps could probably go here - - if (this->var_dt && do_record_cnt == 1) - { + hook_ante_record_all(); + if (this->rank == 0) record_all(); - } - else if (!this->var_dt) + } + else if (!this->var_dt) + { + record_time = this->time; + for (int t = 0; t < outwindow; ++t) { - record_time = this->time; - for (int t = 0; t < outwindow; ++t) + if ((this->timestep - t) % static_cast(outfreq) == 0) { - if ((this->timestep - t) % static_cast(outfreq) == 0) record_all(); + this->mem->barrier(); + hook_ante_record_all(); + if (this->rank == 0) + record_all(); } } } diff --git a/libmpdata++/output/detail/xdmf_writer.hpp b/libmpdata++/output/detail/xdmf_writer.hpp index 173ebd4a..cd7a1c7d 100644 --- a/libmpdata++/output/detail/xdmf_writer.hpp +++ b/libmpdata++/output/detail/xdmf_writer.hpp @@ -18,86 +18,99 @@ namespace libmpdataxx namespace detail { template - class xdmf_writer + struct data_item { using ptree = boost::property_tree::ptree; -#if (BOOST_VERSION >= 105600) - using xml_writer_settings = boost::property_tree::xml_writer_settings; -#else - using xml_writer_settings = boost::property_tree::xml_writer_settings; -#endif - struct data_item + blitz::TinyVector dimensions; + const std::string number_type = "Float"; + const std::string format = "HDF"; + std::string data; + void add(ptree& node) { - blitz::TinyVector dimensions; - const std::string number_type = "Float"; - const std::string format = "HDF"; - std::string data; - void add(ptree& node) - { - std::stringstream ss; - for (auto d : dimensions) - ss << d << ' '; - ptree& dat_node = node.add("DataItem", data); - dat_node.put(".Dimensions", ss.str()); - dat_node.put(".NumberType", number_type); - dat_node.put(".Format", format); - } - }; + std::stringstream ss; + for (auto d : dimensions) + ss << d << ' '; + ptree& dat_node = node.add("DataItem", data); + dat_node.put(".Dimensions", ss.str()); + dat_node.put(".NumberType", number_type); + dat_node.put(".Format", format); + } + }; - struct topology + template + struct topology + { + using ptree = boost::property_tree::ptree; + + const std::string topology_type = dim == 2 ? "2DSMesh" : "3DSMesh"; + blitz::TinyVector dimensions; + void add(ptree& node) { - const std::string topology_type = dim == 2 ? "2DSMesh" : "3DSMesh"; - blitz::TinyVector dimensions; - void add(ptree& node) - { - node.put("Topology..TopologyType", topology_type); - std::stringstream ss; - for (auto d : dimensions) - ss << d << ' '; - node.put("Topology..Dimensions", ss.str()); - } - }; + node.put("Topology..TopologyType", topology_type); + std::stringstream ss; + for (auto d : dimensions) + ss << d << ' '; + node.put("Topology..Dimensions", ss.str()); + } + }; + + template + struct geometry + { + using ptree = boost::property_tree::ptree; - struct geometry + const std::string geometry_type = dim == 2 ? "X_Y" : "X_Y_Z"; + std::array, dim> coords; + void add(ptree& node) { - const std::string geometry_type = dim == 2 ? "X_Y" : "X_Y_Z"; - std::array coords; - void add(ptree& node) - { - ptree& geo_node = node.add("Geometry", ""); - geo_node.put(".GeometryType", geometry_type); - for (auto &c : coords) - c.add(geo_node); - } - }; + ptree& geo_node = node.add("Geometry", ""); + geo_node.put(".GeometryType", geometry_type); + for (auto &c : coords) + c.add(geo_node); + } + }; + + template + struct attribute_t + { + using ptree = boost::property_tree::ptree; - struct attribute + std::string name; + const std::string attribute_type = "Scalar"; + const std::string center = "Cell"; + mutable data_item item; + void add(ptree& node) { - std::string name; - const std::string attribute_type = "Scalar"; - const std::string center = "Cell"; - mutable data_item item; - void add(ptree& node) - { - ptree& attr_node = node.add("Attribute", ""); - attr_node.put(".Name", name); - attr_node.put(".AttributeType", attribute_type); - attr_node.put(".Center", center); - item.add(attr_node); - } + ptree& attr_node = node.add("Attribute", ""); + attr_node.put(".Name", name); + attr_node.put(".AttributeType", attribute_type); + attr_node.put(".Center", center); + item.add(attr_node); + } - // to allow storing attributes in std::set - friend bool operator<(const attribute &lhs, const attribute &rhs) - { - return lhs.name < rhs.name; - } - }; + // to allow storing attributes in std::set + friend bool operator<(const attribute_t &lhs, const attribute_t &rhs) + { + return lhs.name < rhs.name; + } + }; + + template + class xdmf_writer + { + using ptree = boost::property_tree::ptree; +#if (BOOST_VERSION >= 105600) + using xml_writer_settings = boost::property_tree::xml_writer_settings; +#else + using xml_writer_settings = boost::property_tree::xml_writer_settings; +#endif + using attribute = attribute_t; const std::string name = "Grid"; const std::string grid_type = "Uniform"; - topology top; - geometry geo; + topology top; + geometry geo; std::set attrs; std::set c_attrs; @@ -179,7 +192,7 @@ namespace libmpdataxx write_xml(xmf_name, pt, std::locale(), settings); } - void write_temporal(const std::string& xmf_name, const std::vector& timesteps) + void write_temporal(const std::string& xmf_name, const std::vector& timesteps, const std::string ×teps_suffix = "") { ptree pt; @@ -193,14 +206,13 @@ namespace libmpdataxx for (auto ts : timesteps) { ptree& ts_node = grid_node.add("xi:include", ""); - ts_node.put(".href", ts); + ts_node.put(".href", ts + timesteps_suffix); ts_node.put(".xpointer", "gid"); } xml_writer_settings settings('\t', 1); write_xml(xmf_name, pt, std::locale(), settings); } - }; } diff --git a/libmpdata++/output/hdf5.hpp b/libmpdata++/output/hdf5.hpp index 6d8b906b..859f2d77 100644 --- a/libmpdata++/output/hdf5.hpp +++ b/libmpdata++/output/hdf5.hpp @@ -40,7 +40,7 @@ namespace libmpdataxx using output_t = hdf5; std::unique_ptr hdfp; - std::map dim_names; + std::map dim_names, dim_names_ref; const std::string const_name = "const.h5"; std::string const_file; const hsize_t zero = 0, one = 1; @@ -55,10 +55,10 @@ namespace libmpdataxx H5::PredType::NATIVE_FLOAT, flttype_output = H5::PredType::NATIVE_FLOAT; // using floats not to waste disk space - blitz::TinyVector cshape, shape, chunk, srfcshape, srfcchunk, offst; + blitz::TinyVector cshape, shape, chunk, srfcshape, srfcchunk, offst, shape_ref, cshape_ref, chunk_ref, offst_ref; // what if grid refinement is not done??? H5::DSetCreatPropList params; - H5::DataSpace sspace, cspace, srfcspace; + H5::DataSpace sspace, cspace, srfcspace, sspace_ref, cspace_ref; #if defined(USE_MPI) hid_t fapl_id; #endif @@ -96,35 +96,52 @@ namespace libmpdataxx // creating the dimensions // x,y,z offst = 0; + offst_ref = 0; for (int d = 0; d < parent_t::n_dims; ++d) + { shape[d] = this->mem->distmem.grid_size[d]; + shape_ref[d] = this->mem->distmem.grid_size_ref[d]; + } chunk = shape; + chunk_ref = shape_ref; // there is one more coordinate than cell index in each dimension cshape = shape + 1; + cshape_ref = shape_ref + 1; srfcshape = shape; *(srfcshape.end()-1) = 1; sspace = H5::DataSpace(parent_t::n_dims, shape.data()); - srfcspace = H5::DataSpace(parent_t::n_dims, srfcshape.data()); cspace = H5::DataSpace(parent_t::n_dims, cshape.data()); + srfcspace = H5::DataSpace(parent_t::n_dims, srfcshape.data()); + sspace_ref = H5::DataSpace(parent_t::n_dims, shape_ref.data()); + cspace_ref = H5::DataSpace(parent_t::n_dims, cshape_ref.data()); #if defined(USE_MPI) if (this->mem->distmem.size() > 1) { shape[0] = this->mem->grid_size[0].length(); cshape[0] = this->mem->grid_size[0].length(); + shape_ref[0] = this->mem->distmem.rank() < this->mem->distmem.size() - 1 ? + this->mem->grid_size_ref[0].length()-1 : // -1 to make ranges nonoverlapping + this->mem->grid_size_ref[0].length(); + cshape_ref[0] = shape_ref[0]; if (this->mem->distmem.rank() == this->mem->distmem.size() - 1) + { cshape[0] += 1; + cshape_ref[0] += 1; + } offst[0] = this->mem->grid_size[0].first(); + offst_ref[0] = this->mem->grid_size_ref[0].first(); // chunk size has to be common to all processes ! // TODO: something better ? chunk[0] = ( (typename solver_t::real_t) (this->mem->distmem.grid_size[0])) / this->mem->distmem.size() + 0.5 ; + chunk_ref[0] = ( (typename solver_t::real_t) (this->mem->distmem.grid_size_ref[0])) / this->mem->distmem.size() + 0.5 ; } #endif @@ -174,6 +191,39 @@ namespace libmpdataxx curr_dim.write(coord.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, cshape.data()), dim_space, dxpl_id); } + // refined X, Y, Z, TODO: very similar to X, Y, Z + for (int i = 0; i < parent_t::n_dims; ++i) + { + + blitz::Array coord(cshape_ref); +#if defined(USE_MPI) + coord.reindexSelf(offst_ref); +#endif + std::string name; + switch (i) + { + case 0 : coord = this->di / 2. + this->di / this->mem->n_ref * (blitz::firstIndex() - .5); + name = "X refined"; + dim_names_ref[i] = name; + break; + case 1 : coord = this->dj / 2. + this->dj / this->mem->n_ref * (blitz::secondIndex() - .5); + name = "Y refined"; + dim_names_ref[i] = name; + break; + case 2 : coord = this->dk / 2. + this->dk / this->mem->n_ref * (blitz::thirdIndex() - .5); + name = "Z refined"; + dim_names_ref[i] = name; + break; + default : break; + } + + auto curr_dim = (*hdfp).createDataSet(name, flttype_output, cspace_ref); + + H5::DataSpace dim_space = curr_dim.getSpace(); + dim_space.selectHyperslab(H5S_SELECT_SET, cshape_ref.data(), offst_ref.data()); + curr_dim.write(coord.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, cshape_ref.data()), dim_space, dxpl_id); + } + // T { const hsize_t @@ -287,33 +337,35 @@ namespace libmpdataxx }; } - void record_dsc_helper(const H5::DataSet &dset, const typename solver_t::arr_t &arr) + void record_dsc_helper(const H5::DataSet &dset, const typename solver_t::arr_t &arr, const bool refined = false) { H5::DataSpace space = dset.getSpace(); - space.selectHyperslab(H5S_SELECT_SET, shape.data(), offst.data()); - // TODO: some permutation of grid_size instead of the switch + const auto _grid_size(refined ? this->mem->grid_size_ref : this->mem->grid_size); + const auto _shape(refined ? shape_ref : shape); + space.selectHyperslab(H5S_SELECT_SET, _shape.data(), refined ? offst_ref.data() : offst.data()); + + // TODO: some permutation of grid_size instead of the switch switch (int(solver_t::n_dims)) { case 1: { - typename solver_t::arr_t contiguous_arr = arr(this->mem->grid_size[0]).copy(); // create a copy that is contiguous - dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, shape.data()), space, dxpl_id); + typename solver_t::arr_t contiguous_arr = arr(_grid_size[0]).copy(); // create a copy that is contiguous + dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, _shape.data()), space, dxpl_id); break; } case 2: { - typename solver_t::arr_t contiguous_arr = arr(this->mem->grid_size[0], this->mem->grid_size[1]).copy(); // create a copy that is contiguous - dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, shape.data()), space, dxpl_id); + typename solver_t::arr_t contiguous_arr = arr(_grid_size[0], _grid_size[1]).copy(); // create a copy that is contiguous + dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, _shape.data()), space, dxpl_id); break; } case 3: { // create a copy that is contiguous and has the C-style (kji) storage order as required by HDF5 - typename solver_t::arr_t contiguous_arr(this->mem->grid_size[0], this->mem->grid_size[1], this->mem->grid_size[2]); - contiguous_arr = arr(this->mem->grid_size[0], this->mem->grid_size[1], this->mem->grid_size[2]); - - dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, shape.data()), space, dxpl_id); + typename solver_t::arr_t contiguous_arr(_grid_size[0], _grid_size[1], _grid_size[2]); + contiguous_arr = arr(_grid_size[0], _grid_size[1], _grid_size[2]); + dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, _shape.data()), space, dxpl_id); break; } default: assert(false); @@ -321,45 +373,54 @@ namespace libmpdataxx } // data is assumed to be contiguous and in the same layout as hdf variable and in the C-style storage order - void record_aux_hlpr(const std::string &name, typename solver_t::real_t *data, H5::H5File hdf) + void record_aux_hlpr(const std::string &name, typename solver_t::real_t *data, H5::H5File hdf, bool refined = false) { assert(this->rank == 0); + const auto _shape(refined ? shape_ref : shape); + const auto _offst(refined ? offst_ref : offst); + if(refined) params.setChunk(parent_t::n_dims, chunk_ref.data()); auto aux = hdf.createDataSet( name, flttype_output, - sspace, + refined ? sspace_ref : sspace, params ); auto space = aux.getSpace(); - space.selectHyperslab(H5S_SELECT_SET, shape.data(), offst.data()); - aux.write(data, flttype_solver, H5::DataSpace(parent_t::n_dims, shape.data()), space, dxpl_id); + space.selectHyperslab(H5S_SELECT_SET, _shape.data(), _offst.data()); + aux.write(data, flttype_solver, H5::DataSpace(parent_t::n_dims, _shape.data()), space, dxpl_id); + + // revert to default chunk + if(refined) params.setChunk(parent_t::n_dims, chunk.data()); } // for discontiguous array with halos - void record_aux_dsc_hlpr(const std::string &name, const typename solver_t::arr_t &arr, H5::H5File hdf, bool srfc = false) + void record_aux_dsc_hlpr(const std::string &name, const typename solver_t::arr_t &arr, H5::H5File hdf, bool srfc = false, bool refined = false) { assert(this->rank == 0); + assert(!(refined && srfc)); if(srfc) params.setChunk(parent_t::n_dims, srfcchunk.data()); + else if (refined) + params.setChunk(parent_t::n_dims, chunk_ref.data()); auto aux = hdf.createDataSet( name, flttype_output, - srfc ? srfcspace : sspace, + srfc ? srfcspace : refined ? sspace_ref : sspace, params ); if(srfc) - { - // revert to default chunk - params.setChunk(parent_t::n_dims, chunk.data()); record_dsc_srfc_helper(aux, arr); - } else - record_dsc_helper(aux, arr); + record_dsc_helper(aux, arr, refined); + + // revert to default chunk + if(srfc || refined) + params.setChunk(parent_t::n_dims, chunk.data()); } void record_scalar_hlpr(const std::string &name, const std::string &group_name, typename solver_t::real_t data, H5::H5File hdf) @@ -405,7 +466,12 @@ namespace libmpdataxx // data is assumed to be contiguous and in the same layout as hdf variable and in the C-style storage order void record_aux(const std::string &name, typename solver_t::real_t *data) { - record_aux_hlpr(name, data, *hdfp); + record_aux_hlpr(name, data, *hdfp, false); + } + + void record_aux_refined(const std::string &name, typename solver_t::real_t *data) + { + record_aux_hlpr(name, data, *hdfp, true); } void record_aux_dsc(const std::string &name, const typename solver_t::arr_t &arr, bool srfc = false) @@ -413,6 +479,11 @@ namespace libmpdataxx record_aux_dsc_hlpr(name, arr, *hdfp, srfc); } + void record_aux_dsc_refined(const std::string &name, const typename solver_t::arr_t &arr) + { + record_aux_dsc_hlpr(name, arr, *hdfp, false, true); + } + void record_aux_scalar(const std::string &name, const std::string &group_name, typename solver_t::real_t data) { record_scalar_hlpr(name, group_name, data, *hdfp); @@ -471,9 +542,11 @@ namespace libmpdataxx } // see above, also assumes that z is the last dimension - void record_prof_const(const std::string &name, typename solver_t::real_t *data) + void record_prof_const(const std::string &name, typename solver_t::real_t *data, const bool refined = false) { assert(this->rank == 0); + const auto _shape(refined ? shape_ref : shape); + const auto _offst(refined ? offst_ref : offst); H5::H5File hdfcp(const_file, H5F_ACC_RDWR #if defined(USE_MPI) @@ -484,7 +557,7 @@ namespace libmpdataxx auto aux = hdfcp.createDataSet( name, flttype_output, - H5::DataSpace(1, &shape[parent_t::n_dims - 1]) + H5::DataSpace(1, &_shape[parent_t::n_dims - 1]) ); #if defined(USE_MPI) @@ -492,8 +565,8 @@ namespace libmpdataxx #endif { auto space = aux.getSpace(); - space.selectHyperslab(H5S_SELECT_SET, &shape[parent_t::n_dims - 1], &offst[parent_t::n_dims - 1]); - aux.write(data, flttype_solver, H5::DataSpace(1, &shape[parent_t::n_dims - 1]), space); + space.selectHyperslab(H5S_SELECT_SET, &_shape[parent_t::n_dims - 1], &_offst[parent_t::n_dims - 1]); + aux.write(data, flttype_solver, H5::DataSpace(1, &_shape[parent_t::n_dims - 1]), space); } } @@ -627,13 +700,32 @@ namespace libmpdataxx } } + // as above but for solvers with fractal grid refinement + void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_fra_family_tag) + { + record_params(hdfcp, typename std::conditional + (parent_t::ct_params_t_::sgs_scheme) == solvers::iles, + typename solvers::mpdata_rhs_vip_prs_family_tag, // iles + typename std::conditional + (parent_t::ct_params_t_::sgs_scheme) == solvers::smg, + typename solvers::mpdata_rhs_vip_prs_sgs_smg_family_tag, // smg + typename solvers::mpdata_rhs_vip_prs_sgs_dns_family_tag // dns + >::type + >::type{} + ); + hdfcp.createGroup("fractal"); + const auto &group = hdfcp.openGroup("fractal"); + { + const auto type = H5::PredType::NATIVE_INT; + group.createAttribute("n_fra_iter", type, H5::DataSpace(1, &one)).write(type, &this->n_fra_iter); + } + } + // as above but for the boussinesq solver void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_boussinesq_family_tag) { - record_params(hdfcp, typename std::conditional - (parent_t::ct_params_t_::sgs_scheme) == solvers::iles, - typename solvers::mpdata_rhs_vip_prs_family_tag, - typename solvers::mpdata_rhs_vip_prs_sgs_smg_family_tag>::type{}); + record_params(hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_fra_family_tag{}); + hdfcp.createGroup("boussinesq"); const auto &group = hdfcp.openGroup("boussinesq"); { diff --git a/libmpdata++/output/hdf5_xdmf.hpp b/libmpdata++/output/hdf5_xdmf.hpp index 91425fce..9e1d7404 100644 --- a/libmpdata++/output/hdf5_xdmf.hpp +++ b/libmpdata++/output/hdf5_xdmf.hpp @@ -36,8 +36,8 @@ namespace libmpdataxx static_assert(parent_t::n_dims > 1, "only 2D and 3D output supported"); std::vector timesteps; - //xdmf writer - detail::xdmf_writer xdmfw; + //xdmf writer, additional one for refined data (separate .xmf files describing the refined grid, TODO: use two grids in one file? Paraview AMR dataset could help?) + detail::xdmf_writer xdmfw, xdmfw_ref; void start(const typename parent_t::advance_arg_t nt) { @@ -56,7 +56,8 @@ namespace libmpdataxx if (this->mem->G.get() != nullptr) xdmfw.add_const_attribute("G", this->const_name, this->mem->distmem.grid_size.data()); - xdmfw.setup(this->const_name, this->dim_names, attr_names, this->mem->distmem.grid_size.data()); + xdmfw.setup( this->const_name, this->dim_names, attr_names, this->mem->distmem.grid_size.data() ); + xdmfw_ref.setup(this->const_name, this->dim_names_ref, {}, this->mem->distmem.grid_size_ref.data()); } } @@ -70,10 +71,14 @@ namespace libmpdataxx std::string xmf_name = this->base_name() + ".xmf"; xdmfw.write(this->outdir + "/" + xmf_name, this->hdf_name(), this->record_time); + std::string xmf_ref_name = this->base_name() + "_ref.xmf"; + xdmfw_ref.write(this->outdir + "/" + xmf_ref_name, this->hdf_name(), this->record_time); + // save the xmf filename for temporal write - timesteps.push_back(xmf_name); + timesteps.push_back(this->base_name()); // write temporal xmf - xdmfw.write_temporal(this->outdir + "/temp.xmf", timesteps); + xdmfw.write_temporal(this->outdir + "/temp.xmf", timesteps, ".xmf"); + xdmfw_ref.write_temporal(this->outdir + "/temp_ref.xmf", timesteps, "_ref.xmf"); } } @@ -92,6 +97,15 @@ namespace libmpdataxx parent_t::record_aux(name, data); } + void record_aux_refined(const std::string &name, typename solver_t::real_t *data) + { +#if defined(USE_MPI) + if (this->mem->distmem.rank() == 0) +#endif + xdmfw_ref.add_attribute(name, this->hdf_name(), this->mem->distmem.grid_size_ref.data()); + parent_t::record_aux_refined(name, data); + } + void record_aux_dsc(const std::string &name, const typename solver_t::arr_t &arr, bool srfc = false) { auto shape = this->mem->distmem.grid_size; @@ -103,6 +117,16 @@ namespace libmpdataxx parent_t::record_aux_dsc(name, arr, srfc); } + void record_aux_dsc_refined(const std::string &name, const typename solver_t::arr_t &arr) + { + auto shape = this->mem->distmem.grid_size_ref; +#if defined(USE_MPI) + if (this->mem->distmem.rank() == 0) +#endif + xdmfw_ref.add_attribute(name, this->hdf_name(), shape.data()); + parent_t::record_aux_dsc_refined(name, arr); + } + public: // ctor diff --git a/libmpdata++/solvers/detail/boussinesq_common.hpp b/libmpdata++/solvers/detail/boussinesq_common.hpp index c7c1a6c0..e88203fa 100644 --- a/libmpdata++/solvers/detail/boussinesq_common.hpp +++ b/libmpdata++/solvers/detail/boussinesq_common.hpp @@ -5,7 +5,7 @@ * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/) */ #pragma once -#include +#include namespace libmpdataxx { @@ -14,9 +14,9 @@ namespace libmpdataxx namespace detail { template - class boussinesq_common : public libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs + class boussinesq_common : public libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs_fra { - using parent_t = libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs; + using parent_t = libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs_fra; public: using real_t = typename ct_params_t::real_t; diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp index 07d15f50..421dbeff 100644 --- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp +++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp @@ -50,6 +50,8 @@ namespace libmpdataxx arrvec_t &drv; arrvec_t &wrk; + // TODO: make all ijk... objects const + // like ijk, but containing vectors at the lower/left/fre edge // CAUTION: on sharedmem, ijkm contains overlapping ranges idx_t ijkm; @@ -60,7 +62,7 @@ namespace libmpdataxx // like ijk, but with range in x direction extended by 1 to the left for MPI compliance. // MPI requires that vector between two process domains is calculated by the process to the right of it (c.f. remote_2d.hpp fill_halos_vctr_alng) // TODO: change MPI logic to assume that it is calculated by the process to the left? then, ijk_vec would not be needed(?) - std::array ijk_vec; + std::array ijk_vec; // TODO: replace std::array with idx_t real_t cdrag; diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_3d.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_3d.hpp new file mode 100644 index 00000000..23e1bc0e --- /dev/null +++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_3d.hpp @@ -0,0 +1,251 @@ +/** @file + * @copyright University of Warsaw + * @section LICENSE + * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/) + */ + +#pragma once + +#include +#include + +namespace libmpdataxx +{ + namespace solvers + { + namespace detail + { + template + class mpdata_rhs_vip_prs_sgs_fra_dim + { +// static_assert(false); + }; + + template + class mpdata_rhs_vip_prs_sgs_fra_dim< + ct_params_t, + minhalo, + typename std::enable_if_t + > : public detail::mpdata_rhs_vip_prs_sgs_fra_common + { + public: + + using parent_t = detail::mpdata_rhs_vip_prs_sgs_fra_common; + using parent_t::parent_t; // inheriting constructors + using real_t = typename ct_params_t::real_t; + + private: + + // helper variables for grid refinement + // TODO: replace with idx_t: mid_ijk_r2r ijk_r2r_h + rng_t mid_ijk_r2r_0, mid_ijk_r2r_1, mid_ijk_r2r_2; // positions between already known values (to be filled during given iteration) + rng_t ijk_r2r_0_h_with_halo, ijk_r2r_1_h, ijk_r2r_2_h; // all positions at resolution of given iteration + int stride, hstride; // stride and half stride + + // fill distmem halos of refinee + // TODO: move to bcond or sth? would be filled only by remote bcond + void fill_refinee_distmem_halos(const int e, const int halo_size) + { + const int e_ref = this->ix_r2r.at(e); + // TODO: we only need to xchng along distmem direction (x) + this->xchng(e); + + // TODO: assert halo_size <= solver halo size + + // TODO: DRY! + switch(halo_size) + { + case 2: + if(this->mem->distmem.rank() < this->mem->distmem.size() - 1) + this->mem->psi_ref[e_ref]( + this->mem->grid_size_ref[0].last() + this->mem->n_ref / 2 + this->mem->n_ref , + this->ijk_r2r[1], + this->ijk_r2r[2] + ) = + this->mem->psi[e][0]( + this->ijk[0].last()+2, + this->ijk[1], + this->ijk[2] + ); + if(this->mem->distmem.rank() > 0) + this->mem->psi_ref[e_ref]( + this->mem->grid_size_ref[0].first() - this->mem->n_ref / 2 - this->mem->n_ref , + this->ijk_r2r[1], + this->ijk_r2r[2] + ) = + this->mem->psi[e][0]( + this->ijk[0].first()-2, + this->ijk[1], + this->ijk[2] + ); + // no break intentionally + case 1: + if(this->mem->distmem.rank() < this->mem->distmem.size() - 1) + this->mem->psi_ref[e_ref]( + this->mem->grid_size_ref[0].last() + this->mem->n_ref / 2, + this->ijk_r2r[1], + this->ijk_r2r[2] + ) = + this->mem->psi[e][0]( + this->ijk[0].last()+1, + this->ijk[1], + this->ijk[2] + ); + if(this->mem->distmem.rank() > 0) + this->mem->psi_ref[e_ref]( + this->mem->grid_size_ref[0].first() - this->mem->n_ref / 2, + this->ijk_r2r[1], + this->ijk_r2r[2] + ) = + this->mem->psi[e][0]( + this->ijk[0].first()-1, + this->ijk[1], + this->ijk[2] + ); + break; + default: + assert(false); + break; + } + } + + void refinement_ranges(const int iter, const int halo_size) + { + // messy, because in domain decomposition (sharedmem and distmem) some refined scalars are on the edge of the subdomain... + if(iter==0) + { + mid_ijk_r2r_0 = this->rng_midpoints(this->ijk_r2r[0], this->mem->distmem.rank(), this->mem->distmem.size()); + mid_ijk_r2r_1 = this->rng_midpoints_in_out(this->ijk_r2r[1], this->rank, this->mem->size); // different because we dont want overlapping ranges in y + mid_ijk_r2r_2 = this->rng_midpoints(this->ijk_r2r[2]); + + ijk_r2r_1_h = this->ijk_r2r[1]; + ijk_r2r_2_h = this->ijk_r2r[2]; + } + else + { + if(iter==1) + mid_ijk_r2r_0 = this->rng_midpoints_in(mid_ijk_r2r_0, this->mem->distmem.rank(), this->mem->distmem.size()); + else + mid_ijk_r2r_0 = this->rng_midpoints_out(mid_ijk_r2r_0); + + mid_ijk_r2r_1 = this->rng_midpoints_out(mid_ijk_r2r_1); + mid_ijk_r2r_2 = this->rng_midpoints_out(mid_ijk_r2r_2); + + ijk_r2r_1_h = this->rng_half_stride(ijk_r2r_1_h, this->rank, this->mem->size); + ijk_r2r_2_h = this->rng_half_stride(ijk_r2r_2_h); + } + + stride = ijk_r2r_1_h.stride(); + assert(ijk_r2r_1_h.stride() == ijk_r2r_2_h.stride()); + assert(stride % 2 == 0); + hstride = stride / 2; + + ijk_r2r_0_h_with_halo = rng_t( + this->mem->distmem.rank() == 0 ? this->ijk_r2r[0].first() : this->ijk_r2r[0].first() - halo_size * this->mem->n_ref, + this->mem->distmem.rank() == this->mem->distmem.size() - 1 ? this->ijk_r2r[0].last() : this->ijk_r2r[0].last() + halo_size * this->mem->n_ref, + hstride + ); + } + + // TODO: stretching parameters at the overlaps of the reconstructed and resolved arrays (ijk_r2r) are not used, do not generate them + // also not all parameters in the halo are needed (but some are!) + void generate_stretching_parameters(const int rng_seed = 44) + { + std::mt19937 gen(rng_seed); + std::uniform_real_distribution<> dis(-1, 1); // [-1,1), but whatever + auto rand = std::bind(dis, gen); + std::generate(this->d_j(this->ijk_ref_with_halo).begin(), this->d_j(this->ijk_ref_with_halo).end(), rand); + this->d_j(this->ijk_ref_with_halo) = formulae::fractal::d_of_CDF_fctr{}(this->d_j(this->ijk_ref_with_halo)); + } + + protected: + + void interpolate_refinee(const int e = 0) + { + assert(opts::isset(ct_params_t::fractal_recon, opts::bit(e))); + const int halo_size = 1; + const int e_ref = this->ix_r2r.at(e); + + //// TEMPORARY + //this->mem->psi_ref[e_ref] = -1000; + //this->mem->barrier(); + + + fill_refinee_distmem_halos(e, halo_size); + + // fill refined array at position where it overlaps with the resolved array + this->mem->refinee(e_ref)(this->ijk_r2r) = this->mem->advectee(e)(this->ijk); + +// generate_stretching_parameters(); + + for(int i=0; in_fra_iter; ++i) + { + refinement_ranges(i, halo_size); + + formulae::fractal::intrp<0, real_t>(this->mem->psi_ref[e_ref], mid_ijk_r2r_0, ijk_r2r_1_h, ijk_r2r_2_h, hstride); + this->mem->barrier(); + formulae::fractal::intrp<1, real_t>(this->mem->psi_ref[e_ref], mid_ijk_r2r_1, ijk_r2r_2_h, ijk_r2r_0_h_with_halo, hstride); + formulae::fractal::intrp<2, real_t>(this->mem->psi_ref[e_ref], mid_ijk_r2r_2, ijk_r2r_0_h_with_halo, this->rng_merge(ijk_r2r_1_h, mid_ijk_r2r_1), hstride); + } + this->mem->barrier(); + } + + void reconstruct_refinee(const int e = 0) + { + assert(opts::isset(ct_params_t::fractal_recon, opts::bit(e))); + const int halo_size = 2; + const int e_ref = this->ix_r2r.at(e); + + //// TEMPORARY + //this->mem->psi_ref[e_ref] = -1000; + //this->mem->barrier(); + + //std::cerr << "mem->grid_size_ref[0]: " << this->mem->grid_size_ref[0] << std::endl; + //std::cerr << "mem->grid_size_ref[1]: " << this->mem->grid_size_ref[1] << std::endl; + //std::cerr << "mem->grid_size_ref[2]: " << this->mem->grid_size_ref[2] << std::endl; + + fill_refinee_distmem_halos(e, halo_size); + + // fill refined array at position where it overlaps with the resolved array + this->mem->refinee(e_ref)(this->ijk_r2r) = this->mem->advectee(e)(this->ijk); + + generate_stretching_parameters(); + + for(int i=0; in_fra_iter; ++i) + { + refinement_ranges(i, halo_size); + + formulae::fractal::rcnstrct<0, real_t>(this->mem->psi_ref[e_ref], this->rng_dbl_stride(mid_ijk_r2r_0), ijk_r2r_1_h, ijk_r2r_2_h, hstride, this->c_j, this->d_j, this->f_j); + this->mem->barrier(); + formulae::fractal::rcnstrct<1, real_t>(this->mem->psi_ref[e_ref], this->rng_dbl_stride(mid_ijk_r2r_1) , ijk_r2r_2_h, ijk_r2r_0_h_with_halo, hstride, this->c_j, this->d_j, this->f_j); + formulae::fractal::rcnstrct<2, real_t>(this->mem->psi_ref[e_ref], this->rng_dbl_stride(mid_ijk_r2r_2) , ijk_r2r_0_h_with_halo, this->rng_merge(ijk_r2r_1_h, mid_ijk_r2r_1), hstride, this->c_j, this->d_j, this->f_j); + } + this->mem->barrier(); + } + + public: + + // helper method to allocate n_arr refined scalar temporary arrays + static void alloc_tmp_sclr_ref( + typename parent_t::mem_t *mem, + const char * __file__, const int n_arr, + std::string name = "", + bool srfc = false + ) + { + mem->tmp[__file__].push_back(new arrvec_t()); + + if (!name.empty()) mem->avail_tmp[name] = std::make_pair(__file__, mem->tmp[__file__].size() - 1); + + for (int n = 0; n < n_arr; ++n) + mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t( + parent_t::rng_ref_distmem_halo(mem->grid_size_ref[0], mem->n_ref, mem->distmem.rank(), mem->distmem.size()), + mem->grid_size_ref[1], + srfc ? rng_t(0, 0) : mem->grid_size_ref[2], + arr3D_storage + ))); + } + }; + } // namespace detail + } // namespace solvers +} // namespace libmpdataxx diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_common.hpp new file mode 100644 index 00000000..a01780cf --- /dev/null +++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_common.hpp @@ -0,0 +1,201 @@ +/** + * @file + * @copyright University of Warsaw + * @section LICENSE + * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/) + * + */ + +// solver with fractal reconstruction of SGS fields + +#pragma once + +#include +//#include +//#include +#include + +namespace libmpdataxx +{ + namespace solvers + { + namespace detail + { + // TODO: minimum halo size is 2 + template + class mpdata_rhs_vip_prs_sgs_fra_common : public mpdata_rhs_vip_prs_sgs + { + using parent_t = mpdata_rhs_vip_prs_sgs; + + public: + + using real_t = typename ct_params_t::real_t; + + protected: + + typename parent_t::arr_t c_j, d_j, f_j; // parameters used in fractal reconstruction, Akinlabi et al. 2019 + + + const int n_ref, // number of refinements; refined resolution is dx / n_ref + n_fra_iter; // number of iterations of grid refinement + +// const int halo_ref; // size of halos of refined scalar (and vector?) arrays + + const std::array ix_r2r; + constexpr std::array get_ix_r2r() + { + std::array ix_r2r{}; + int j = 0; + for(opts::opts_t e=0; e ijk_ref, // range of refinee handled by given solver + ijk_ref_with_halo; // same but with a halo in x direction between MPI processes + const idxs_t ijk_r2r; // resolved to refined; refined scalars at the same position as resolved scalars + + // range modifying methods used in grid refinement + // TODO: unify + + static rng_t rng_midpoints(const rng_t &rng, const int rank = 0, const int size = 1) + { + assert(rng.stride() % 2 == 0); + if(rng.last() - rng.first() < rng.stride()) return rng; + return rng_t( + rank == 0 ? rng.first() + rng.stride() / 2 : rng.first() - rng.stride() / 2, + rank == size - 1 ? rng.last() - rng.stride() / 2 : rng.last() + rng.stride() / 2, + rng.stride()); + } + + static rng_t rng_midpoints_in_out(const rng_t &rng, const int rank = 0, const int size = 1) + { + assert(rng.stride() % 2 == 0); + if(rng.last() - rng.first() < rng.stride()) return rng; + return rng_t( + rng.first() + rng.stride() / 2, + rank == size - 1 ? rng.last() - rng.stride() / 2 : rng.last() + rng.stride() / 2, + rng.stride()); + } + + static rng_t rng_midpoints_in(const rng_t &rng, const int rank = 0, const int size = 1) + { + assert(rng.stride() % 4 == 0); + if(rng.last() - rng.first() < rng.stride()) return rng; + else return rng_t( + rank == 0 ? rng.first() - rng.stride() / 4 : rng.first() + rng.stride() / 4, + rank == size - 1 ? rng.last() + rng.stride() / 4 : rng.last() - rng.stride() / 4, + rng.stride() / 2); + } + + static rng_t rng_midpoints_out(const rng_t &rng) + { + assert(rng.stride() % 4 == 0); + if(rng.last() - rng.first() < rng.stride()) return rng; + else return rng_t( + rng.first() - rng.stride() / 4, + rng.last() + rng.stride() / 4, + rng.stride() / 2); + } + + static rng_t rng_half_stride(const rng_t &rng, const int rank = 0, const int size = 1) + { + assert(rng.stride() % 2 == 0); + return rng_t( + rng.first(), + rank < size - 1 ? rng.last() + rng.stride() / 2 : rng.last(), + rng.stride() / 2); + } + + // reconstruction based on 3 points + // input is an array pointing to midpoints (to be reconstructed), but with a stride jumping on each one + // returned range points to the first from the pair of reconstructed points + // to avoid boundary conditions, in y and z directions it is assumed that the number of points is 3+i*2, i=0,1,2,... (this check is somewhere else) + // however in the x direction there can be any number of points, because the domain is divided between mpi processes... + static rng_t rng_dbl_stride(const rng_t &rng) + { + assert(rng.last() != rng.first()); // we need at least 2 midpoints + assert(rng.stride() % 2 == 0); + + // if domain starts with a point in the middle of a triple - we need to start calculating from a point to the left + const int offset = ( (rng.first() - rng.stride() / 2) / rng.stride() ) % 2 == 0 ? 0 : - rng.stride(); + const auto first = rng.first() + offset; + + if( ((rng.last() - first) / rng.stride() + 1) % 2 == 0) // even number of midpoints; y and z directions (and sometimes x) + return rng_t(first, rng.last() - rng.stride(), 2*rng.stride()); + else // odd number of midpoints + return rng_t(first, rng.last(), 2*rng.stride()); // rely on the halo along x direction + } + + static rng_t rng_merge(const rng_t &rng1, const rng_t &rng2) + { + assert(rng1.stride() == rng2.stride()); + return rng_t( + std::min(rng1.first(), rng2.first()), + std::max(rng1.last(), rng2.last()), + rng1.stride() / 2); + } + + // reconstruction based on 3 points, we need up to 2 resolved points between MPI domains + static rng_t rng_ref_distmem_halo(const rng_t &rng, const int &n_ref, const int rank = 0, const int size = 1) + { + return rng_t( + rank == 0 ? rng.first() : rng.first() - (n_ref + n_ref/2), + rank < size - 1 ? rng.last() + (n_ref + n_ref/2) : rng.last(), + rng.stride()); + } + + public: + + struct rt_params_t : parent_t::rt_params_t + { + int n_fra_iter = 1; // number of iterations of fractal reconstruction + }; + + // ctor + mpdata_rhs_vip_prs_sgs_fra_common( + typename parent_t::ctor_args_t args, + const rt_params_t &p + ) : + parent_t(args, p), + n_fra_iter(p.n_fra_iter), + n_ref(args.mem->n_ref), +// halo_ref(n_ref / 2), + ix_r2r(get_ix_r2r()), + // ijk_ref init below assumes 3D (shmem decomp dim along y); + // TODO: move this to concurr_common::init()? add something to ctor_args_t? + ijk_r2r{ + {this->ijk[0].first() * n_ref, this->ijk[1].first() * n_ref, this->ijk[2].first() * n_ref}, // lbound + {this->ijk[0].last() * n_ref, this->ijk[1].last() * n_ref, this->ijk[2].last() * n_ref}, // ubound + {n_ref, n_ref, n_ref}, // stride + } + { + assert(p.n_fra_iter > 0); + assert(n_ref % 2 == 0); + + for (int d = 0; d < ct_params_t::n_dims; ++d) + { + if(d==1) + { + ijk_ref.lbound(d) = this->mem->slab(this->mem->grid_size_ref[d], this->rank, this->mem->size).first();// this->ijk.lbound(d) * n_ref; + ijk_ref.ubound(d) = this->mem->slab(this->mem->grid_size_ref[d], this->rank, this->mem->size).last(); + } + else + { + ijk_ref.lbound(d) = this->mem->grid_size_ref[d].first(); + ijk_ref.ubound(d) = this->mem->grid_size_ref[d].last(); + } + } + ijk_ref_with_halo = ijk_ref; + const auto ijk_ref_with_halo_rng_0 = rng_ref_distmem_halo(ijk_ref[0], this->mem->n_ref, this->mem->distmem.rank(), this->mem->distmem.size()); + ijk_ref_with_halo.lbound(0) = ijk_ref_with_halo_rng_0.first(); + ijk_ref_with_halo.ubound(0) = ijk_ref_with_halo_rng_0.last(); + } + }; + } // namespace detail + } // namespace solvers +} // namespace libmpdataxx diff --git a/libmpdata++/solvers/detail/solver_1d.hpp b/libmpdata++/solvers/detail/solver_1d.hpp index 1b007aa4..336284fd 100644 --- a/libmpdata++/solvers/detail/solver_1d.hpp +++ b/libmpdata++/solvers/detail/solver_1d.hpp @@ -199,6 +199,15 @@ namespace libmpdataxx protected: // helper method to allocate a vector-component temporary array + static void alloc_tmp_vctr( + typename parent_t::mem_t *mem, + const char * __file__, + const std::array grid_size + ) + { + alloc_tmp(mem, __file__, 1, parent_t::rng_vctr(grid_size[0])); // always one-component vectors + } + static void alloc_tmp_vctr( typename parent_t::mem_t *mem, const char * __file__ diff --git a/libmpdata++/solvers/detail/solver_2d.hpp b/libmpdata++/solvers/detail/solver_2d.hpp index fa8876f4..e9e515e6 100644 --- a/libmpdata++/solvers/detail/solver_2d.hpp +++ b/libmpdata++/solvers/detail/solver_2d.hpp @@ -364,6 +364,7 @@ namespace libmpdataxx const char * __file__, const int n_arr, const std::vector> &stgr, + const std::array grid_size, bool srfc = false ) { @@ -371,21 +372,42 @@ namespace libmpdataxx for (int n = 0; n < n_arr; ++n) { mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t( - stgr[n][0] ? parent_t::rng_vctr(mem->grid_size[0]) : parent_t::rng_sclr(mem->grid_size[0]), + stgr[n][0] ? parent_t::rng_vctr(grid_size[0]) : parent_t::rng_sclr(grid_size[0]), srfc ? rng_t(0, 0) : - stgr[n][1] ? parent_t::rng_vctr(mem->grid_size[1]) : - parent_t::rng_sclr(mem->grid_size[1]) + stgr[n][1] ? parent_t::rng_vctr(grid_size[1]) : + parent_t::rng_sclr(grid_size[1]) ))); } } + // version with default grid size + static void alloc_tmp_stgr( + typename parent_t::mem_t *mem, + const char * __file__, + const int n_arr, + const std::vector> &stgr, + bool srfc = false // allocate only surface data + ) + { + alloc_tmp_stgr(mem, __file__, n_arr, stgr, mem->grid_size, srfc); + } + // helper method to allocate a temporary space composed of vector-component arrays + static void alloc_tmp_vctr( + typename parent_t::mem_t *mem, + const char * __file__, + const std::array grid_size + ) + { + alloc_tmp_stgr(mem, __file__, 2, {{true, false}, {false, true}}, grid_size, false); + } + static void alloc_tmp_vctr( typename parent_t::mem_t *mem, const char * __file__ ) { - alloc_tmp_stgr(mem, __file__, 2, {{true, false}, {false, true}}); + alloc_tmp_stgr(mem, __file__, 2, {{true, false}, {false, true}}, false); } // helper method to allocate n_arr scalar temporary arrays @@ -393,7 +415,7 @@ namespace libmpdataxx typename parent_t::mem_t *mem, const char * __file__, const int n_arr, std::string name = "", - bool srfc = false + bool srfc = false ) { mem->tmp[__file__].push_back(new arrvec_t()); diff --git a/libmpdata++/solvers/detail/solver_3d.hpp b/libmpdata++/solvers/detail/solver_3d.hpp index ea62aac9..3391dd2b 100644 --- a/libmpdata++/solvers/detail/solver_3d.hpp +++ b/libmpdata++/solvers/detail/solver_3d.hpp @@ -488,6 +488,7 @@ namespace libmpdataxx const char * __file__, const int n_arr, const std::vector> &stgr, + const std::array grid_size, bool srfc = false // allocate only surface data ) { @@ -495,23 +496,44 @@ namespace libmpdataxx for (int n = 0; n < n_arr; ++n) { mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t( - stgr[n][0] ? parent_t::rng_vctr(mem->grid_size[0]) : parent_t::rng_sclr(mem->grid_size[0]), - stgr[n][1] ? parent_t::rng_vctr(mem->grid_size[1]) : parent_t::rng_sclr(mem->grid_size[1]), + stgr[n][0] ? parent_t::rng_vctr(grid_size[0]) : parent_t::rng_sclr(grid_size[0]), + stgr[n][1] ? parent_t::rng_vctr(grid_size[1]) : parent_t::rng_sclr(grid_size[1]), srfc ? rng_t(0, 0) : - stgr[n][2] ? parent_t::rng_vctr(mem->grid_size[2]) : - parent_t::rng_sclr(mem->grid_size[2]), + stgr[n][2] ? parent_t::rng_vctr(grid_size[2]) : + parent_t::rng_sclr(grid_size[2]), arr3D_storage ))); } } + // version with default grid size + static void alloc_tmp_stgr( + typename parent_t::mem_t *mem, + const char * __file__, + const int n_arr, + const std::vector> &stgr, + bool srfc = false // allocate only surface data + ) + { + alloc_tmp_stgr(mem, __file__, n_arr, stgr, mem->grid_size, srfc); + } + // helper method to allocate a temporary space composed of vector-component arrays + static void alloc_tmp_vctr( + typename parent_t::mem_t *mem, + const char * __file__, + const std::array grid_size + ) + { + alloc_tmp_stgr(mem, __file__, 3, {{true, false, false}, {false, true, false}, {false, false, true}}, grid_size, false); + } + static void alloc_tmp_vctr( typename parent_t::mem_t *mem, const char * __file__ ) { - alloc_tmp_stgr(mem, __file__, 3, {{true, false, false}, {false, true, false}, {false, false, true}}); + alloc_tmp_stgr(mem, __file__, 3, {{true, false, false}, {false, true, false}, {false, false, true}}, false); } // helper method to allocate n_arr scalar temporary arrays @@ -519,7 +541,7 @@ namespace libmpdataxx typename parent_t::mem_t *mem, const char * __file__, const int n_arr, std::string name = "", - bool srfc = false // allocate only surface data + bool srfc = false ) { mem->tmp[__file__].push_back(new arrvec_t()); diff --git a/libmpdata++/solvers/detail/solver_common.hpp b/libmpdata++/solvers/detail/solver_common.hpp index 5aab5f07..90126ab2 100644 --- a/libmpdata++/solvers/detail/solver_common.hpp +++ b/libmpdata++/solvers/detail/solver_common.hpp @@ -8,9 +8,10 @@ #include #include -#include +#include #include +#include #include @@ -72,7 +73,18 @@ namespace libmpdataxx real_t time = 0; std::vector n; - typedef concurr::detail::sharedmem mem_t; + using shmem_ref_t = concurr::detail::sharedmem_refined; + using shmem_t = concurr::detail::sharedmem; + + public: + + // if fractal reconstruction of some variables is required in ct_params, use special type of sharedmem + using mem_t = typename std::conditional_t< + detail::slvr_with_frac_recn_v, + shmem_ref_t, shmem_t>; + + protected: + mem_t *mem; // helper methods invoked by solve() diff --git a/libmpdata++/solvers/detail/solver_type_traits.hpp b/libmpdata++/solvers/detail/solver_type_traits.hpp new file mode 100644 index 00000000..bbf60361 --- /dev/null +++ b/libmpdata++/solvers/detail/solver_type_traits.hpp @@ -0,0 +1,23 @@ +#pragma once + +namespace libmpdataxx +{ + namespace solvers + { + namespace detail + { + // helpers that detect if the solver uses fractal reconstruction + template< class, class = void > + struct slvr_with_frac_recn : std::false_type { }; + + template< class ct_params_t > + struct slvr_with_frac_recn< + ct_params_t, + typename std::enable_if_t<(int)ct_params_t::fractal_recon != (int)0 > > : std::true_type { }; + //std::enable_if_t< std::is_base_of_v< libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs_fra_family_tag, solver_t > > > : std::true_type { }; + + template + inline constexpr bool slvr_with_frac_recn_v = slvr_with_frac_recn::value; + } + } +} diff --git a/libmpdata++/solvers/mpdata_rhs.hpp b/libmpdata++/solvers/mpdata_rhs.hpp index 351d7c43..d247ae1a 100644 --- a/libmpdata++/solvers/mpdata_rhs.hpp +++ b/libmpdata++/solvers/mpdata_rhs.hpp @@ -45,6 +45,7 @@ namespace libmpdataxx #endif protected: + using solver_family = mpdata_rhs_family_tag; // member fields diff --git a/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs_fra.hpp b/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs_fra.hpp new file mode 100644 index 00000000..4397571b --- /dev/null +++ b/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs_fra.hpp @@ -0,0 +1,71 @@ +/** + * @file + * @copyright University of Warsaw + * @section LICENSE + * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/) + * + */ + +#pragma once + +#include + +namespace libmpdataxx +{ + namespace solvers + { + struct mpdata_rhs_vip_prs_sgs_fra_family_tag {}; + + template + class mpdata_rhs_vip_prs_sgs_fra; + + // no fields with fractal reconstruction, inherit from mpdata_rhs_vip_prs_sgs + template + class mpdata_rhs_vip_prs_sgs_fra< + ct_params_t, minhalo, + typename std::enable_if_t<(int)ct_params_t::fractal_recon == (int)0> + > : public mpdata_rhs_vip_prs_sgs + { + using parent_t = mpdata_rhs_vip_prs_sgs; + using parent_t::parent_t; // inheriting constructors + }; + + template + class mpdata_rhs_vip_prs_sgs_fra< + ct_params_t, minhalo, + typename std::enable_if_t<(int)ct_params_t::fractal_recon != (int)0> + > : public detail::mpdata_rhs_vip_prs_sgs_fra_dim + { + using parent_t = detail::mpdata_rhs_vip_prs_sgs_fra_dim; + + protected: + using solver_family = mpdata_rhs_vip_prs_sgs_fra_family_tag; + + //ctor + mpdata_rhs_vip_prs_sgs_fra( + typename parent_t::ctor_args_t args, + const typename parent_t::rt_params_t &p + ) : + parent_t(args, p) + { + this->c_j.reference(args.mem->tmp[__FILE__][1][0]); + this->d_j.reference(args.mem->tmp[__FILE__][1][1]); + this->f_j.reference(args.mem->tmp[__FILE__][1][2]); + } + + // why alloc here? + static void alloc( + typename parent_t::mem_t *mem, + const int &n_iters + ) { + parent_t::alloc(mem, n_iters); + parent_t::alloc_tmp_sclr_ref(mem, __FILE__, opts::nset(ct_params_t::fractal_recon)); + parent_t::alloc_tmp_sclr_ref(mem, __FILE__, 3); // c_j, d_j, f_j +// parent_t::alloc_tmp_vctr(mem, __FILE__, mem->grid_size_ref); // GC_ref + + mem->psi_ref = mem->tmp[__FILE__][0]; +// mem->GC_ref = mem->tmp[__FILE__][1]; + } + }; + } // namespace solvers +} // namespace libmpdataxx diff --git a/tests/sandbox/pbl/pbl.hpp b/tests/sandbox/pbl/pbl.hpp index 0b2f7079..7ff0e255 100644 --- a/tests/sandbox/pbl/pbl.hpp +++ b/tests/sandbox/pbl/pbl.hpp @@ -59,7 +59,11 @@ class pbl : public libmpdataxx::output::hdf5_xdmftimestep % static_cast(this->outfreq) == 0) { + //this->reconstruct_refinee(ix::w); + this->reconstruct_refinee(ix::tht); + if (this->rank == 0) std::cout << this->timestep << std::endl; + this->mem->barrier(); if (this->rank == 0) { @@ -68,6 +72,17 @@ class pbl : public libmpdataxx::output::hdf5_xdmfrecord_aux_dsc("tke", this->tke); } this->record_aux_dsc("p", this->Phi); + //this->record_aux_dsc_refined("w reconstructed", this->mem->refinee(this->ix_r2r.at(ix::w))); + this->record_aux_dsc_refined("tht reconstructed", this->mem->refinee(this->ix_r2r.at(ix::tht))); + } + this->mem->barrier(); + + this->interpolate_refinee(ix::tht); + + this->mem->barrier(); + if (this->rank == 0) + { + this->record_aux_dsc_refined("tht interpolated", this->mem->refinee(this->ix_r2r.at(ix::tht))); } this->mem->barrier(); } diff --git a/tests/sandbox/pbl/pbl_test_def.hpp b/tests/sandbox/pbl/pbl_test_def.hpp index ef63d592..9409c4bb 100644 --- a/tests/sandbox/pbl/pbl_test_def.hpp +++ b/tests/sandbox/pbl/pbl_test_def.hpp @@ -22,6 +22,7 @@ template void set_sgs_specific(params_t &p, iles_tag) { p.iles_cdrag = 0.1; + p.n_fra_iter = 2; } // smagorinsky @@ -32,6 +33,7 @@ void set_sgs_specific(params_t &p, smg_tag) p.smg_c = 0.165; p.prandtl_num = 0.42; p.cdrag = 0.1; + p.n_fra_iter = 4; } template @@ -54,6 +56,8 @@ void test(const std::string &dirname, const int np, const int nt) u, v, w, tht, vip_i=u, vip_j=v, vip_k=w, vip_den=-1 }; }; + //enum { fractal_recon = libmpdataxx::opts::bit(ix::w) }; + enum { fractal_recon = libmpdataxx::opts::bit(ix::tht) }; }; using ix = typename ct_params_t::ix;