diff --git a/libmpdata++-config.cmake b/libmpdata++-config.cmake
index 32c4e83a..0415f1ec 100644
--- a/libmpdata++-config.cmake
+++ b/libmpdata++-config.cmake
@@ -32,7 +32,7 @@ set(libmpdataxx_INCLUDE_DIRS "${CMAKE_CURRENT_LIST_DIR}/../../include/")
 
 ############################################################################################
 # debug mode compiler flags
-set(libmpdataxx_CXX_FLAGS_DEBUG "${libmpdataxx_CXX_FLAGS_DEBUG} -std=c++14 -DBZ_DEBUG -g -Wno-enum-compare") #TODO: -Og if compiler supports it?
+set(libmpdataxx_CXX_FLAGS_DEBUG "${libmpdataxx_CXX_FLAGS_DEBUG} -std=c++17 -DBZ_DEBUG -g -Wno-enum-compare -Wfatal-errors") #TODO: -Og if compiler supports it?
 
 
 ############################################################################################
@@ -42,7 +42,7 @@ if(
   CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
   CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" 
 )
-  set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=c++14 -DNDEBUG -Ofast -march=native -Wno-enum-compare")
+  set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=c++17 -DNDEBUG -Ofast -march=native -Wno-enum-compare")
 
   # preventing Kahan summation from being optimised out
   if (
@@ -58,7 +58,7 @@ if(
   CMAKE_CXX_COMPILER_ID STREQUAL "Intel"
 )
   # flags taken from -fast but without -static
-  set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=gnu++14 -DNDEBUG -xHOST -O3 -ipo -no-prec-div -fp-model fast=2")
+  set(libmpdataxx_CXX_FLAGS_RELEASE "${libmpdataxx_CXX_FLAGS_RELEASE} -std=gnu++17 -DNDEBUG -xHOST -O3 -ipo -no-prec-div -fp-model fast=2")
 endif()
 
 
diff --git a/libmpdata++/blitz.hpp b/libmpdata++/blitz.hpp
index 53273bde..8a54410a 100644
--- a/libmpdata++/blitz.hpp
+++ b/libmpdata++/blitz.hpp
@@ -51,6 +51,7 @@
 namespace libmpdataxx
 {
   template <int n_dims> using idx_t = blitz::RectDomain<n_dims>;
+  template <int n_dims> using idxs_t = blitz::StridedDomain<n_dims>;
   using rng_t = blitz::Range;
 
   // non-int ix_t means either rng_t or idx_t
diff --git a/libmpdata++/concurr/boost_thread.hpp b/libmpdata++/concurr/boost_thread.hpp
index f1d7c7a6..c804e9bd 100644
--- a/libmpdata++/concurr/boost_thread.hpp
+++ b/libmpdata++/concurr/boost_thread.hpp
@@ -51,10 +51,8 @@ namespace libmpdataxx
 
 
         // ctor
-        mem_t(const std::array<int, solver_t::n_dims> &grid_size) :
-          b(size(grid_size[0])),
-          parent_t::mem_t(grid_size, size(grid_size[0]))
-        {};
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size, const int n_ref) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {};
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size)                  : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0]))        {};
 
         void barrier()
         {
@@ -81,7 +79,7 @@ namespace libmpdataxx
 
       // ctor
       boost_thread(const typename solver_t::rt_params_t &p) :
-        parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
+        parent_t(p, detail::mem_factory<mem_t, solver_t>(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
       {}
 
     };
diff --git a/libmpdata++/concurr/cxx11_thread.hpp b/libmpdata++/concurr/cxx11_thread.hpp
index 77dc47d9..408b7f3b 100644
--- a/libmpdata++/concurr/cxx11_thread.hpp
+++ b/libmpdata++/concurr/cxx11_thread.hpp
@@ -92,10 +92,8 @@ namespace libmpdataxx
         }
 
         // ctor
-        mem_t(const std::array<int, solver_t::n_dims> &grid_size) :
-          b(size(grid_size[0])),
-          parent_t::mem_t(grid_size, size(grid_size[0]))
-        {};
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size, const int n_ref) : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {};
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size)                  : b(size(grid_size[0])), parent_t::mem_t(grid_size, size(grid_size[0]))        {};
 
         void barrier()
         {
@@ -119,7 +117,7 @@ namespace libmpdataxx
 
       // ctor
       cxx11_thread(const typename solver_t::rt_params_t &p) :
-        parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
+        parent_t(p, detail::mem_factory<mem_t, solver_t>(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
       {}
 
     };
diff --git a/libmpdata++/concurr/detail/concurr_common.hpp b/libmpdata++/concurr/detail/concurr_common.hpp
index 49e53eeb..5981803a 100644
--- a/libmpdata++/concurr/detail/concurr_common.hpp
+++ b/libmpdata++/concurr/detail/concurr_common.hpp
@@ -14,7 +14,7 @@
 #include <boost/ptr_container/ptr_vector.hpp>
 #include <libmpdata++/blitz.hpp>
 
-#include <libmpdata++/concurr/detail/sharedmem.hpp>
+#include <libmpdata++/concurr/detail/sharedmem_refined.hpp>
 #include <libmpdata++/concurr/detail/timer.hpp>
 #include <libmpdata++/concurr/any.hpp>
 
@@ -34,6 +34,8 @@
 #include <libmpdata++/bcond/remote_3d.hpp>
 #include <libmpdata++/bcond/gndsky_3d.hpp>
 
+#include <libmpdata++/solvers/detail/solver_type_traits.hpp>
+
 namespace libmpdataxx
 {
   namespace concurr
@@ -149,12 +151,7 @@ namespace libmpdataxx
 
         protected:
 
-        // (cannot be nested due to templates)
-        typedef sharedmem<
-          typename solver_t::real_t,
-          solver_t::n_dims,
-          solver_t::n_tlev
-        > mem_t;
+        using mem_t = typename solver_t::mem_t;
 
         // member fields
         boost::ptr_vector<solver_t> algos;
@@ -452,6 +449,15 @@ namespace libmpdataxx
           return mem->max(mem->advectee(e));
         }
       };
+
+      template< class mem_t, class solver_t>
+      mem_t* mem_factory(const typename solver_t::rt_params_t &p)
+      {
+        if constexpr (solvers::detail::slvr_with_frac_recn<typename solver_t::ct_params_t_>())
+          return new mem_t(p.grid_size, pow(2, p.n_fra_iter));
+        else
+          return new mem_t(p.grid_size);
+      }
     } // namespace detail
   } // namespace concurr
 } // namespace libmpdataxx
diff --git a/libmpdata++/concurr/detail/distmem.hpp b/libmpdata++/concurr/detail/distmem.hpp
index d17503f0..a4d50357 100644
--- a/libmpdata++/concurr/detail/distmem.hpp
+++ b/libmpdata++/concurr/detail/distmem.hpp
@@ -54,6 +54,7 @@ namespace libmpdataxx
         public:
 
         std::array<int, n_dims> grid_size;
+        std::array<int, n_dims> grid_size_ref;
 
         int rank()
         {
diff --git a/libmpdata++/concurr/detail/sharedmem.hpp b/libmpdata++/concurr/detail/sharedmem.hpp
index f6ae1c5e..9e888025 100644
--- a/libmpdata++/concurr/detail/sharedmem.hpp
+++ b/libmpdata++/concurr/detail/sharedmem.hpp
@@ -97,8 +97,6 @@ namespace libmpdataxx
               rng_t(0, grid_size[d]-1),
               d == 0 ? distmem.rank() : 0,          // decomposition along x, because that's MPI decomposition
               d == 0 ? distmem.size() : 1
-             // d == shmem_decomp_dim ? distmem.rank() : 0,
-             // d == shmem_decomp_dim ? distmem.size() : 1
             );
             origin[d] = this->grid_size[d].first();
           }
@@ -294,6 +292,7 @@ namespace libmpdataxx
         }
 
         virtual arr_t advectee(int e = 0) = 0;
+        virtual const arr_t advectee_global(int e = 0) = 0;
 
         void advectee_global_set(const arr_t arr, int e = 0)
         {
@@ -507,9 +506,11 @@ namespace libmpdataxx
       class sharedmem<real_t, 3, n_tlev> : public sharedmem_common<real_t, 3, n_tlev>
       {
         using parent_t = sharedmem_common<real_t, 3, n_tlev>;
-        using arr_t = typename parent_t::arr_t;
         using parent_t::parent_t; // inheriting ctors
 
+        protected:
+        using arr_t = typename parent_t::arr_t;
+
         public:
 
         virtual arr_t *never_delete(arr_t *arg) override
diff --git a/libmpdata++/concurr/detail/sharedmem_refined.hpp b/libmpdata++/concurr/detail/sharedmem_refined.hpp
new file mode 100644
index 00000000..6c393802
--- /dev/null
+++ b/libmpdata++/concurr/detail/sharedmem_refined.hpp
@@ -0,0 +1,122 @@
+/** @file
+ * @copyright University of Warsaw
+ * @section LICENSE
+ * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+ */
+
+// memory management with (fractal) grid refinement
+
+#pragma once
+
+#include "sharedmem.hpp"
+
+namespace libmpdataxx
+{
+  namespace concurr
+  {
+    namespace detail
+    {
+      template <
+        typename real_t,
+        int n_dims,
+        int n_tlev
+      >
+      class sharedmem_refined_common : public sharedmem<real_t, n_dims, n_tlev>
+      {
+        using parent_t = sharedmem<real_t, n_dims, n_tlev>;
+
+        protected:
+
+        using arr_t = typename parent_t::arr_t;
+
+        blitz::TinyVector<int, n_dims> origin_ref;
+
+        public:
+
+        const int n_ref; // number of equal divisions of the large cell (in each direction), refined resolution is dx / n_ref;
+                         // every n_ref scalar of the refined grid is at the same position as a scalar of the normal grid
+                         // no refinement done in the halo, because there are no SD in the halo (it's not real space)
+                         // what about MPI boundaries? there are refined points exactly at the boundary (since n_ref has to be even)
+                         // note: if there is a refined cell that is divided by the MPI boundary, o we need to add contributions from microphysics to both processes on both sides?
+                         //       maybe not, because microphysics contrbutions will affect the large cells, which are not divided by the MPI boundary...
+
+        std::array<rng_t, n_dims> grid_size_ref;
+        // TODO: these are public because used from outside in alloc - could friendship help?
+        //arrvec_t<arr_t> GC_ref, psi_ref; 
+        arrvec_t<arr_t> psi_ref; 
+
+        // ctors
+        sharedmem_refined_common(const std::array<int, n_dims> &grid_size, const int &size, const int &n_ref)
+          : parent_t(grid_size, size), n_ref(n_ref)
+        {
+          assert(n_ref % 2 == 0); // only division into even number of cells, because we assume that one of the refined scalar points is at the MPI boundary, which is in the middle between normal grid scalars
+
+          // for now, require a grid_size that is convenient for fractal reconstruction (which calculates 2 points based on 3 points)
+          // NOTE: fix this with proper halos (cyclic is easy, but what about rigid?)
+          // NOTE2: this is actually a requirement for fractal reconstruction, not for any grid refinement, so move this somewhere else
+          for (int d = 0; d < n_dims; ++d)
+            if((grid_size[d] - 3) % 2 != 0) throw std::runtime_error("Fractal grid refinement requires nx/ny/nz = 3 + 2 * i, where i = 0,1,2,3,...");
+
+          for (int d = 0; d < n_dims; ++d)
+          {
+            grid_size_ref[d] = refine_grid_size(
+              this->grid_size[d],
+              n_ref,
+              d == 0 ? this->distmem.rank() : 0,
+              d == 0 ? this->distmem.size() : 1
+            );
+            origin_ref[d] = grid_size_ref[d].first();
+
+            this->distmem.grid_size_ref[d] = refine_grid_size(rng_t(0,grid_size[d]-1), n_ref, 0, 1).length();
+          }
+        }
+
+        // NOTE: not all advectees are refined, so e (numbering) in refinee is different than in advectee
+        virtual arr_t refinee(int e = 0) = 0;
+      //  virtual const arr_t refinee_global_ref(int e = 0) = 0;
+
+        public:
+        static rng_t refine_grid_size(
+          const rng_t &grid_size,
+          const int &n_ref,
+          const int &mpi_rank,
+          const int &mpi_size
+        ) {
+          assert(n_ref % 2 == 0);
+          // NOTE: overlapping points inbetween MPI domains
+          return rng_t(
+            mpi_rank == 0          ? grid_size.first() * n_ref : grid_size.first() * n_ref - n_ref / 2,
+            mpi_rank == mpi_size-1 ? grid_size.last()  * n_ref : grid_size.last()  * n_ref + n_ref / 2 // refined points between MPI domains are evenly divided between MPI processes
+          );
+        }
+      };
+
+
+
+      template<typename real_t, int n_dims, int n_tlev>
+      class sharedmem_refined
+      {};
+
+      template<typename real_t, int n_tlev>
+      class sharedmem_refined<real_t, 3, n_tlev> : public sharedmem_refined_common<real_t, 3, n_tlev>
+      {
+        using parent_t = sharedmem_refined_common<real_t, 3, n_tlev>;
+        using parent_t::parent_t; // inheriting ctors
+
+        protected:
+        using arr_t = typename parent_t::arr_t;
+
+        public:
+        arr_t refinee(int e = 0) override
+        {
+          return this->psi_ref[e](
+            this->grid_size_ref[0],
+            this->grid_size_ref[1],
+            this->grid_size_ref[2]
+          ).reindex(this->origin_ref);
+        }
+      };
+
+    } // namespace detail
+  } // namespace concurr
+} // namespace libmpdataxx
diff --git a/libmpdata++/concurr/openmp.hpp b/libmpdata++/concurr/openmp.hpp
index d33cda98..96362754 100644
--- a/libmpdata++/concurr/openmp.hpp
+++ b/libmpdata++/concurr/openmp.hpp
@@ -31,7 +31,6 @@ namespace libmpdataxx
     {
       using parent_t = detail::concurr_common<solver_t, bcxl, bcxr, bcyl, bcyr, bczl, bczr>;
 
-
       struct mem_t : parent_t::mem_t
       {
         static int size(const unsigned max_threads = std::numeric_limits<unsigned>::max())
@@ -58,7 +57,8 @@ namespace libmpdataxx
         }
 
         // ctors
-        mem_t(const std::array<int, solver_t::n_dims> &grid_size) : parent_t::mem_t(grid_size, size(grid_size[0])) {};
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size, const int n_ref) : parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {};
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size)                  : parent_t::mem_t(grid_size, size(grid_size[0]))        {};
       };
 
       void solve(typename parent_t::advance_arg_t nt)
@@ -75,9 +75,8 @@ namespace libmpdataxx
 
       public:
 
-      // ctor
       openmp(const typename solver_t::rt_params_t &p) :
-        parent_t(p, new mem_t(p.grid_size), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
+        parent_t(p, detail::mem_factory<mem_t, solver_t>(p), mem_t::size(p.grid_size[solver_t::n_dims < 3 ? 0 : 1])) // note 3D domain decomposition in y direction
       {}
 
     };
diff --git a/libmpdata++/concurr/serial.hpp b/libmpdata++/concurr/serial.hpp
index f6abc1ea..1a5fe18b 100644
--- a/libmpdata++/concurr/serial.hpp
+++ b/libmpdata++/concurr/serial.hpp
@@ -33,9 +33,8 @@ namespace libmpdataxx
         void barrier() { }
 
         // ctors
-        mem_t(const std::array<int, solver_t::n_dims> &grid_size)
-          : parent_t::mem_t(grid_size, size())
-        {};
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size, const int n_ref) : parent_t::mem_t(grid_size, size(grid_size[0]), n_ref) {};
+        mem_t(const std::array<int, solver_t::n_dims> &grid_size)                  : parent_t::mem_t(grid_size, size(grid_size[0]))        {};
       };
 
       void solve(typename parent_t::advance_arg_t nt)
@@ -47,7 +46,7 @@ namespace libmpdataxx
 
       // ctor
       serial(const typename solver_t::rt_params_t &p) :
-        parent_t(p, new mem_t(p.grid_size), mem_t::size())
+        parent_t(p, detail::mem_factory<mem_t, solver_t>(p), mem_t::size())
       {}
 
     };
diff --git a/libmpdata++/formulae/fractal_reconstruction.hpp b/libmpdata++/formulae/fractal_reconstruction.hpp
new file mode 100644
index 00000000..15fbacc6
--- /dev/null
+++ b/libmpdata++/formulae/fractal_reconstruction.hpp
@@ -0,0 +1,181 @@
+/** @file
+* @copyright University of Warsaw
+* @section LICENSE
+* GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+*/
+
+#pragma once
+
+#include <libmpdata++/formulae/idxperm.hpp>
+
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace fractal
+    {
+      // CDF of stretching parameter d; assuming CDF = A d^B + C under condition that CDF(0.5)=0 and CDF(1)=1
+      template<class real_t>
+      static real_t CDF_of_d(const real_t &d)
+      {
+        const real_t B = -0.39091161; // comes from a fit to the E. Akinlabi data
+        return (pow(d,B) - pow(0.5,B)) / (1. - pow(0.5,B));
+      }
+      // inverse function: d of a CDF
+      template<class real_t>
+      static real_t d_of_CDF(const real_t &CDF)
+      {
+        const real_t B = -0.39091161; // comes from a fit to the E. Akinlabi data
+        return pow((1.-pow(0.5,B))*CDF + pow(0.5,B), 1./B);
+      }
+
+      template<class real_t>
+      struct d_of_CDF_fctr
+      {
+        real_t operator()(const real_t &CDF) const
+        {
+          return CDF < 0 ? -d_of_CDF(-CDF) : d_of_CDF(CDF);
+        }
+        BZ_DECLARE_FUNCTOR(d_of_CDF_fctr);
+      };
+
+      // (tri-)linear interpolation
+      // 3d version
+      template<int d, class real_t, class arr_t, class rng_t>
+      void intrp(
+        arr_t arr,       // array to be interpolated
+        const rng_t &i,  // index of the point to be interpolated to
+        const rng_t &j,  // ditto
+        const rng_t &k,  // ditto
+        const int &dist  // number of indices between an interpolated point and a known point
+      )
+      {
+        using idxperm::pis;
+
+        // debug output
+         // std::cerr << "ranges in interpolation" << std::endl;
+         // if(d==0)
+         //   std::cerr << "range<" << d << ">: " << i << " " << j << " " << k << std::endl;
+         // if(d==1)
+         //   std::cerr << "range<" << d << ">: " << k << " " << i << " " << j << std::endl;
+         // if(d==2)
+         //   std::cerr << "range<" << d << ">: " << j << " " << k << " " << i << std::endl;
+
+        arr(pis<d>(i, j, k)) = real_t(.5) * (
+          arr(pis<d>(i - dist, j, k)) +
+          arr(pis<d>(i + dist, j, k))
+        );
+      }
+
+      // fractral reconstruction (calculates 2 mid-points based on 3)
+      // following Akinlabi et al. "Fractal reconstruciton..." 2019
+      // 3d version
+      template<int d, class real_t, class arr_t, class rng_t>
+      void rcnstrct(
+        arr_t arr,       // array to be reconstructed
+        const rng_t &i,  // index of the first point from the pair to be reconstructed
+        const rng_t &j,  // ditto
+        const rng_t &k,  // ditto
+        const int &dist, // number of indices between a reconstructed point and a known point
+        arr_t c_j,       // parameter c
+        const arr_t d_j, // parameter d (stretching) - needs to be calculated before
+        arr_t f_j        // parameter f
+      )
+      {
+          // debug output
+//          std::cerr << "ranges in rcnstrct" << std::endl;
+//          if(d==0)
+//            std::cerr << "range<" << d << ">: " << i << " " << j << " " << k << std::endl;
+//          if(d==1)
+//            std::cerr << "range<" << d << ">: " << k << " " << i << " " << j << std::endl;
+//          if(d==2)
+//            std::cerr << "range<" << d << ">: " << j << " " << k << " " << i << std::endl;
+
+          using idxperm::pis;
+
+          // second reconstructed position (j=2, between i=1 and i=2)
+          const rng_t i_next = i + 2*dist;
+
+          const int x[3] = {0, 1, 2};
+
+          // helper references
+          const arr_t u_0 = arr(pis<d>(i      - dist, j, k)),
+                      u_1 = arr(pis<d>(i      + dist, j, k)),
+                      u_2 = arr(pis<d>(i_next + dist, j, k));
+
+          arr_t c_1 = c_j(pis<d>(i,      j, k)),
+                c_2 = c_j(pis<d>(i_next, j, k)),
+                d_1 = d_j(pis<d>(i,      j, k)),
+                d_2 = d_j(pis<d>(i_next, j, k)),
+                f_1 = f_j(pis<d>(i,      j, k)),
+                f_2 = f_j(pis<d>(i_next, j, k));
+
+          // Eqs. (5) and (6) Akinlabi et al.
+          c_1 = (u_1 - u_0) / (2.) - d_1 * (u_2 - u_0) / (2.);
+          c_2 = (u_2 - u_1) / (2.) - d_2 * (u_2 - u_0) / (2.);
+          f_1 = (x[2] * u_0 - x[0] * u_1) / (2.) - d_1 * (x[2] * u_0 - x[0] * u_2) / (2.);
+          f_2 = (x[2] * u_1 - x[0] * u_2) / (2.) - d_2 * (x[2] * u_0 - x[0] * u_2) / (2.);
+
+          // new u, at j=1, between i=0 and i=1, result of w_j=1(u_i=1), Eq. (1) in Akinlabi et al.
+          arr(pis<d>(i    , j, k)) = c_1 * 1 + d_1 * u_1 + f_1; 
+          // new u, at j=2, between i=1 and i=2, result of w_j=2(u_i=1), Eq. (1) in Akinlabi et al.
+          arr(pis<d>(i_next, j, k)) = c_2 * 1 + d_2 * u_1 + f_2;
+
+          // DEBUG: do interpolation, useful for testing if ranges are correct
+//          arr(pis<d>(i, j, k)) = real_t(.5) * (u_0 + u_1);
+//          arr(pis<d>(i_next, j, k)) = real_t(.5) * (u_1 + u_2);
+
+
+// alternative solution following Scotti and Meneveau Physica D 1999
+/*
+
+          // helper references
+          // NOTE: these are actually the resolved values only in the first iteration, in the subsequent iterations some of these are reconstructed values
+          //       should we point to the resolved values in all iterations?? 
+          const arr_t u_im1 = arr(pis<d>(i      - dist, j, k)), // resolved u_(i-1)
+                      u_i   = arr(pis<d>(i      + dist, j, k)), // resolved u_(i)
+                      u_ip1 = arr(pis<d>(i_next + dist, j, k)); // resolved u_(i+1)
+
+          // c_j and f_j naming comes from the Akinlabi paper, TODO: rename to a_i b_i
+          // NOTE: in multiple iterations of reconstruction, they only change due to random stretching parameters? 
+          //       or the stretching parameter should be drawn from the distro once? then no need for c_j and f_j to have the same size as reconstructed fields...
+          arr_t a_1 = this->c_j(pis<d>(i,      j, k)),
+                a_2 = this->c_j(pis<d>(i_next, j, k)),
+                b_1 = this->f_j(pis<d>(i,      j, k)),
+                b_2 = this->f_j(pis<d>(i_next, j, k));
+
+          // Eqs. (13) and (14) from Scotti Meneveau 1999 
+          a_1 = u_i - u_im1 - d_1*(u_ip1 - u_im1);
+          a_2 = u_ip1 - u_i - d_2*(u_ip1 - u_im1);
+          b_1 = u_im1*(real_t(1) - d_1); 
+          b_2 = u_i - d_2*u_im1; 
+
+          // W(u_0(xi)) at xi=1/4, between u_i-1 and u_i
+          // u_0(2*xi) = u_i-1 + 1/2 * (u_i+1 - u_i-1)
+          // Eq. (6) and caption of fig. 1 Scotti 1999
+          // NOTE: in multiple iterations, should xi be between the 3 resolved points? or should we consider the reconstructed points as new resolved points?
+          arr(pis<d>(i,      j, k)) = d_1 * 
+//                                      (u_im1 + 0.5 * (u_ip1 - u_im1)) + // u_0(2*xi)
+                                      u_i + 
+                                      0.5 * a_1 + b_1;                  // q_1(2*xi)
+          // W(u_0(xi)) at xi=3/4, between u_i and u_i+1
+          // u_0(2*xi-1) = u_i-1 + 1/2 * (u_i+1 - u_i-1)
+          // NOTE: u_0(2*3/4-1) == u_0(2*1/4), so calculate it once in the previous step and store it...
+          arr(pis<d>(i_next, j, k)) = d_2 * 
+//                                      (u_im1 + 0.5 * (u_ip1 - u_im1)) + // u_0(2*xi-1)
+                                      u_i + 
+                                      0.5 * a_2 + b_2;                  // q_2(2*xi-1)
+*/
+          // Eq. (5) Scotti and Meneveau PRL 1997, a_1 and a_2 differ from Scotti Meneveau 1999 !
+          // NOTE: in the 1999 paper, a_1 and a_2 are multipled by 2*xi and 2*xi-1, respectively
+          //       in the 1997 paper, the are both multipled by xi
+          /*
+          a_1 = 2 * (u_im1 - u_i - d_1*(u_ip1 - u_im1));
+          a_2 = 2 * (u_ip1 - u_i - d_2*(u_ip1 - u_im1));
+          b_1 = u_im1*(real_t(1) - d_1); 
+          b_2 = u_i - d_2*u_im1; 
+          */
+      }
+    };
+  };
+};
diff --git a/libmpdata++/formulae/idxperm.hpp b/libmpdata++/formulae/idxperm.hpp
index 5c5c1c74..1d58574d 100644
--- a/libmpdata++/formulae/idxperm.hpp
+++ b/libmpdata++/formulae/idxperm.hpp
@@ -69,5 +69,18 @@ namespace libmpdataxx
 
     template<>
     inline int_idx_t<3> pi<2>(const int k, const int i, const int j) { return int_idx_t<3>({i,j,k}); }
+
+    // 3D - strided ranges
+    template<int d>
+    inline idxs_t<3> pis(const rng_t &i, const rng_t &j, const rng_t &k);
+
+    template<>
+    inline idxs_t<3> pis<0>(const rng_t &i, const rng_t &j, const rng_t &k) { return idxs_t<3>{{i.first(), j.first(), k.first()}, {i.last(), j.last(), k.last()}, {i.stride(), j.stride(), k.stride()}}; }
+
+    template<>
+    inline idxs_t<3> pis<1>(const rng_t &j, const rng_t &k, const rng_t &i) { return idxs_t<3>{{i.first(), j.first(), k.first()}, {i.last(), j.last(), k.last()}, {i.stride(), j.stride(), k.stride()}}; }
+
+    template<>
+    inline idxs_t<3> pis<2>(const rng_t &k, const rng_t &i, const rng_t &j) { return idxs_t<3>{{i.first(), j.first(), k.first()}, {i.last(), j.last(), k.last()}, {i.stride(), j.stride(), k.stride()}}; }
   } // namespace idxperm
 } // namespace libmpdataxx
diff --git a/libmpdata++/formulae/refined_grid.hpp b/libmpdata++/formulae/refined_grid.hpp
new file mode 100644
index 00000000..4093b1d5
--- /dev/null
+++ b/libmpdata++/formulae/refined_grid.hpp
@@ -0,0 +1,61 @@
+/** @file
+* @copyright University of Warsaw
+* @section LICENSE
+* GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+*/
+
+#pragma once
+
+//#include <libmpdata++/formulae/idxperm.hpp>
+
+namespace libmpdataxx
+{
+  namespace formulae
+  {
+    namespace refined
+    {
+      // calculate regular grid scalar as an average from the refined grid
+      // modifies the refined grid scalar!
+      // 3d version
+      template<class real_t, class arr_t, class ijk_t>
+      void spatial_average_ref2reg(
+        arr_t arr,
+        const ijk_t &ijk,                    // index of the refined point that overlaps with the regular point
+        const int &dist,                     // average calculate from the box (i-dist, i+dist)(j-dist,j+dist)(k-dist.k+dist)
+        const std::array<int, 3> grid_size,  // number of points in each direction (total, not for this MPI process), needed for averaging on edges so that it does not go out of the domain; refined arrays have no halos on domain edgs, only halos between MPI subdomains
+        const bool volume_avg                // should averaging account weights proportional to cell volume (refined cells at box edges are only partially within the regular cell)
+      )
+      {
+        // WARNING: very bad LOOPS
+        for(int i = ijk[0].first(); i <= ijk[0].last(); i+= ijk[0].stride())
+          for(int j = ijk[1].first(); j <= ijk[1].last(); j+= ijk[1].stride())
+            for(int k = ijk[2].first(); k <= ijk[2].last(); k+= ijk[2].stride())
+            {
+              real_t norm = 0;
+              for(int id=-dist; id<=dist; ++id)
+                for(int jd=-dist; jd<=dist; ++jd)
+                  for(int kd=-dist; kd<=dist; ++kd)
+                  {
+                    if(id==0 && jd==0 && kd==0) continue; // dont add the overlapping point
+                    // dont add points outside of the domain
+                    if(i+id < 0 || i+id > grid_size[0]-1 ||
+                       j+jd < 0 || j+jd > grid_size[1]-1 ||
+                       k+kd < 0 || k+kd > grid_size[2]-1) 
+                       continue;
+
+                    real_t w = 1; // weight
+                    if(volume_avg)
+                    {
+                      if(id==-dist || id == dist) w/=2.;
+                      if(jd==-dist || jd == dist) w/=2.;
+                      if(kd==-dist || kd == dist) w/=2.;
+                    }
+                    arr(i,j,k) += w * arr(i+id,j+jd,k+kd);
+                    norm += w;
+                  }
+              arr(i,j,k) /= norm+1; // +1 because the center cell was not added to norm
+            }
+      }
+    };
+  };
+};
diff --git a/libmpdata++/opts.hpp b/libmpdata++/opts.hpp
index 35a89ea2..33f24dc5 100644
--- a/libmpdata++/opts.hpp
+++ b/libmpdata++/opts.hpp
@@ -31,6 +31,17 @@ namespace libmpdataxx
       return x == 0 ? 0. : int(log(x)/log(2)) + 1;
     }
 
+    // https://www.geeksforgeeks.org/count-set-bits-in-an-integer/
+    constexpr unsigned int nset(opts_t x)
+    {
+      unsigned int count = 0;
+      while (x) {
+        count += x & 1;
+        x >>= 1;
+      }
+      return count;
+    }
+
     enum
     {
       fct = opts::bit(0), // flux-corrected transport
@@ -83,6 +94,7 @@ namespace libmpdataxx
     enum { opts = opts::iga | opts::fct };
     enum { hint_norhs = 0 };
     enum { delayed_step = 0 };
+    enum { fractal_recon = 0 };
     struct ix {};
     static constexpr int hint_scale(const int &e) { return 0; } // base-2 logarithm
     enum { var_dt = false};
diff --git a/libmpdata++/output/detail/output_common.hpp b/libmpdata++/output/detail/output_common.hpp
index 47920259..430306d5 100644
--- a/libmpdata++/output/detail/output_common.hpp
+++ b/libmpdata++/output/detail/output_common.hpp
@@ -38,6 +38,7 @@ namespace libmpdataxx
 
         virtual void record(const int var) {}
         virtual void start(const typename parent_t::advance_arg_t nt) {}
+        virtual void hook_ante_record_all() {}
 
         typename parent_t::arr_t out_data(const int var)
         {
@@ -55,6 +56,7 @@ namespace libmpdataxx
             this->mem->barrier();
           }
 
+          hook_ante_record_all();
           if (this->rank == 0)
           {
             record_time = this->time;
@@ -145,20 +147,23 @@ namespace libmpdataxx
               this->mem->barrier();
           }
 
-          if (this->rank == 0)
+          if (this->var_dt && do_record_cnt == 1)
           {
-            //TODO: output of solver statistics every timesteps could probably go here
-
-            if (this->var_dt && do_record_cnt == 1)
-            {
+            hook_ante_record_all();
+            if (this->rank == 0)
               record_all();
-            }
-            else if (!this->var_dt)
+          }
+          else if (!this->var_dt)
+          {
+            record_time = this->time;
+            for (int t = 0; t < outwindow; ++t)
             {
-              record_time = this->time;
-              for (int t = 0; t < outwindow; ++t)
+              if ((this->timestep - t) % static_cast<int>(outfreq) == 0)
               {
-                if ((this->timestep - t) % static_cast<int>(outfreq) == 0) record_all();
+                this->mem->barrier();
+                hook_ante_record_all();
+                if (this->rank == 0)
+                  record_all();
               }
             }
           }
diff --git a/libmpdata++/output/detail/xdmf_writer.hpp b/libmpdata++/output/detail/xdmf_writer.hpp
index 173ebd4a..cd7a1c7d 100644
--- a/libmpdata++/output/detail/xdmf_writer.hpp
+++ b/libmpdata++/output/detail/xdmf_writer.hpp
@@ -18,86 +18,99 @@ namespace libmpdataxx
     namespace detail
     {
       template<int dim>
-      class xdmf_writer
+      struct data_item
       {
         using ptree = boost::property_tree::ptree;
-#if (BOOST_VERSION >= 105600)
-        using xml_writer_settings = boost::property_tree::xml_writer_settings<std::string>;
-#else
-        using xml_writer_settings = boost::property_tree::xml_writer_settings<char>;
-#endif
 
-        struct data_item
+        blitz::TinyVector<int, dim> dimensions;
+        const std::string number_type = "Float";
+        const std::string format = "HDF";
+        std::string data;
+        void add(ptree& node)
         {
-          blitz::TinyVector<int, dim> dimensions;
-          const std::string number_type = "Float";
-          const std::string format = "HDF";
-          std::string data;
-          void add(ptree& node)
-          {
-            std::stringstream ss;
-            for (auto d : dimensions)
-              ss << d << ' ';
-            ptree& dat_node = node.add("DataItem", data);
-            dat_node.put("<xmlattr>.Dimensions", ss.str());
-            dat_node.put("<xmlattr>.NumberType", number_type);
-            dat_node.put("<xmlattr>.Format", format);
-          }
-        };
+          std::stringstream ss;
+          for (auto d : dimensions)
+            ss << d << ' ';
+          ptree& dat_node = node.add("DataItem", data);
+          dat_node.put("<xmlattr>.Dimensions", ss.str());
+          dat_node.put("<xmlattr>.NumberType", number_type);
+          dat_node.put("<xmlattr>.Format", format);
+        }
+      };
 
-        struct topology
+      template<int dim>
+      struct topology
+      {
+        using ptree = boost::property_tree::ptree;
+
+        const std::string topology_type = dim == 2 ? "2DSMesh" : "3DSMesh";
+        blitz::TinyVector<int, dim> dimensions;
+        void add(ptree& node)
         {
-          const std::string topology_type = dim == 2 ? "2DSMesh" : "3DSMesh";
-          blitz::TinyVector<int, dim> dimensions;
-          void add(ptree& node)
-          {
-            node.put("Topology.<xmlattr>.TopologyType", topology_type);
-            std::stringstream ss;
-            for (auto d : dimensions)
-              ss << d << ' ';
-            node.put("Topology.<xmlattr>.Dimensions", ss.str());
-          }
-        };
+          node.put("Topology.<xmlattr>.TopologyType", topology_type);
+          std::stringstream ss;
+          for (auto d : dimensions)
+            ss << d << ' ';
+          node.put("Topology.<xmlattr>.Dimensions", ss.str());
+        }
+      };
+
+      template<int dim>
+      struct geometry
+      {
+        using ptree = boost::property_tree::ptree;
 
-        struct geometry
+        const std::string geometry_type = dim == 2 ? "X_Y" : "X_Y_Z";
+        std::array<data_item<dim>, dim> coords;
+        void add(ptree& node)
         {
-          const std::string geometry_type = dim == 2 ? "X_Y" : "X_Y_Z";
-          std::array<data_item, dim> coords;
-          void add(ptree& node)
-          {
-            ptree& geo_node = node.add("Geometry", "");
-            geo_node.put("<xmlattr>.GeometryType", geometry_type);
-            for (auto &c : coords)
-              c.add(geo_node);
-          }
-        };
+          ptree& geo_node = node.add("Geometry", "");
+          geo_node.put("<xmlattr>.GeometryType", geometry_type);
+          for (auto &c : coords)
+            c.add(geo_node);
+        }
+      };
+
+      template<int dim>
+      struct attribute_t
+      {
+        using ptree = boost::property_tree::ptree;
 
-        struct attribute
+        std::string name;
+        const std::string attribute_type = "Scalar";
+        const std::string center = "Cell";
+        mutable data_item<dim> item;
+        void add(ptree& node)
         {
-          std::string name;
-          const std::string attribute_type = "Scalar";
-          const std::string center = "Cell";
-          mutable data_item item;
-          void add(ptree& node)
-          {
-            ptree& attr_node = node.add("Attribute", "");
-            attr_node.put("<xmlattr>.Name", name);
-            attr_node.put("<xmlattr>.AttributeType", attribute_type);
-            attr_node.put("<xmlattr>.Center", center);
-            item.add(attr_node);
-          }
+          ptree& attr_node = node.add("Attribute", "");
+          attr_node.put("<xmlattr>.Name", name);
+          attr_node.put("<xmlattr>.AttributeType", attribute_type);
+          attr_node.put("<xmlattr>.Center", center);
+          item.add(attr_node);
+        }
 
-          // to allow storing attributes in std::set
-          friend bool operator<(const attribute &lhs, const attribute &rhs)
-          {
-            return lhs.name < rhs.name;
-          }
-        };
+        // to allow storing attributes in std::set
+        friend bool operator<(const attribute_t &lhs, const attribute_t &rhs)
+        {
+          return lhs.name < rhs.name;
+        }
+      };
+
+      template<int dim>
+      class xdmf_writer
+      {
+        using ptree = boost::property_tree::ptree;
+#if (BOOST_VERSION >= 105600)
+        using xml_writer_settings = boost::property_tree::xml_writer_settings<std::string>;
+#else
+        using xml_writer_settings = boost::property_tree::xml_writer_settings<char>;
+#endif
+        using attribute = attribute_t<dim>;
 
         const std::string name = "Grid";
         const std::string grid_type = "Uniform";
-        topology top;
-        geometry geo;
+        topology<dim> top;
+        geometry<dim> geo;
         std::set<attribute> attrs;
         std::set<attribute> c_attrs;
 
@@ -179,7 +192,7 @@ namespace libmpdataxx
           write_xml(xmf_name, pt, std::locale(), settings);
         }
 
-        void write_temporal(const std::string& xmf_name, const std::vector<std::string>& timesteps)
+        void write_temporal(const std::string& xmf_name, const std::vector<std::string>& timesteps, const std::string &timesteps_suffix = "")
         {
 
           ptree pt;
@@ -193,14 +206,13 @@ namespace libmpdataxx
           for (auto ts : timesteps)
           {
             ptree& ts_node = grid_node.add("xi:include", "");
-            ts_node.put("<xmlattr>.href", ts);
+            ts_node.put("<xmlattr>.href", ts + timesteps_suffix);
             ts_node.put("<xmlattr>.xpointer", "gid");
           }
 
           xml_writer_settings settings('\t', 1);
           write_xml(xmf_name, pt, std::locale(), settings);
         }
-
       };
 
     }
diff --git a/libmpdata++/output/hdf5.hpp b/libmpdata++/output/hdf5.hpp
index 6d8b906b..859f2d77 100644
--- a/libmpdata++/output/hdf5.hpp
+++ b/libmpdata++/output/hdf5.hpp
@@ -40,7 +40,7 @@ namespace libmpdataxx
       using output_t = hdf5<solver_t>;
 
       std::unique_ptr<H5::H5File> hdfp;
-      std::map<int, std::string> dim_names;
+      std::map<int, std::string> dim_names, dim_names_ref;
       const std::string const_name = "const.h5";
       std::string const_file;
       const hsize_t zero = 0, one = 1;
@@ -55,10 +55,10 @@ namespace libmpdataxx
               H5::PredType::NATIVE_FLOAT,
         flttype_output = H5::PredType::NATIVE_FLOAT; // using floats not to waste disk space
 
-      blitz::TinyVector<hsize_t, parent_t::n_dims> cshape, shape, chunk, srfcshape, srfcchunk, offst;
+      blitz::TinyVector<hsize_t, parent_t::n_dims> cshape, shape, chunk, srfcshape, srfcchunk, offst, shape_ref, cshape_ref, chunk_ref, offst_ref; // what if grid refinement is not done???
       H5::DSetCreatPropList params;
 
-      H5::DataSpace sspace, cspace, srfcspace;
+      H5::DataSpace sspace, cspace, srfcspace, sspace_ref, cspace_ref;
 #if defined(USE_MPI)
       hid_t fapl_id;
 #endif
@@ -96,35 +96,52 @@ namespace libmpdataxx
           // creating the dimensions
           // x,y,z
           offst = 0;
+          offst_ref = 0;
 
           for (int d = 0; d < parent_t::n_dims; ++d)
+          {
             shape[d] = this->mem->distmem.grid_size[d];
+            shape_ref[d] = this->mem->distmem.grid_size_ref[d];
+          }
 
           chunk = shape;
+          chunk_ref = shape_ref;
 
           // there is one more coordinate than cell index in each dimension
           cshape = shape + 1;
+          cshape_ref = shape_ref + 1;
 
           srfcshape = shape;
           *(srfcshape.end()-1) = 1;
           sspace = H5::DataSpace(parent_t::n_dims, shape.data());
-          srfcspace = H5::DataSpace(parent_t::n_dims, srfcshape.data());
           cspace = H5::DataSpace(parent_t::n_dims, cshape.data());
+          srfcspace = H5::DataSpace(parent_t::n_dims, srfcshape.data());
+          sspace_ref = H5::DataSpace(parent_t::n_dims, shape_ref.data());
+          cspace_ref = H5::DataSpace(parent_t::n_dims, cshape_ref.data());
 
 #if defined(USE_MPI)
           if (this->mem->distmem.size() > 1)
           {
             shape[0] = this->mem->grid_size[0].length();
             cshape[0] = this->mem->grid_size[0].length();
+            shape_ref[0] = this->mem->distmem.rank() < this->mem->distmem.size() - 1 ? 
+              this->mem->grid_size_ref[0].length()-1 : // -1 to make ranges nonoverlapping
+              this->mem->grid_size_ref[0].length();
+            cshape_ref[0] = shape_ref[0];
 
             if (this->mem->distmem.rank() == this->mem->distmem.size() - 1)
+            {
               cshape[0] += 1;
+              cshape_ref[0] += 1;
+            }
 
             offst[0] = this->mem->grid_size[0].first();
+            offst_ref[0] = this->mem->grid_size_ref[0].first();
 
             // chunk size has to be common to all processes !
             // TODO: something better ?
             chunk[0] = ( (typename solver_t::real_t) (this->mem->distmem.grid_size[0])) / this->mem->distmem.size() + 0.5 ;
+            chunk_ref[0] = ( (typename solver_t::real_t) (this->mem->distmem.grid_size_ref[0])) / this->mem->distmem.size() + 0.5 ;
           }
 #endif
 
@@ -174,6 +191,39 @@ namespace libmpdataxx
               curr_dim.write(coord.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, cshape.data()), dim_space, dxpl_id);
             }
 
+            // refined X, Y, Z, TODO: very similar to X, Y, Z
+            for (int i = 0; i < parent_t::n_dims; ++i)
+            {
+
+              blitz::Array<typename solver_t::real_t, parent_t::n_dims> coord(cshape_ref);
+#if defined(USE_MPI)
+              coord.reindexSelf(offst_ref);
+#endif
+              std::string name;
+              switch (i)
+              {
+                case 0 : coord = this->di / 2. + this->di / this->mem->n_ref * (blitz::firstIndex() - .5);
+                         name = "X refined";
+                         dim_names_ref[i] = name;
+                         break;
+                case 1 : coord = this->dj / 2. + this->dj / this->mem->n_ref * (blitz::secondIndex() - .5);
+                         name = "Y refined";
+                         dim_names_ref[i] = name;
+                         break;
+                case 2 : coord = this->dk / 2. + this->dk / this->mem->n_ref * (blitz::thirdIndex() - .5);
+                         name = "Z refined";
+                         dim_names_ref[i] = name;
+                         break;
+                default : break;
+              }
+
+              auto curr_dim = (*hdfp).createDataSet(name, flttype_output, cspace_ref);
+
+              H5::DataSpace dim_space = curr_dim.getSpace();
+              dim_space.selectHyperslab(H5S_SELECT_SET, cshape_ref.data(), offst_ref.data());
+              curr_dim.write(coord.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, cshape_ref.data()), dim_space, dxpl_id);
+            }
+
             // T
             {
               const hsize_t
@@ -287,33 +337,35 @@ namespace libmpdataxx
         };
       }
 
-      void record_dsc_helper(const H5::DataSet &dset, const typename solver_t::arr_t &arr)
+      void record_dsc_helper(const H5::DataSet &dset, const typename solver_t::arr_t &arr, const bool refined = false)
       {
         H5::DataSpace space = dset.getSpace();
-        space.selectHyperslab(H5S_SELECT_SET, shape.data(), offst.data());
-        // TODO: some permutation of grid_size instead of the switch
+        const auto _grid_size(refined ? this->mem->grid_size_ref : this->mem->grid_size);
+        const auto _shape(refined ? shape_ref : shape);
 
+        space.selectHyperslab(H5S_SELECT_SET, _shape.data(), refined ? offst_ref.data() : offst.data());
+
+        // TODO: some permutation of grid_size instead of the switch
         switch (int(solver_t::n_dims))
         {
           case 1:
           {
-            typename solver_t::arr_t contiguous_arr = arr(this->mem->grid_size[0]).copy(); // create a copy that is contiguous
-            dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, shape.data()), space, dxpl_id);
+            typename solver_t::arr_t contiguous_arr = arr(_grid_size[0]).copy(); // create a copy that is contiguous
+            dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, _shape.data()), space, dxpl_id);
             break;
           }
           case 2:
           {
-            typename solver_t::arr_t contiguous_arr = arr(this->mem->grid_size[0], this->mem->grid_size[1]).copy(); // create a copy that is contiguous
-            dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, shape.data()), space, dxpl_id);
+            typename solver_t::arr_t contiguous_arr = arr(_grid_size[0], _grid_size[1]).copy(); // create a copy that is contiguous
+            dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, _shape.data()), space, dxpl_id);
             break;
           }
           case 3:
           {
             // create a copy that is contiguous and has the C-style (kji) storage order as required by HDF5
-            typename solver_t::arr_t contiguous_arr(this->mem->grid_size[0], this->mem->grid_size[1], this->mem->grid_size[2]); 
-            contiguous_arr = arr(this->mem->grid_size[0], this->mem->grid_size[1], this->mem->grid_size[2]);
-
-            dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, shape.data()), space, dxpl_id);
+            typename solver_t::arr_t contiguous_arr(_grid_size[0], _grid_size[1], _grid_size[2]); 
+            contiguous_arr = arr(_grid_size[0], _grid_size[1], _grid_size[2]);
+            dset.write(contiguous_arr.data(), flttype_solver, H5::DataSpace(parent_t::n_dims, _shape.data()), space, dxpl_id);
             break;
           }
           default: assert(false);
@@ -321,45 +373,54 @@ namespace libmpdataxx
       }
 
       // data is assumed to be contiguous and in the same layout as hdf variable and in the C-style storage order
-      void record_aux_hlpr(const std::string &name, typename solver_t::real_t *data, H5::H5File hdf)
+      void record_aux_hlpr(const std::string &name, typename solver_t::real_t *data, H5::H5File hdf, bool refined = false)
       {
         assert(this->rank == 0);
+        const auto _shape(refined ? shape_ref : shape);
+        const auto _offst(refined ? offst_ref : offst);
+        if(refined) params.setChunk(parent_t::n_dims, chunk_ref.data());
 
         auto aux = hdf.createDataSet(
           name,
           flttype_output,
-          sspace,
+          refined ? sspace_ref : sspace,
           params
         );
 
         auto space = aux.getSpace();
-        space.selectHyperslab(H5S_SELECT_SET, shape.data(), offst.data());
-        aux.write(data, flttype_solver, H5::DataSpace(parent_t::n_dims, shape.data()), space, dxpl_id);
+        space.selectHyperslab(H5S_SELECT_SET, _shape.data(), _offst.data());
+        aux.write(data, flttype_solver, H5::DataSpace(parent_t::n_dims, _shape.data()), space, dxpl_id);
+
+        // revert to default chunk
+        if(refined) params.setChunk(parent_t::n_dims, chunk.data());
       }
 
       // for discontiguous array with halos
-      void record_aux_dsc_hlpr(const std::string &name, const typename solver_t::arr_t &arr, H5::H5File hdf, bool srfc = false)
+      void record_aux_dsc_hlpr(const std::string &name, const typename solver_t::arr_t &arr, H5::H5File hdf, bool srfc = false, bool refined = false)
       {
         assert(this->rank == 0);
+        assert(!(refined && srfc));
 
         if(srfc)
           params.setChunk(parent_t::n_dims, srfcchunk.data());
+        else if (refined)
+          params.setChunk(parent_t::n_dims, chunk_ref.data());
 
         auto aux = hdf.createDataSet(
           name,
           flttype_output,
-          srfc ? srfcspace : sspace,
+          srfc ? srfcspace : refined ? sspace_ref : sspace,
           params
         );
 
         if(srfc)
-        {
-          // revert to default chunk
-          params.setChunk(parent_t::n_dims, chunk.data());
           record_dsc_srfc_helper(aux, arr);
-        }
         else
-          record_dsc_helper(aux, arr);
+          record_dsc_helper(aux, arr, refined);
+
+        // revert to default chunk
+        if(srfc || refined)
+          params.setChunk(parent_t::n_dims, chunk.data());
       }
 
       void record_scalar_hlpr(const std::string &name, const std::string &group_name, typename solver_t::real_t data, H5::H5File hdf)
@@ -405,7 +466,12 @@ namespace libmpdataxx
       // data is assumed to be contiguous and in the same layout as hdf variable and in the C-style storage order
       void record_aux(const std::string &name, typename solver_t::real_t *data)
       {
-        record_aux_hlpr(name, data, *hdfp);
+        record_aux_hlpr(name, data, *hdfp, false);
+      }
+
+      void record_aux_refined(const std::string &name, typename solver_t::real_t *data)
+      {
+        record_aux_hlpr(name, data, *hdfp, true);
       }
 
       void record_aux_dsc(const std::string &name, const typename solver_t::arr_t &arr, bool srfc = false)
@@ -413,6 +479,11 @@ namespace libmpdataxx
         record_aux_dsc_hlpr(name, arr, *hdfp, srfc);
       }
 
+      void record_aux_dsc_refined(const std::string &name, const typename solver_t::arr_t &arr)
+      {
+        record_aux_dsc_hlpr(name, arr, *hdfp, false, true);
+      }
+
       void record_aux_scalar(const std::string &name, const std::string &group_name, typename solver_t::real_t data)
       {
         record_scalar_hlpr(name, group_name, data, *hdfp);
@@ -471,9 +542,11 @@ namespace libmpdataxx
       }
 
       // see above, also assumes that z is the last dimension
-      void record_prof_const(const std::string &name, typename solver_t::real_t *data)
+      void record_prof_const(const std::string &name, typename solver_t::real_t *data, const bool refined = false)
       {
         assert(this->rank == 0);
+        const auto _shape(refined ? shape_ref : shape);
+        const auto _offst(refined ? offst_ref : offst);
 
         H5::H5File hdfcp(const_file, H5F_ACC_RDWR
 #if defined(USE_MPI)
@@ -484,7 +557,7 @@ namespace libmpdataxx
         auto aux = hdfcp.createDataSet(
           name,
           flttype_output,
-          H5::DataSpace(1, &shape[parent_t::n_dims - 1])
+          H5::DataSpace(1, &_shape[parent_t::n_dims - 1])
         );
 
 #if defined(USE_MPI)
@@ -492,8 +565,8 @@ namespace libmpdataxx
 #endif
         {
           auto space = aux.getSpace();
-          space.selectHyperslab(H5S_SELECT_SET, &shape[parent_t::n_dims - 1], &offst[parent_t::n_dims - 1]);
-          aux.write(data, flttype_solver, H5::DataSpace(1, &shape[parent_t::n_dims - 1]), space);
+          space.selectHyperslab(H5S_SELECT_SET, &_shape[parent_t::n_dims - 1], &_offst[parent_t::n_dims - 1]);
+          aux.write(data, flttype_solver, H5::DataSpace(1, &_shape[parent_t::n_dims - 1]), space);
         }
       }
 
@@ -627,13 +700,32 @@ namespace libmpdataxx
         }
       }
 
+      // as above but for solvers with fractal grid refinement
+      void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_fra_family_tag)
+      {
+        record_params(hdfcp, typename std::conditional<static_cast<solvers::sgs_scheme_t>
+                               (parent_t::ct_params_t_::sgs_scheme) == solvers::iles,
+                               typename solvers::mpdata_rhs_vip_prs_family_tag,                  // iles
+                               typename std::conditional<static_cast<solvers::sgs_scheme_t>
+                                 (parent_t::ct_params_t_::sgs_scheme) == solvers::smg,
+                                 typename solvers::mpdata_rhs_vip_prs_sgs_smg_family_tag,        // smg
+                                 typename solvers::mpdata_rhs_vip_prs_sgs_dns_family_tag         // dns
+                               >::type
+                             >::type{}
+                     );
+        hdfcp.createGroup("fractal");
+        const auto &group = hdfcp.openGroup("fractal");
+        {
+          const auto type = H5::PredType::NATIVE_INT;
+          group.createAttribute("n_fra_iter", type, H5::DataSpace(1, &one)).write(type, &this->n_fra_iter);
+        }
+      }
+
       // as above but for the boussinesq solver
       void record_params(const H5::H5File &hdfcp, typename solvers::mpdata_boussinesq_family_tag)
       {
-        record_params(hdfcp, typename std::conditional<static_cast<solvers::sgs_scheme_t>
-                                                        (parent_t::ct_params_t_::sgs_scheme) == solvers::iles,
-                                                       typename solvers::mpdata_rhs_vip_prs_family_tag,
-                                                       typename solvers::mpdata_rhs_vip_prs_sgs_smg_family_tag>::type{});
+        record_params(hdfcp, typename solvers::mpdata_rhs_vip_prs_sgs_fra_family_tag{});
+
         hdfcp.createGroup("boussinesq");
         const auto &group = hdfcp.openGroup("boussinesq");
         {
diff --git a/libmpdata++/output/hdf5_xdmf.hpp b/libmpdata++/output/hdf5_xdmf.hpp
index 91425fce..9e1d7404 100644
--- a/libmpdata++/output/hdf5_xdmf.hpp
+++ b/libmpdata++/output/hdf5_xdmf.hpp
@@ -36,8 +36,8 @@ namespace libmpdataxx
       static_assert(parent_t::n_dims > 1, "only 2D and 3D output supported");
 
       std::vector<std::string> timesteps;
-      //xdmf writer
-      detail::xdmf_writer<parent_t::n_dims> xdmfw;
+      //xdmf writer, additional one for refined data (separate .xmf files describing the refined grid, TODO: use two grids in one file? Paraview AMR dataset could help?)
+      detail::xdmf_writer<parent_t::n_dims> xdmfw, xdmfw_ref;
 
       void start(const typename parent_t::advance_arg_t nt)
       {
@@ -56,7 +56,8 @@ namespace libmpdataxx
 
           if (this->mem->G.get() != nullptr) xdmfw.add_const_attribute("G", this->const_name, this->mem->distmem.grid_size.data());
 
-          xdmfw.setup(this->const_name, this->dim_names, attr_names, this->mem->distmem.grid_size.data());
+          xdmfw.setup(    this->const_name, this->dim_names,     attr_names, this->mem->distmem.grid_size.data()    );
+          xdmfw_ref.setup(this->const_name, this->dim_names_ref, {},         this->mem->distmem.grid_size_ref.data());
         }
       }
 
@@ -70,10 +71,14 @@ namespace libmpdataxx
           std::string xmf_name = this->base_name() + ".xmf";
           xdmfw.write(this->outdir + "/" + xmf_name, this->hdf_name(), this->record_time);
 
+          std::string xmf_ref_name = this->base_name() + "_ref.xmf";
+          xdmfw_ref.write(this->outdir + "/" + xmf_ref_name, this->hdf_name(), this->record_time);
+
           // save the xmf filename for temporal write
-          timesteps.push_back(xmf_name);
+          timesteps.push_back(this->base_name());
           // write temporal xmf
-          xdmfw.write_temporal(this->outdir + "/temp.xmf", timesteps);
+          xdmfw.write_temporal(this->outdir + "/temp.xmf", timesteps, ".xmf");
+          xdmfw_ref.write_temporal(this->outdir + "/temp_ref.xmf", timesteps, "_ref.xmf"); 
         }
       }
 
@@ -92,6 +97,15 @@ namespace libmpdataxx
         parent_t::record_aux(name, data);
       }
 
+      void record_aux_refined(const std::string &name, typename solver_t::real_t *data)
+      {
+#if defined(USE_MPI)
+        if (this->mem->distmem.rank() == 0)
+#endif
+          xdmfw_ref.add_attribute(name, this->hdf_name(), this->mem->distmem.grid_size_ref.data());
+        parent_t::record_aux_refined(name, data);
+      }
+
       void record_aux_dsc(const std::string &name, const typename solver_t::arr_t &arr, bool srfc = false)
       {
         auto shape = this->mem->distmem.grid_size;
@@ -103,6 +117,16 @@ namespace libmpdataxx
         parent_t::record_aux_dsc(name, arr, srfc);
       }
 
+      void record_aux_dsc_refined(const std::string &name, const typename solver_t::arr_t &arr)
+      {
+        auto shape = this->mem->distmem.grid_size_ref;
+#if defined(USE_MPI)
+        if (this->mem->distmem.rank() == 0)
+#endif
+          xdmfw_ref.add_attribute(name, this->hdf_name(), shape.data());
+        parent_t::record_aux_dsc_refined(name, arr);
+      }
+
       public:
 
       // ctor
diff --git a/libmpdata++/solvers/detail/boussinesq_common.hpp b/libmpdata++/solvers/detail/boussinesq_common.hpp
index c7c1a6c0..e88203fa 100644
--- a/libmpdata++/solvers/detail/boussinesq_common.hpp
+++ b/libmpdata++/solvers/detail/boussinesq_common.hpp
@@ -5,7 +5,7 @@
  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
  */
 #pragma once
-#include <libmpdata++/solvers/mpdata_rhs_vip_prs_sgs.hpp>
+#include <libmpdata++/solvers/mpdata_rhs_vip_prs_sgs_fra.hpp>
 
 namespace libmpdataxx
 {
@@ -14,9 +14,9 @@ namespace libmpdataxx
     namespace detail
     {
       template <class ct_params_t>
-      class boussinesq_common : public libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs<ct_params_t>
+      class boussinesq_common : public libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs_fra<ct_params_t>
       {
-        using parent_t = libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs<ct_params_t>;
+        using parent_t = libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs_fra<ct_params_t>;
 
         public:
         using real_t = typename ct_params_t::real_t;
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp
index 07d15f50..421dbeff 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_common.hpp
@@ -50,6 +50,8 @@ namespace libmpdataxx
         arrvec_t<typename parent_t::arr_t> &drv;
         arrvec_t<typename parent_t::arr_t> &wrk;
 
+        // TODO: make all ijk... objects const
+
         // like ijk, but containing vectors at the lower/left/fre edge
         // CAUTION: on sharedmem, ijkm contains overlapping ranges
         idx_t<ct_params_t::n_dims> ijkm;
@@ -60,7 +62,7 @@ namespace libmpdataxx
         // like ijk, but with range in x direction extended by 1 to the left for MPI compliance.
         // MPI requires that vector between two process domains is calculated by the process to the right of it  (c.f. remote_2d.hpp fill_halos_vctr_alng)
         // TODO: change MPI logic to assume that it is calculated by the process to the left? then, ijk_vec would not be needed(?)
-        std::array<rng_t, ct_params_t::n_dims> ijk_vec;
+        std::array<rng_t, ct_params_t::n_dims> ijk_vec; // TODO: replace std::array with idx_t
 
         real_t cdrag;
 
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_3d.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_3d.hpp
new file mode 100644
index 00000000..23e1bc0e
--- /dev/null
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_3d.hpp
@@ -0,0 +1,251 @@
+/** @file
+  * @copyright University of Warsaw
+  * @section LICENSE
+  * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+  */
+
+#pragma once
+
+#include <libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_common.hpp>
+#include <libmpdata++/formulae/fractal_reconstruction.hpp>
+
+namespace libmpdataxx
+{
+  namespace solvers
+  {
+    namespace detail
+    {
+      template<typename ct_params_t, int minhalo, class enableif = void>
+      class mpdata_rhs_vip_prs_sgs_fra_dim
+      {
+//        static_assert(false);
+      };
+
+      template<typename ct_params_t, int minhalo>
+      class mpdata_rhs_vip_prs_sgs_fra_dim<
+        ct_params_t,
+        minhalo,
+        typename std::enable_if_t<ct_params_t::n_dims == 3>
+      > : public detail::mpdata_rhs_vip_prs_sgs_fra_common<ct_params_t, minhalo>
+      {
+        public:
+
+        using parent_t = detail::mpdata_rhs_vip_prs_sgs_fra_common<ct_params_t, minhalo>;
+        using parent_t::parent_t; // inheriting constructors
+        using real_t = typename ct_params_t::real_t;
+
+        private:
+  
+        // helper variables for grid refinement
+        // TODO: replace with idx_t: mid_ijk_r2r ijk_r2r_h
+        rng_t mid_ijk_r2r_0, mid_ijk_r2r_1, mid_ijk_r2r_2;     // positions between already known values (to be filled during given iteration)
+        rng_t ijk_r2r_0_h_with_halo, ijk_r2r_1_h, ijk_r2r_2_h; // all positions at resolution of given iteration
+        int stride, hstride;                                   // stride and half stride
+
+        // fill distmem halos of refinee
+        // TODO: move to bcond or sth? would be filled only by remote bcond
+        void fill_refinee_distmem_halos(const int e, const int halo_size)
+        {
+          const int e_ref = this->ix_r2r.at(e);
+          // TODO: we only need to xchng along distmem direction (x)
+          this->xchng(e);
+
+          // TODO: assert halo_size <= solver halo size
+
+          // TODO: DRY!
+          switch(halo_size)
+          {
+            case 2:
+              if(this->mem->distmem.rank() < this->mem->distmem.size() - 1)
+                this->mem->psi_ref[e_ref](
+                  this->mem->grid_size_ref[0].last() + this->mem->n_ref / 2 + this->mem->n_ref ,
+                  this->ijk_r2r[1],
+                  this->ijk_r2r[2]
+                ) = 
+                  this->mem->psi[e][0](
+                    this->ijk[0].last()+2,
+                    this->ijk[1],
+                    this->ijk[2]
+                  );
+              if(this->mem->distmem.rank() > 0)
+                this->mem->psi_ref[e_ref](
+                  this->mem->grid_size_ref[0].first() - this->mem->n_ref / 2 - this->mem->n_ref ,
+                  this->ijk_r2r[1],
+                  this->ijk_r2r[2]
+                ) = 
+                  this->mem->psi[e][0](
+                    this->ijk[0].first()-2,
+                    this->ijk[1],
+                    this->ijk[2]
+                  );
+              // no break intentionally
+            case 1:
+              if(this->mem->distmem.rank() < this->mem->distmem.size() - 1)
+                this->mem->psi_ref[e_ref](
+                  this->mem->grid_size_ref[0].last() + this->mem->n_ref / 2,
+                  this->ijk_r2r[1],
+                  this->ijk_r2r[2]
+                ) = 
+                  this->mem->psi[e][0](
+                    this->ijk[0].last()+1,
+                    this->ijk[1],
+                    this->ijk[2]
+                  );
+              if(this->mem->distmem.rank() > 0)
+                this->mem->psi_ref[e_ref](
+                  this->mem->grid_size_ref[0].first() - this->mem->n_ref / 2,
+                  this->ijk_r2r[1],
+                  this->ijk_r2r[2]
+                ) = 
+                  this->mem->psi[e][0](
+                    this->ijk[0].first()-1,
+                    this->ijk[1],
+                    this->ijk[2]
+                  );
+              break;
+            default:
+              assert(false);
+              break;
+          }
+        }
+
+        void refinement_ranges(const int iter, const int halo_size)
+        {
+          // messy, because in domain decomposition (sharedmem and distmem) some refined scalars are on the edge of the subdomain...
+          if(iter==0)
+          {
+            mid_ijk_r2r_0 = this->rng_midpoints(this->ijk_r2r[0], this->mem->distmem.rank(), this->mem->distmem.size());
+            mid_ijk_r2r_1 = this->rng_midpoints_in_out(this->ijk_r2r[1], this->rank, this->mem->size); // different because we dont want overlapping ranges in y 
+            mid_ijk_r2r_2 = this->rng_midpoints(this->ijk_r2r[2]);
+
+            ijk_r2r_1_h = this->ijk_r2r[1];
+            ijk_r2r_2_h = this->ijk_r2r[2];
+          }
+          else
+          {
+            if(iter==1)
+              mid_ijk_r2r_0 = this->rng_midpoints_in(mid_ijk_r2r_0, this->mem->distmem.rank(), this->mem->distmem.size());
+            else
+              mid_ijk_r2r_0 = this->rng_midpoints_out(mid_ijk_r2r_0);
+
+            mid_ijk_r2r_1 = this->rng_midpoints_out(mid_ijk_r2r_1);
+            mid_ijk_r2r_2 = this->rng_midpoints_out(mid_ijk_r2r_2);
+
+            ijk_r2r_1_h = this->rng_half_stride(ijk_r2r_1_h, this->rank, this->mem->size);
+            ijk_r2r_2_h = this->rng_half_stride(ijk_r2r_2_h);
+          }
+
+          stride = ijk_r2r_1_h.stride();
+          assert(ijk_r2r_1_h.stride() == ijk_r2r_2_h.stride());
+          assert(stride % 2 == 0);
+          hstride = stride / 2;
+
+          ijk_r2r_0_h_with_halo = rng_t(
+            this->mem->distmem.rank() == 0                             ? this->ijk_r2r[0].first() : this->ijk_r2r[0].first() - halo_size * this->mem->n_ref,
+            this->mem->distmem.rank() == this->mem->distmem.size() - 1 ? this->ijk_r2r[0].last()  : this->ijk_r2r[0].last()  + halo_size * this->mem->n_ref,
+            hstride
+          );
+        }
+
+        // TODO: stretching parameters at the overlaps of the reconstructed and resolved arrays (ijk_r2r) are not used, do not generate them
+        //       also not all parameters in the halo are needed (but some are!)
+        void generate_stretching_parameters(const int rng_seed = 44)
+        {
+          std::mt19937 gen(rng_seed); 
+          std::uniform_real_distribution<> dis(-1, 1); // [-1,1), but whatever
+          auto rand = std::bind(dis, gen);
+          std::generate(this->d_j(this->ijk_ref_with_halo).begin(), this->d_j(this->ijk_ref_with_halo).end(), rand);
+          this->d_j(this->ijk_ref_with_halo) = formulae::fractal::d_of_CDF_fctr<real_t>{}(this->d_j(this->ijk_ref_with_halo));
+        }
+
+        protected:
+
+        void interpolate_refinee(const int e = 0)
+        {
+          assert(opts::isset(ct_params_t::fractal_recon, opts::bit(e)));
+          const int halo_size = 1;
+          const int e_ref = this->ix_r2r.at(e);
+
+          //// TEMPORARY
+          //this->mem->psi_ref[e_ref] = -1000;
+          //this->mem->barrier();
+
+
+          fill_refinee_distmem_halos(e, halo_size);
+
+          // fill refined array at position where it overlaps with the resolved array
+          this->mem->refinee(e_ref)(this->ijk_r2r) = this->mem->advectee(e)(this->ijk);
+
+//          generate_stretching_parameters();
+
+          for(int i=0; i<this->n_fra_iter; ++i)
+          {
+            refinement_ranges(i, halo_size);
+
+            formulae::fractal::intrp<0, real_t>(this->mem->psi_ref[e_ref], mid_ijk_r2r_0, ijk_r2r_1_h, ijk_r2r_2_h, hstride);
+            this->mem->barrier();
+            formulae::fractal::intrp<1, real_t>(this->mem->psi_ref[e_ref], mid_ijk_r2r_1, ijk_r2r_2_h, ijk_r2r_0_h_with_halo, hstride);
+            formulae::fractal::intrp<2, real_t>(this->mem->psi_ref[e_ref], mid_ijk_r2r_2, ijk_r2r_0_h_with_halo, this->rng_merge(ijk_r2r_1_h, mid_ijk_r2r_1), hstride);
+          }
+          this->mem->barrier();
+        }
+
+        void reconstruct_refinee(const int e = 0)
+        {
+          assert(opts::isset(ct_params_t::fractal_recon, opts::bit(e)));
+          const int halo_size = 2;
+          const int e_ref = this->ix_r2r.at(e);
+
+          //// TEMPORARY
+          //this->mem->psi_ref[e_ref] = -1000;
+          //this->mem->barrier();
+
+          //std::cerr << "mem->grid_size_ref[0]: " << this->mem->grid_size_ref[0] << std::endl;
+          //std::cerr << "mem->grid_size_ref[1]: " << this->mem->grid_size_ref[1] << std::endl;
+          //std::cerr << "mem->grid_size_ref[2]: " << this->mem->grid_size_ref[2] << std::endl;
+
+          fill_refinee_distmem_halos(e, halo_size);
+
+          // fill refined array at position where it overlaps with the resolved array
+          this->mem->refinee(e_ref)(this->ijk_r2r) = this->mem->advectee(e)(this->ijk);
+
+          generate_stretching_parameters();
+
+          for(int i=0; i<this->n_fra_iter; ++i)
+          {
+            refinement_ranges(i, halo_size);
+
+            formulae::fractal::rcnstrct<0, real_t>(this->mem->psi_ref[e_ref], this->rng_dbl_stride(mid_ijk_r2r_0), ijk_r2r_1_h,           ijk_r2r_2_h,                                 hstride, this->c_j, this->d_j, this->f_j);
+            this->mem->barrier();
+            formulae::fractal::rcnstrct<1, real_t>(this->mem->psi_ref[e_ref], this->rng_dbl_stride(mid_ijk_r2r_1)        , ijk_r2r_2_h,           ijk_r2r_0_h_with_halo,                       hstride, this->c_j, this->d_j, this->f_j);
+            formulae::fractal::rcnstrct<2, real_t>(this->mem->psi_ref[e_ref], this->rng_dbl_stride(mid_ijk_r2r_2)        , ijk_r2r_0_h_with_halo, this->rng_merge(ijk_r2r_1_h, mid_ijk_r2r_1), hstride, this->c_j, this->d_j, this->f_j);
+          }
+          this->mem->barrier();
+        }
+
+        public:
+
+        // helper method to allocate n_arr refined scalar temporary arrays
+        static void alloc_tmp_sclr_ref(
+          typename parent_t::mem_t *mem,
+          const char * __file__, const int n_arr,
+          std::string name = "",
+          bool srfc = false
+        )
+        {
+          mem->tmp[__file__].push_back(new arrvec_t<typename parent_t::arr_t>());
+
+          if (!name.empty()) mem->avail_tmp[name] = std::make_pair(__file__, mem->tmp[__file__].size() - 1);
+
+          for (int n = 0; n < n_arr; ++n)
+            mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t(
+              parent_t::rng_ref_distmem_halo(mem->grid_size_ref[0], mem->n_ref, mem->distmem.rank(), mem->distmem.size()), 
+              mem->grid_size_ref[1],
+              srfc ? rng_t(0, 0) : mem->grid_size_ref[2],
+              arr3D_storage
+            )));
+        }
+      };
+    } // namespace detail
+  } // namespace solvers
+} // namespace libmpdataxx
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_common.hpp
new file mode 100644
index 00000000..a01780cf
--- /dev/null
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_common.hpp
@@ -0,0 +1,201 @@
+/**
+ * @file
+ * @copyright University of Warsaw
+ * @section LICENSE
+ * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+ *
+ */
+
+// solver with fractal reconstruction of SGS fields
+
+#pragma once
+
+#include <libmpdata++/solvers/mpdata_rhs_vip_prs_sgs.hpp>
+//#include <numeric>
+//#include <libmpdata++/formulae/idxperm.hpp>
+#include <random>
+
+namespace libmpdataxx
+{
+  namespace solvers
+  {
+    namespace detail
+    {
+      // TODO: minimum halo size is 2
+      template <class ct_params_t, int minhalo>
+      class mpdata_rhs_vip_prs_sgs_fra_common : public mpdata_rhs_vip_prs_sgs<ct_params_t, minhalo>
+      {
+        using parent_t = mpdata_rhs_vip_prs_sgs<ct_params_t, minhalo>;
+
+        public:
+
+        using real_t = typename ct_params_t::real_t;
+
+        protected:
+
+        typename parent_t::arr_t c_j, d_j, f_j; // parameters used in fractal reconstruction, Akinlabi et al. 2019
+
+
+        const int n_ref,         // number of refinements; refined resolution is dx / n_ref
+                  n_fra_iter;    // number of iterations of grid refinement
+        
+//        const int halo_ref; // size of halos of refined scalar (and vector?) arrays
+
+        const std::array<int, ct_params_t::n_eqns> ix_r2r;
+        constexpr std::array<int, ct_params_t::n_eqns> get_ix_r2r()
+        {
+          std::array<int, ct_params_t::n_eqns> ix_r2r{};
+          int j = 0;
+          for(opts::opts_t e=0; e<ct_params_t::n_eqns; ++e)
+          {
+            ix_r2r.at(e) = opts::isset(ct_params_t::fractal_recon, opts::bit(e)) ? j++ : -1; // -1 index should give segfaults
+          }
+          return ix_r2r;
+        }
+
+        // herlper ranges
+        // TODO: make these const!
+        idx_t<ct_params_t::n_dims>  ijk_ref, // range of refinee handled by given solver
+                                    ijk_ref_with_halo; // same but with a halo in x direction between MPI processes
+        const idxs_t<ct_params_t::n_dims> ijk_r2r; // resolved to refined; refined scalars at the same position as resolved scalars
+
+        // range modifying methods used in grid refinement
+        // TODO: unify
+
+        static rng_t rng_midpoints(const rng_t &rng, const int rank = 0, const int size = 1) 
+        {
+          assert(rng.stride() % 2 == 0);
+          if(rng.last() - rng.first() < rng.stride()) return rng;
+          return rng_t(
+            rank == 0 ?        rng.first() + rng.stride() / 2 : rng.first() - rng.stride() / 2,
+            rank == size - 1 ? rng.last()  - rng.stride() / 2 : rng.last()  + rng.stride() / 2, 
+            rng.stride()); 
+        }
+
+        static rng_t rng_midpoints_in_out(const rng_t &rng, const int rank = 0, const int size = 1) 
+        {
+          assert(rng.stride() % 2 == 0);
+          if(rng.last() - rng.first() < rng.stride()) return rng;
+          return rng_t(
+            rng.first() + rng.stride() / 2,
+            rank == size - 1 ? rng.last()  - rng.stride() / 2 : rng.last()  + rng.stride() / 2, 
+            rng.stride()); 
+        }
+
+        static rng_t rng_midpoints_in(const rng_t &rng, const int rank = 0, const int size = 1) 
+        {
+          assert(rng.stride() % 4 == 0);
+          if(rng.last() - rng.first() < rng.stride()) return rng;
+          else return rng_t(
+            rank == 0        ? rng.first() - rng.stride() / 4 : rng.first() + rng.stride() / 4,
+            rank == size - 1 ? rng.last()  + rng.stride() / 4 : rng.last()  - rng.stride() / 4,
+            rng.stride() / 2); 
+        }
+
+        static rng_t rng_midpoints_out(const rng_t &rng) 
+        {
+          assert(rng.stride() % 4 == 0);
+          if(rng.last() - rng.first() < rng.stride()) return rng;
+          else return rng_t(
+            rng.first() - rng.stride() / 4,
+            rng.last() + rng.stride() / 4, 
+            rng.stride() / 2); 
+        }
+
+        static rng_t rng_half_stride(const rng_t &rng, const int rank = 0, const int size = 1) 
+        {
+          assert(rng.stride() % 2 == 0);
+          return rng_t(
+            rng.first(), 
+            rank < size - 1 ? rng.last() + rng.stride() / 2 : rng.last(), 
+            rng.stride() / 2); 
+        }
+
+        // reconstruction based on 3 points
+        // input is an array pointing to midpoints (to be reconstructed), but with a stride jumping on each one
+        // returned range points to the first from the pair of reconstructed points
+        // to avoid boundary conditions, in y and z directions it is assumed that the number of points is 3+i*2, i=0,1,2,... (this check is somewhere else)
+        // however in the x direction there can be any number of points, because the domain is divided between mpi processes...
+        static rng_t rng_dbl_stride(const rng_t &rng) 
+        {
+          assert(rng.last() != rng.first()); // we need at least 2 midpoints
+          assert(rng.stride() % 2 == 0);
+
+          // if domain starts with a point in the middle of a triple - we need to start calculating from a point to the left
+          const int offset = ( (rng.first() - rng.stride() / 2) / rng.stride() ) % 2 == 0 ? 0 : - rng.stride();
+          const auto first = rng.first() + offset;
+
+          if( ((rng.last() - first) / rng.stride() + 1) % 2 == 0) // even number of midpoints; y and z directions (and sometimes x)
+            return rng_t(first, rng.last() - rng.stride(), 2*rng.stride());
+          else // odd number of midpoints
+            return rng_t(first, rng.last(),                2*rng.stride()); // rely on the halo along x direction
+        }
+
+        static rng_t rng_merge(const rng_t &rng1, const rng_t &rng2) 
+        {
+          assert(rng1.stride() == rng2.stride());
+          return rng_t(
+            std::min(rng1.first(), rng2.first()),
+            std::max(rng1.last(), rng2.last()),
+            rng1.stride() / 2);
+        }
+
+        // reconstruction based on 3 points, we need up to 2 resolved points between MPI domains
+        static rng_t rng_ref_distmem_halo(const rng_t &rng, const int &n_ref, const int rank = 0, const int size = 1)
+        {
+          return rng_t(
+            rank == 0       ? rng.first()                              : rng.first() - (n_ref + n_ref/2),
+            rank < size - 1 ? rng.last() + (n_ref + n_ref/2) : rng.last(),
+            rng.stride());
+        }
+
+        public:
+
+        struct rt_params_t : parent_t::rt_params_t
+        {
+          int n_fra_iter = 1; // number of iterations of fractal reconstruction
+        };
+
+        // ctor
+        mpdata_rhs_vip_prs_sgs_fra_common(
+          typename parent_t::ctor_args_t args,
+          const rt_params_t &p
+        ) :
+          parent_t(args, p),
+          n_fra_iter(p.n_fra_iter),
+          n_ref(args.mem->n_ref),
+//          halo_ref(n_ref / 2),
+          ix_r2r(get_ix_r2r()),
+          // ijk_ref init below assumes 3D (shmem decomp dim along y);
+          // TODO: move this to concurr_common::init()? add something to ctor_args_t?
+          ijk_r2r{
+            {this->ijk[0].first() * n_ref, this->ijk[1].first() * n_ref, this->ijk[2].first() * n_ref}, // lbound
+            {this->ijk[0].last() * n_ref, this->ijk[1].last() * n_ref, this->ijk[2].last() * n_ref},    // ubound
+            {n_ref, n_ref, n_ref}, // stride
+            }
+        {
+          assert(p.n_fra_iter > 0);
+          assert(n_ref % 2 == 0);
+
+          for (int d = 0; d < ct_params_t::n_dims; ++d)
+          {
+            if(d==1)
+            {
+              ijk_ref.lbound(d) = this->mem->slab(this->mem->grid_size_ref[d], this->rank, this->mem->size).first();// this->ijk.lbound(d) * n_ref;
+              ijk_ref.ubound(d) = this->mem->slab(this->mem->grid_size_ref[d], this->rank, this->mem->size).last();
+            }
+            else
+            {
+              ijk_ref.lbound(d) = this->mem->grid_size_ref[d].first();
+              ijk_ref.ubound(d) = this->mem->grid_size_ref[d].last();
+            }
+          }
+          ijk_ref_with_halo = ijk_ref;
+          const auto ijk_ref_with_halo_rng_0 = rng_ref_distmem_halo(ijk_ref[0], this->mem->n_ref, this->mem->distmem.rank(), this->mem->distmem.size());
+          ijk_ref_with_halo.lbound(0) = ijk_ref_with_halo_rng_0.first();
+          ijk_ref_with_halo.ubound(0) = ijk_ref_with_halo_rng_0.last();
+        }
+      };
+    } // namespace detail
+  } // namespace solvers
+} // namespace libmpdataxx
diff --git a/libmpdata++/solvers/detail/solver_1d.hpp b/libmpdata++/solvers/detail/solver_1d.hpp
index 1b007aa4..336284fd 100644
--- a/libmpdata++/solvers/detail/solver_1d.hpp
+++ b/libmpdata++/solvers/detail/solver_1d.hpp
@@ -199,6 +199,15 @@ namespace libmpdataxx
         protected:
 
         // helper method to allocate a vector-component temporary array
+        static void alloc_tmp_vctr(
+          typename parent_t::mem_t *mem,
+          const char * __file__,
+          const std::array<rng_t, 1> grid_size
+        )
+        {
+          alloc_tmp(mem, __file__, 1, parent_t::rng_vctr(grid_size[0])); // always one-component vectors
+        }
+
         static void alloc_tmp_vctr(
           typename parent_t::mem_t *mem,
           const char * __file__
diff --git a/libmpdata++/solvers/detail/solver_2d.hpp b/libmpdata++/solvers/detail/solver_2d.hpp
index fa8876f4..e9e515e6 100644
--- a/libmpdata++/solvers/detail/solver_2d.hpp
+++ b/libmpdata++/solvers/detail/solver_2d.hpp
@@ -364,6 +364,7 @@ namespace libmpdataxx
           const char * __file__,
           const int n_arr,
           const std::vector<std::vector<bool>> &stgr,
+          const std::array<rng_t, 2> grid_size,
           bool srfc = false
         )
         {
@@ -371,21 +372,42 @@ namespace libmpdataxx
           for (int n = 0; n < n_arr; ++n)
           {
             mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t(
-              stgr[n][0] ? parent_t::rng_vctr(mem->grid_size[0]) : parent_t::rng_sclr(mem->grid_size[0]),
+              stgr[n][0] ? parent_t::rng_vctr(grid_size[0]) : parent_t::rng_sclr(grid_size[0]),
               srfc ? rng_t(0, 0) :
-                stgr[n][1] ? parent_t::rng_vctr(mem->grid_size[1]) :
-                  parent_t::rng_sclr(mem->grid_size[1])
+                stgr[n][1] ? parent_t::rng_vctr(grid_size[1]) :
+                  parent_t::rng_sclr(grid_size[1])
             )));
           }
         }
 
+        // version with default grid size
+        static void alloc_tmp_stgr(
+          typename parent_t::mem_t *mem,
+          const char * __file__,
+          const int n_arr,
+          const std::vector<std::vector<bool>> &stgr,
+          bool srfc = false // allocate only surface data
+        )
+        {
+          alloc_tmp_stgr(mem, __file__, n_arr, stgr, mem->grid_size, srfc);
+        }
+
         // helper method to allocate a temporary space composed of vector-component arrays
+        static void alloc_tmp_vctr(
+          typename parent_t::mem_t *mem,
+          const char * __file__,
+          const std::array<rng_t, 2> grid_size
+        )
+        {
+          alloc_tmp_stgr(mem, __file__, 2, {{true, false}, {false, true}}, grid_size, false);
+        }
+
         static void alloc_tmp_vctr(
           typename parent_t::mem_t *mem,
           const char * __file__
         )
         {
-          alloc_tmp_stgr(mem, __file__, 2, {{true, false}, {false, true}});
+          alloc_tmp_stgr(mem, __file__, 2, {{true, false}, {false, true}}, false);
         }
 
         // helper method to allocate n_arr scalar temporary arrays
@@ -393,7 +415,7 @@ namespace libmpdataxx
           typename parent_t::mem_t *mem,
           const char * __file__, const int n_arr,
           std::string name = "",
-          bool srfc = false
+          bool srfc = false 
         )
         {
           mem->tmp[__file__].push_back(new arrvec_t<typename parent_t::arr_t>());
diff --git a/libmpdata++/solvers/detail/solver_3d.hpp b/libmpdata++/solvers/detail/solver_3d.hpp
index ea62aac9..3391dd2b 100644
--- a/libmpdata++/solvers/detail/solver_3d.hpp
+++ b/libmpdata++/solvers/detail/solver_3d.hpp
@@ -488,6 +488,7 @@ namespace libmpdataxx
           const char * __file__,
           const int n_arr,
           const std::vector<std::vector<bool>> &stgr,
+          const std::array<rng_t, 3> grid_size,
           bool srfc = false // allocate only surface data
         )
         {
@@ -495,23 +496,44 @@ namespace libmpdataxx
           for (int n = 0; n < n_arr; ++n)
           {
             mem->tmp[__file__].back().push_back(mem->old(new typename parent_t::arr_t(
-              stgr[n][0] ? parent_t::rng_vctr(mem->grid_size[0]) : parent_t::rng_sclr(mem->grid_size[0]),
-              stgr[n][1] ? parent_t::rng_vctr(mem->grid_size[1]) : parent_t::rng_sclr(mem->grid_size[1]),
+              stgr[n][0] ? parent_t::rng_vctr(grid_size[0]) : parent_t::rng_sclr(grid_size[0]),
+              stgr[n][1] ? parent_t::rng_vctr(grid_size[1]) : parent_t::rng_sclr(grid_size[1]),
               srfc ? rng_t(0, 0) :
-                stgr[n][2] ? parent_t::rng_vctr(mem->grid_size[2]) :
-                  parent_t::rng_sclr(mem->grid_size[2]),
+                stgr[n][2] ? parent_t::rng_vctr(grid_size[2]) :
+                  parent_t::rng_sclr(grid_size[2]),
               arr3D_storage
             )));
           }
         }
 
+        // version with default grid size
+        static void alloc_tmp_stgr(
+          typename parent_t::mem_t *mem,
+          const char * __file__,
+          const int n_arr,
+          const std::vector<std::vector<bool>> &stgr,
+          bool srfc = false // allocate only surface data
+        )
+        {
+          alloc_tmp_stgr(mem, __file__, n_arr, stgr, mem->grid_size, srfc);
+        }
+
         // helper method to allocate a temporary space composed of vector-component arrays
+        static void alloc_tmp_vctr(
+          typename parent_t::mem_t *mem,
+          const char * __file__,
+          const std::array<rng_t, 3> grid_size
+        )
+        {
+          alloc_tmp_stgr(mem, __file__, 3, {{true, false, false}, {false, true, false}, {false, false, true}}, grid_size, false);
+        }
+
         static void alloc_tmp_vctr(
           typename parent_t::mem_t *mem,
           const char * __file__
         )
         {
-          alloc_tmp_stgr(mem, __file__, 3, {{true, false, false}, {false, true, false}, {false, false, true}});
+          alloc_tmp_stgr(mem, __file__, 3, {{true, false, false}, {false, true, false}, {false, false, true}}, false);
         }
 
         // helper method to allocate n_arr scalar temporary arrays
@@ -519,7 +541,7 @@ namespace libmpdataxx
           typename parent_t::mem_t *mem,
           const char * __file__, const int n_arr,
           std::string name = "",
-          bool srfc = false // allocate only surface data
+          bool srfc = false 
         )
         {
           mem->tmp[__file__].push_back(new arrvec_t<typename parent_t::arr_t>());
diff --git a/libmpdata++/solvers/detail/solver_common.hpp b/libmpdata++/solvers/detail/solver_common.hpp
index 5aab5f07..90126ab2 100644
--- a/libmpdata++/solvers/detail/solver_common.hpp
+++ b/libmpdata++/solvers/detail/solver_common.hpp
@@ -8,9 +8,10 @@
 
 #include <libmpdata++/blitz.hpp>
 #include <libmpdata++/formulae/arakawa_c.hpp>
-#include <libmpdata++/concurr/detail/sharedmem.hpp>
+#include <libmpdata++/concurr/detail/sharedmem_refined.hpp>
 
 #include <libmpdata++/solvers/detail/monitor.hpp>
+#include <libmpdata++/solvers/detail/solver_type_traits.hpp>
 
 #include <libmpdata++/bcond/detail/bcond_common.hpp>
 
@@ -72,7 +73,18 @@ namespace libmpdataxx
         real_t time = 0;
         std::vector<int> n;
 
-        typedef concurr::detail::sharedmem<real_t, n_dims, n_tlev> mem_t;
+        using shmem_ref_t = concurr::detail::sharedmem_refined<real_t, n_dims, n_tlev>;
+        using shmem_t = concurr::detail::sharedmem<real_t, n_dims, n_tlev>;
+
+        public:
+
+        // if fractal reconstruction of some variables is required in ct_params, use special type of sharedmem
+        using mem_t = typename std::conditional_t<
+          detail::slvr_with_frac_recn_v<ct_params_t>,
+          shmem_ref_t, shmem_t>;
+
+        protected:
+
         mem_t *mem;
 
         // helper methods invoked by solve()
diff --git a/libmpdata++/solvers/detail/solver_type_traits.hpp b/libmpdata++/solvers/detail/solver_type_traits.hpp
new file mode 100644
index 00000000..bbf60361
--- /dev/null
+++ b/libmpdata++/solvers/detail/solver_type_traits.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+namespace libmpdataxx
+{
+  namespace solvers
+  {
+    namespace detail
+    {
+      // helpers that detect if the solver uses fractal reconstruction
+      template< class, class = void >
+      struct slvr_with_frac_recn : std::false_type { };
+
+      template< class ct_params_t >
+      struct slvr_with_frac_recn<
+        ct_params_t, 
+        typename std::enable_if_t<(int)ct_params_t::fractal_recon != (int)0 > > : std::true_type { };
+        //std::enable_if_t< std::is_base_of_v< libmpdataxx::solvers::mpdata_rhs_vip_prs_sgs_fra_family_tag, solver_t > > > : std::true_type { };
+
+      template <class ct_params_t>
+      inline constexpr bool slvr_with_frac_recn_v = slvr_with_frac_recn<ct_params_t>::value;
+    }
+  }
+}
diff --git a/libmpdata++/solvers/mpdata_rhs.hpp b/libmpdata++/solvers/mpdata_rhs.hpp
index 351d7c43..d247ae1a 100644
--- a/libmpdata++/solvers/mpdata_rhs.hpp
+++ b/libmpdata++/solvers/mpdata_rhs.hpp
@@ -45,6 +45,7 @@ namespace libmpdataxx
 #endif
 
       protected:
+
       using solver_family = mpdata_rhs_family_tag;
 
       // member fields
diff --git a/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs_fra.hpp b/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs_fra.hpp
new file mode 100644
index 00000000..4397571b
--- /dev/null
+++ b/libmpdata++/solvers/mpdata_rhs_vip_prs_sgs_fra.hpp
@@ -0,0 +1,71 @@
+/**
+ * @file
+ * @copyright University of Warsaw
+ * @section LICENSE
+ * GPLv3+ (see the COPYING file or http://www.gnu.org/licenses/)
+ *
+ */
+
+#pragma once
+
+#include <libmpdata++/solvers/detail/mpdata_rhs_vip_prs_sgs_fra_3d.hpp>
+
+namespace libmpdataxx
+{
+  namespace solvers
+  {
+    struct mpdata_rhs_vip_prs_sgs_fra_family_tag {};
+
+    template<typename ct_params_t, int minhalo = 0, class enableif = void>
+    class mpdata_rhs_vip_prs_sgs_fra;
+
+    // no fields with fractal reconstruction, inherit from mpdata_rhs_vip_prs_sgs
+    template<typename ct_params_t, int minhalo>
+    class mpdata_rhs_vip_prs_sgs_fra<
+      ct_params_t, minhalo,
+      typename std::enable_if_t<(int)ct_params_t::fractal_recon == (int)0> 
+    > : public mpdata_rhs_vip_prs_sgs<ct_params_t, minhalo>
+    {
+      using parent_t = mpdata_rhs_vip_prs_sgs<ct_params_t, minhalo>;
+      using parent_t::parent_t; // inheriting constructors
+    };
+
+    template <class ct_params_t, int minhalo>
+    class mpdata_rhs_vip_prs_sgs_fra<
+      ct_params_t, minhalo,
+      typename std::enable_if_t<(int)ct_params_t::fractal_recon != (int)0>
+    > : public detail::mpdata_rhs_vip_prs_sgs_fra_dim<ct_params_t, minhalo>
+    {
+      using parent_t = detail::mpdata_rhs_vip_prs_sgs_fra_dim<ct_params_t, minhalo>;
+
+      protected:
+      using solver_family = mpdata_rhs_vip_prs_sgs_fra_family_tag;
+
+      //ctor
+      mpdata_rhs_vip_prs_sgs_fra(
+        typename parent_t::ctor_args_t args,
+        const typename parent_t::rt_params_t &p
+      ) :
+        parent_t(args, p)
+      {
+        this->c_j.reference(args.mem->tmp[__FILE__][1][0]);
+        this->d_j.reference(args.mem->tmp[__FILE__][1][1]);
+        this->f_j.reference(args.mem->tmp[__FILE__][1][2]);
+      }
+
+      // why alloc here?
+      static void alloc(
+        typename parent_t::mem_t *mem,
+        const int &n_iters
+      ) {
+        parent_t::alloc(mem, n_iters);
+        parent_t::alloc_tmp_sclr_ref(mem, __FILE__, opts::nset(ct_params_t::fractal_recon));
+        parent_t::alloc_tmp_sclr_ref(mem, __FILE__, 3); // c_j, d_j, f_j
+//        parent_t::alloc_tmp_vctr(mem, __FILE__, mem->grid_size_ref);                                 // GC_ref
+
+        mem->psi_ref = mem->tmp[__FILE__][0];
+//        mem->GC_ref = mem->tmp[__FILE__][1];
+      }
+    };
+  } // namespace solvers
+} // namespace libmpdataxx
diff --git a/tests/sandbox/pbl/pbl.hpp b/tests/sandbox/pbl/pbl.hpp
index 0b2f7079..7ff0e255 100644
--- a/tests/sandbox/pbl/pbl.hpp
+++ b/tests/sandbox/pbl/pbl.hpp
@@ -59,7 +59,11 @@ class pbl : public libmpdataxx::output::hdf5_xdmf<libmpdataxx::solvers::boussine
     
     if (this->timestep % static_cast<int>(this->outfreq) == 0)
     {
+      //this->reconstruct_refinee(ix::w);
+      this->reconstruct_refinee(ix::tht);
+
       if (this->rank == 0) std::cout << this->timestep << std::endl;
+
       this->mem->barrier();
       if (this->rank == 0)
       {
@@ -68,6 +72,17 @@ class pbl : public libmpdataxx::output::hdf5_xdmf<libmpdataxx::solvers::boussine
           this->record_aux_dsc("tke", this->tke);
         }
         this->record_aux_dsc("p", this->Phi);
+        //this->record_aux_dsc_refined("w reconstructed", this->mem->refinee(this->ix_r2r.at(ix::w)));
+        this->record_aux_dsc_refined("tht reconstructed", this->mem->refinee(this->ix_r2r.at(ix::tht)));
+      }
+      this->mem->barrier();
+
+      this->interpolate_refinee(ix::tht);
+
+      this->mem->barrier();
+      if (this->rank == 0)
+      {
+        this->record_aux_dsc_refined("tht interpolated", this->mem->refinee(this->ix_r2r.at(ix::tht)));
       }
       this->mem->barrier();
     }
diff --git a/tests/sandbox/pbl/pbl_test_def.hpp b/tests/sandbox/pbl/pbl_test_def.hpp
index ef63d592..9409c4bb 100644
--- a/tests/sandbox/pbl/pbl_test_def.hpp
+++ b/tests/sandbox/pbl/pbl_test_def.hpp
@@ -22,6 +22,7 @@ template <typename params_t>
 void set_sgs_specific(params_t &p, iles_tag)
 {
   p.iles_cdrag = 0.1;
+  p.n_fra_iter = 2;
 }
 
 // smagorinsky
@@ -32,6 +33,7 @@ void set_sgs_specific(params_t &p, smg_tag)
   p.smg_c = 0.165;
   p.prandtl_num = 0.42;
   p.cdrag = 0.1;
+  p.n_fra_iter = 4;
 }
 
 template <typename sgs_t>
@@ -54,6 +56,8 @@ void test(const std::string &dirname, const int np, const int nt)
       u, v, w, tht,
       vip_i=u, vip_j=v, vip_k=w, vip_den=-1
     }; };
+    //enum { fractal_recon = libmpdataxx::opts::bit(ix::w) };
+    enum { fractal_recon = libmpdataxx::opts::bit(ix::tht) };
   }; 
 
   using ix = typename ct_params_t::ix;