From c4cc7c0697b41e9f4d50a802b284e0137f2e025d Mon Sep 17 00:00:00 2001
From: Patrick Kilian
Date: Thu, 4 Feb 2021 16:38:41 -0700
Subject: [PATCH] Allow unequal domain decomposition into chunks of roughly
 equal cost.

Cost is supplied by the user through a cost function. In the example of the
Harris current sheet, this improves parallelization efficiency on 8 cores from
25% to 80%, which is well within the range that I get when I split in y or z
using either the old or the new code.
---
 sample/harrisSplit    | 361 ++++++++++++++++++++++++++++++++++++++++++
 src/grid/grid.h       |  35 +++-
 src/grid/partition.cc | 305 +++++++++++++++++++++++++++++++++--
 src/vpic/dump.cc      | 140 ++++------------
 src/vpic/vpic.h       |  39 +++++
 5 files changed, 755 insertions(+), 125 deletions(-)
 create mode 100644 sample/harrisSplit

diff --git a/sample/harrisSplit b/sample/harrisSplit
new file mode 100644
index 00000000..b070295e
--- /dev/null
+++ b/sample/harrisSplit
@@ -0,0 +1,361 @@
+// Magnetic reconnection in a Harris equilibrium thin current sheet
+//
+// This input deck reproduces the PIC simulations found in:
+//   William Daughton. "Nonlinear dynamics of thin current sheets." Phys.
+//   Plasmas. 9(9): 3668-3678. September 2002.
+//
+// This input deck was written by:
+//   Kevin J Bowers, Ph.D.
+//   Plasma Physics Group (X-1)
+//   Applied Physics Division
+//   Los Alamos National Lab
+//   August 2003       - original version
+//   October 2003      - heavily revised to utilize input deck syntactic sugar
+//   March/April 2004  - rewritten for domain decomposition V4PIC
+
+// Rough density estimate over the whole run of the simulation, used to split
+// the domain into chunks that have roughly the same number of particles,
+// despite the density bump inside the current sheet.
+class MyCost: public Cost {
+  private:
+    const float l = 4.37; // Rough sheet thickness at later times, in units of dx
+
+  public:
+    float cost(const int ix, const int iy, const int iz) {
+      return pow(cosh((ix-32.)/l),-2); // Peaked at ix = nx/2 = 32, the sheet center
+    }
+};
+
+// If you want to use global variables (for example, to store the dump
+// intervals for your diagnostics section), it must be done in the globals
+// section. Variables declared in the globals section will be preserved across
+// restart dumps. For example, if the globals section is:
+//   begin_globals {
+//     double variable;
+//   } end_globals
+// the double "variable" will be visible to other input deck sections as
+// "global->variable". Note: Variables declared in the globals section are set
+// to zero before the user's initialization block is executed. Up to 16K
+// of global variables can be defined.
+
+begin_globals {
+  double energies_interval;
+  double fields_interval;
+  double ehydro_interval;
+  double ihydro_interval;
+  double eparticle_interval;
+  double iparticle_interval;
+  double restart_interval;
+};
+
+begin_initialization {
+  // At this point, there is an empty grid and the random number generator is
+  // seeded with the rank. The grid, materials, and species need to be
+  // defined. Then the initial non-zero fields need to be loaded at time
+  // level 0 and the particles (position and momentum both) need to be loaded
+  // at time level 0.
+
+  double input_mass_ratio;
+  int input_seed;
+
+  // Arguments can be passed from the command line to the input deck
+  if( num_cmdline_arguments!=3 ) {
+    // Set sensible defaults
+    input_mass_ratio = 1.0;
+    input_seed       = 0;
+
+    sim_log( "Defaulting to mass_ratio of " << input_mass_ratio << " and seed of " << input_seed );
+    sim_log( "For Custom Usage: " << cmdline_argument[0] << " mass_ratio seed" );
+  }
+  else {
+    input_mass_ratio = atof(cmdline_argument[1]); // Ion mass / electron mass
+    input_seed       = atoi(cmdline_argument[2]); // Seed for the random number generator
+    sim_log( "Detected input mass_ratio of " << input_mass_ratio << " and seed of " << input_seed );
+  }
+  seed_entropy( input_seed );
+
+  // Diagnostic messages can be written (usually to stderr)
+  sim_log( "Computing simulation parameters" );
+
+  // Define the system of units for this problem (natural units)
+  double L    = 1; // Length normalization (sheet thickness)
+  double ec   = 1; // Charge normalization
+  double me   = 1; // Mass normalization
+  double c    = 1; // Speed of light
+  double eps0 = 1; // Permittivity of space
+
+  // Physics parameters
+  double mi_me   = input_mass_ratio; // Ion mass / electron mass
+  double rhoi_L  = 1;   // Ion thermal gyroradius / Sheet thickness
+  double Ti_Te   = 1;   // Ion temperature / electron temperature
+  double wpe_wce = 3;   // Electron plasma freq / electron cyclotron freq
+  double theta   = 0;   // Orientation of the simulation wrt current sheet
+  double taui    = 100; // Simulation wci's to run
+
+  // Numerical parameters
+  double Lx = 16*L;        // How big should the box be in the x direction
+  double Ly = 16*L;        // How big should the box be in the y direction
+  double Lz = 16*L;        // How big should the box be in the z direction
+  double nx = 64;          // Global resolution in the x direction
+  double ny = 64;          // Global resolution in the y direction
+  double nz = 64;          // Global resolution in the z direction
+  double nppc = 200;       // Average number of macro particles per cell (both species combined!)
+  double cfl_req = 0.99;   // How close to Courant should we try to run
+  double wpedt_max = 0.36; // How big a timestep is allowed if Courant is not too restrictive
+  double damp = 0.001;     // Level of radiation damping
+
+  // Derived quantities
+  double mi   = me*mi_me;                             // Ion mass
+  double kTe  = me*c*c/(2*wpe_wce*wpe_wce*(1+Ti_Te)); // Electron temperature
+  double kTi  = kTe*Ti_Te;                            // Ion temperature
+  double vthe = sqrt(2*kTe/me); // Electron thermal velocity (B.D. convention)
+  double vthi = sqrt(2*kTi/mi); // Ion thermal velocity (B.D. convention)
+  double wci  = vthi/(rhoi_L*L);               // Ion cyclotron frequency
+  double wce  = wci*mi_me;                     // Electron cyclotron frequency
+  double wpe  = wce*wpe_wce;                   // Electron plasma frequency
+  double wpi  = wpe/sqrt(mi_me);               // Ion plasma frequency
+  double vdre = c*c*wce/(wpe*wpe*L*(1+Ti_Te)); // Electron drift velocity
+  double vdri = -Ti_Te*vdre;                   // Ion drift velocity
+  double b0   = me*wce/ec;                     // Asymptotic magnetic field strength
+  double n0   = me*eps0*wpe*wpe/(ec*ec);       // Peak electron density (also peak ion density)
+  double Npe  = 2*n0*Ly*Lz*L*tanh(0.5*Lx/L);   // Number of physical electrons in box
+  double Npi  = Npe;                           // Number of physical ions in box
+  double Ne   = 0.5*nppc*nx*ny*nz;             // Total macro electrons in box
+  Ne = trunc_granular(Ne,nproc());             // Make it divisible by number of processors
+  double Ni   = Ne;                            // Total macro ions in box
+  double we   = Npe/Ne;                        // Weight of a macro electron
+  double wi   = Npi/Ni;                        // Weight of a macro ion
+  double gdri = 1/sqrt(1-vdri*vdri/(c*c));     // gamma of ion drift frame
+  double gdre = 1/sqrt(1-vdre*vdre/(c*c));     // gamma of electron drift frame
+  double udri = vdri*gdri;                     // 4-velocity of ion drift frame
+  double udre = vdre*gdre;                     // 4-velocity of electron drift frame
+  double uthi = sqrt(kTi/mi)/c;                // Normalized ion thermal velocity (K.B. convention)
+  double uthe = sqrt(kTe/me)/c;                // Normalized electron thermal velocity (K.B. convention)
+  double cs   = cos(theta);
+  double sn   = sin(theta);
+
+  // Determine the timestep
+  double dg = courant_length(Lx,Ly,Lz,nx,ny,nz); // Courant length
+  double dt = cfl_req*dg/c;                      // Courant limited time step
+  if( wpe*dt>wpedt_max ) dt=wpedt_max/wpe;       // Override time step if plasma frequency limited
+
+  ////////////////////////////////////////
+  // Setup high level simulation parameters
+
+  num_step             = int(0.2*taui/(wci*dt));
+  status_interval      = int(1./(wci*dt));
+  sync_shared_interval = status_interval;
+  clean_div_e_interval = status_interval;
+  clean_div_b_interval = status_interval;
+
+  global->energies_interval  = status_interval;
+  global->fields_interval    = status_interval;
+  global->ehydro_interval    = status_interval;
+  global->ihydro_interval    = status_interval;
+  global->eparticle_interval = status_interval;
+  global->iparticle_interval = status_interval;
+  global->restart_interval   = status_interval;
+
+  ///////////////////////////
+  // Setup the space and time
+
+  // Setup basic grid parameters
+  define_units( c, eps0 );
+  define_timestep( dt );
+
+  const int topo_x = 1;
+  const int topo_y = 1;
+  const int topo_z = nproc();
+  MyCost particlecounts;
+
+  // Partition a periodic box among the processors
+  define_nonuniform_periodic_grid( -0.5*Lx, 0, 0,           // Low corner
+                                    0.5*Lx, Ly, Lz,         // High corner
+                                    nx, ny, nz,             // Resolution
+                                    topo_x, topo_y, topo_z, // Topology
+                                    particlecounts );       // Split smarter
+
+  // Override some of the boundary conditions to put a particle reflecting
+  // perfect electrical conductor on the -x and +x boundaries
+  set_domain_field_bc( BOUNDARY(-1,0,0), pec_fields );
+  set_domain_field_bc( BOUNDARY( 1,0,0), pec_fields );
+  set_domain_particle_bc( BOUNDARY(-1,0,0), reflect_particles );
+  set_domain_particle_bc( BOUNDARY( 1,0,0), reflect_particles );
+
+  define_material( "vacuum", 1 );
+  // Note: define_material defaults to isotropic materials with mu=1,sigma=0
+  // Tensor electronic, magnetic and conductive materials are supported
+  // though. See "shapes" for how to define them and assign them to regions.
+  // Also, space is initially filled with the first material defined.
+ + // If you pass NULL to define field array, the standard field array will + // be used (if damp is not provided, no radiation damping will be used). + define_field_array( NULL, damp ); + + //////////////////// + // Setup the species + + // Allow 50% more local_particles in case of non-uniformity + // VPIC will pick the number of movers to use for each species + // Both species use out-of-place sorting + species_t * ion = define_species( "ion", ec, mi, 1.5*Ni/(topo_y*topo_z), -1, 40, 1 ); + species_t * electron = define_species( "electron", -ec, me, 1.5*Ne/(topo_y*topo_z), -1, 20, 1 ); + + /////////////////////////////////////////////////// + // Log diagnostic information about this simulation + + sim_log( "" ); + sim_log( "System of units" ); + sim_log( "L = " << L ); + sim_log( "ec = " << ec ); + sim_log( "me = " << me ); + sim_log( "c = " << c ); + sim_log( "eps0 = " << eps0 ); + sim_log( "" ); + sim_log( "Physics parameters" ); + sim_log( "rhoi/L = " << rhoi_L ); + sim_log( "Ti/Te = " << Ti_Te ); + sim_log( "wpe/wce = " << wpe_wce ); + sim_log( "mi/me = " << mi_me ); + sim_log( "theta = " << theta ); + sim_log( "taui = " << taui ); + sim_log( "" ); + sim_log( "Numerical parameters" ); + sim_log( "num_step = " << num_step ); + sim_log( "dt = " << dt ); + sim_log( "Lx = " << Lx << ", Lx/L = " << Lx/L ); + sim_log( "Ly = " << Ly << ", Ly/L = " << Ly/L ); + sim_log( "Lz = " << Lz << ", Lz/L = " << Lz/L ); + sim_log( "nx = " << nx << ", dx = " << Lx/nx << ", L/dx = " << L*nx/Lx ); + sim_log( "ny = " << ny << ", dy = " << Ly/ny << ", L/dy = " << L*ny/Ly ); + sim_log( "nz = " << nz << ", dz = " << Lz/nz << ", L/dz = " << L*nz/Lz ); + sim_log( "nppc = " << nppc ); + sim_log( "courant = " << c*dt/dg ); + sim_log( "damp = " << damp ); + sim_log( "" ); + sim_log( "Ion parameters" ); + sim_log( "qpi = " << ec << ", mi = " << mi << ", qpi/mi = " << ec/mi ); + sim_log( "vthi = " << vthi << ", vthi/c = " << vthi/c << ", kTi = " << kTi ); + sim_log( "vdri = " << vdri << ", vdri/c = " << vdri/c ); + sim_log( "wpi = " << wpi << ", wpi dt = " << wpi*dt << ", n0 = " << n0 ); + sim_log( "wci = " << wci << ", wci dt = " << wci*dt ); + sim_log( "rhoi = " << vthi/wci << ", L/rhoi = " << L/(vthi/wci) << ", dx/rhoi = " << (Lx/nx)/(vthi/wci) ); + sim_log( "debyei = " << vthi/wpi << ", L/debyei = " << L/(vthi/wpi) << ", dx/debyei = " << (Lx/nx)/(vthi/wpi) ); + sim_log( "Npi = " << Npi << ", Ni = " << Ni << ", Npi/Ni = " << Npi/Ni << ", wi = " << wi ); + sim_log( "" ); + sim_log( "Electron parameters" ); + sim_log( "qpe = " << -ec << ", me = " << me << ", qpe/me = " << -ec/me ); + sim_log( "vthe = " << vthe << ", vthe/c = " << vthe/c << ", kTe = " << kTe ); + sim_log( "vdre = " << vdre << ", vdre/c = " << vdre/c ); + sim_log( "wpe = " << wpe << ", wpe dt = " << wpe*dt << ", n0 = " << n0 ); + sim_log( "wce = " << wce << ", wce dt = " << wce*dt ); + sim_log( "rhoe = " << vthe/wce << ", L/rhoe = " << L/(vthe/wce) << ", dx/rhoe = " << (Lx/nx)/(vthe/wce) ); + sim_log( "debyee = " << vthe/wpe << ", L/debyee = " << L/(vthe/wpe) << ", dx/debyee = " << (Lx/nx)/(vthe/wpe) ); + sim_log( "Npe = " << Npe << ", Ne = " << Ne << ", Npe/Ne = " << Npe/Ne << ", we = " << we ); + sim_log( "" ); + sim_log( "Miscellaneous" ); + sim_log( "nptotal = " << Ni + Ne ); + sim_log( "nproc = " << nproc() ); + sim_log( "" ); + + //////////////////////////// + // Load fields and particles + + sim_log( "Loading fields" ); + + set_region_field( everywhere, 0, 0, 0, // Electric field + 0, -sn*b0*tanh(x/L), cs*b0*tanh(x/L) ); // Magnetic field + // 
Note: everywhere is a region that encompasses the entire simulation
+  // In general, regions are specified as logical equations (i.e. x>0 && x+y<2)
+
+  sim_log( "Loading particles" );
+
+  repeat( Ni/(topo_y*topo_z) ) {
+    double x, y, z, ux, uy, uz, d0;
+
+    // Pick an appropriately distributed random location for the pair
+    do {
+      x = L*atanh( uniform( rng(0), -1, 1 ) );
+    } while( x<=-0.5*Lx || x>=0.5*Lx );
+    y = uniform( rng(0), grid->y0, grid->y1 );
+    z = uniform( rng(0), grid->z0, grid->z1 );
+
+    // For the ion, pick an isothermal normalized momentum in the drift frame
+    // (this is a proper thermal equilibrium in the non-relativistic limit),
+    // boost it from the drift frame to the frame with the magnetic field
+    // along z and then rotate it into the lab frame. Then load the particle.
+    // Repeat the process for the electron.
+
+    ux = normal( rng(0), 0, uthi );
+    uy = normal( rng(0), 0, uthi );
+    uz = normal( rng(0), 0, uthi );
+    d0 = gdri*uy + sqrt(ux*ux+uy*uy+uz*uz+1)*udri;
+    uy = d0*cs - uz*sn;
+    uz = d0*sn + uz*cs;
+    inject_particle( ion, x, y, z, ux, uy, uz, wi, 0, 0 );
+
+    ux = normal( rng(0), 0, uthe );
+    uy = normal( rng(0), 0, uthe );
+    uz = normal( rng(0), 0, uthe );
+    d0 = gdre*uy + sqrt(ux*ux+uy*uy+uz*uz+1)*udre;
+    uy = d0*cs - uz*sn;
+    uz = d0*sn + uz*cs;
+    inject_particle( electron, x, y, z, ux, uy, uz, we, 0, 0 );
+  }
+  fprintf(stderr, "GREP %d electrons, %d ions on rank %d\n", electron->np, ion->np, rank());
+
+  // Upon completion of the initialization, the following occurs:
+  // - The synchronization error (tang E, norm B) is computed between domains
+  //   and tang E / norm B are synchronized by averaging where discrepancies
+  //   are encountered.
+  // - The initial divergence error of the magnetic field is computed and
+  //   one pass of cleaning is done (for good measure)
+  // - The bound charge density necessary to give the simulation an initially
+  //   clean divergence e is computed.
+  // - The particle momentum is uncentered from u_0 to u_{-1/2}
+  // - The user diagnostics are called on the initial state
+  // - The physics loop is started
+  //
+  // The physics loop consists of:
+  // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2}
+  // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles)
+  // - User current injection (adjust field(x,y,z).jfx, jfy, jfz)
+  // - Advance B from B_0 to B_{1/2}
+  // - Advance E from E_0 to E_1
+  // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz)
+  // - Advance B from B_{1/2} to B_1
+  // - (periodically) Divergence clean electric field
+  // - (periodically) Divergence clean magnetic field
+  // - (periodically) Synchronize shared tang e and norm b
+  // - Increment the time step
+  // - Call user diagnostics
+  // - (periodically) Print a status message
+}

+begin_diagnostics {
+  // Removed for benchmarking
+}
+
+begin_particle_injection {
+
+  // No particle injection for this simulation
+
+}
+
+begin_current_injection {
+
+  // No current injection for this simulation
+
+}
+
+begin_field_injection {
+
+  // No field injection for this simulation
+
+}
+
+begin_particle_collisions {
+
+  // No collisions for this simulation
+
+}
diff --git a/src/grid/grid.h b/src/grid/grid.h
index 3167c7e6..264c7a61 100644
--- a/src/grid/grid.h
+++ b/src/grid/grid.h
@@ -99,8 +99,8 @@ typedef struct grid {
   // 0 ... nproc-1 ... comm boundary condition
   // <0 ... locally applied boundary condition

-  int gpx, gpy, gpz = -1; // Store global processor decomposition to let us figure
-                          // out where we are in the global decomposition
+  int gnx = -1, gny = -1, gnz = -1; // Global size of domain (in cells)
+  int nx0 = -1, ny0 = -1, nz0 = -1; // First cell of the local domain in the global grid

   // Phase 3 grid data structures
   // NOTE: VOXEL INDEXING LIMITS NUMBER OF VOXELS TO 2^31 (INCLUDING
@@ -243,6 +243,37 @@ partition_metal_box( grid_t *g,
                      int gnx, int gny, int gnz,
                      int gpx, int gpy, int gpz );

+END_C_DECLS
+class Cost {
+  public:
+    virtual float cost(const int ix, const int iy, const int iz) = 0;
+};
+
+void
+partition_nonuniform_periodic_box( grid_t *g,
+                                   double gx0, double gy0, double gz0,
+                                   double gx1, double gy1, double gz1,
+                                   int gnx, int gny, int gnz,
+                                   int gpx, int gpy, int gpz,
+                                   Cost& c );
+
+void
+partition_nonuniform_absorbing_box( grid_t *g,
+                                    double gx0, double gy0, double gz0,
+                                    double gx1, double gy1, double gz1,
+                                    int gnx, int gny, int gnz,
+                                    int gpx, int gpy, int gpz,
+                                    int pbc,
+                                    Cost& c );
+
+void
+partition_nonuniform_metal_box( grid_t *g,
+                                double gx0, double gy0, double gz0,
+                                double gx1, double gy1, double gz1,
+                                int gnx, int gny, int gnz,
+                                int gpx, int gpy, int gpz,
+                                Cost& c );
+
+BEGIN_C_DECLS

 // In grid_comm.c
 // FIXME: SHOULD TAKE A RAW PORT INDEX INSTEAD OF A PORT COORDS
diff --git a/src/grid/partition.cc b/src/grid/partition.cc
index e95f7225..8f6c9d61 100644
--- a/src/grid/partition.cc
+++ b/src/grid/partition.cc
@@ -56,9 +56,9 @@ partition_periodic_box( grid_t * g,
   RANK_TO_INDEX( world_rank, px,py,pz );

-  // Capture global processor decomposition
-  g->gpx = gpx;
-  g->gpy = gpy;
-  g->gpz = gpz;
+  // Capture global domain resolution
+  g->gnx = gnx;
+  g->gny = gny;
+  g->gnz = gnz;

   g->dx = (gx1-gx0)/(double)gnx;
   g->dy = (gy1-gy0)/(double)gny;
@@ -74,16 +74,28 @@ partition_periodic_box( grid_t * g,
          ((double)gny/(gy1-gy0))*
          ((double)gnz/(gz1-gz0))*0.125;

-  f = (double) px   /(double)gpx; g->x0 = gx0*(1-f) + gx1*f;
-  f = (double) py   /(double)gpy; g->y0 = gy0*(1-f) + gy1*f;
-  f = (double) pz   /(double)gpz; g->z0 = gz0*(1-f) + gz1*f;
+  int nx0 = (gnx * px) / gpx;
+  int ny0 = (gny * py) / gpy;
+  int nz0 = (gnz * pz) / gpz;

-  f = (double)(px+1)/(double)gpx; g->x1 = gx0*(1-f) + gx1*f;
-  f = (double)(py+1)/(double)gpy; g->y1 = gy0*(1-f) + gy1*f;
-  f = (double)(pz+1)/(double)gpz; g->z1 = gz0*(1-f) + gz1*f;
+  int nx1 = (gnx * (px+1)) / gpx;
+  int ny1 = (gny * (py+1)) / gpy;
+  int nz1 = (gnz * (pz+1)) / gpz;
+
+  g->nx0 = nx0;
+  g->ny0 = ny0;
+  g->nz0 = nz0;
+
+  f = (double)nx0 / (double)gnx; g->x0 = gx0*(1-f) + gx1*f;
+  f = (double)ny0 / (double)gny; g->y0 = gy0*(1-f) + gy1*f;
+  f = (double)nz0 / (double)gnz; g->z0 = gz0*(1-f) + gz1*f;
+
+  f = (double)nx1 / (double)gnx; g->x1 = gx0*(1-f) + gx1*f;
+  f = (double)ny1 / (double)gny; g->y1 = gy0*(1-f) + gy1*f;
+  f = (double)nz1 / (double)gnz; g->z1 = gz0*(1-f) + gz1*f;

   // Size the local grid
-  size_grid(g,gnx/gpx,gny/gpy,gnz/gpz);
+  size_grid(g,nx1-nx0,ny1-ny0,nz1-nz0);

   // Join the grid to neighbors
   INDEX_TO_RANK(px-1,py,  pz,  rank); join_grid(g,BOUNDARY((-1), 0, 0),rank);
@@ -113,30 +125,30 @@ partition_absorbing_box( grid_t * g,

   RANK_TO_INDEX( world_rank, px,py,pz );

-  if( px==0 && gnx>1 ) { 
+  if( px==0 && gnx>1 ) {
     set_fbc(g,BOUNDARY((-1),0,0),absorb_fields);
     set_pbc(g,BOUNDARY((-1),0,0),pbc);
-  } 
+  }

   if( px==gpx-1 && gnx>1 ) {
     set_fbc(g,BOUNDARY( 1,0,0),absorb_fields);
     set_pbc(g,BOUNDARY( 1,0,0),pbc);
   }

-  if( py==0 && gny>1 ) { 
+  if( py==0 && gny>1 ) {
     set_fbc(g,BOUNDARY(0,(-1),0),absorb_fields);
     set_pbc(g,BOUNDARY(0,(-1),0),pbc);
-  } 
+  }

   if( py==gpy-1 && gny>1 ) {
     set_fbc(g,BOUNDARY(0, 1,0),absorb_fields);
     set_pbc(g,BOUNDARY(0, 1,0),pbc);
   }

-  if( pz==0 && gnz>1 ) { 
+  if( pz==0 && gnz>1 ) {
     set_fbc(g,BOUNDARY(0,0,(-1)),absorb_fields);
     set_pbc(g,BOUNDARY(0,0,(-1)),pbc);
-  } 
+  }

   if( pz==gpz-1 && gnz>1 ) {
     set_fbc(g,BOUNDARY(0,0, 1),absorb_fields);
@@ -195,3 +207,264 @@ partition_metal_box( grid_t * g,
     set_pbc(g,BOUNDARY(0,0,1),reflect_particles);
   }
 }
+
+void
+partition_nonuniform_periodic_box( grid_t *g,
+                                   double gx0, double gy0, double gz0,
+                                   double gx1, double gy1, double gz1,
+                                   int gnx, int gny, int gnz,
+                                   int gpx, int gpy, int gpz,
+                                   Cost& c ) {
+  double f;
+  int rank, px, py, pz;
+
+  // Make sure the grid can be setup
+
+  if( !g ) ERROR(( "NULL grid" ));
+
+  if( gpx<1 || gpy<1 || gpz<1 || gpx*gpy*gpz!=world_size )
+    ERROR(( "Bad domain decomposition (%ix%ix%i)", gpx, gpy, gpz ));
+
+  if( gnx<1 || gny<1 || gnz<1 || gnx%gpx!=0 || gny%gpy!=0 || gnz%gpz!=0 )
+    ERROR(( "Bad resolution (%ix%ix%i) for domain decomposition (%ix%ix%i)",
+            gnx, gny, gnz, gpx, gpy, gpz ));
+
+  // Setup basic variables
+  RANK_TO_INDEX( world_rank, px,py,pz );
+
+  // Capture global domain resolution
+  g->gnx = gnx;
+  g->gny = gny;
+  g->gnz = gnz;
+
+  g->dx = (gx1-gx0)/(double)gnx;
+  g->dy = (gy1-gy0)/(double)gny;
+  g->dz = (gz1-gz0)/(double)gnz;
+  g->dV = ((gx1-gx0)/(double)gnx)*
+          ((gy1-gy0)/(double)gny)*
+          ((gz1-gz0)/(double)gnz);
+
+  g->rdx = (double)gnx/(gx1-gx0);
+  g->rdy = (double)gny/(gy1-gy0);
+  g->rdz = (double)gnz/(gz1-gz0);
+  g->r8V = ((double)gnx/(gx1-gx0))*
+           ((double)gny/(gy1-gy0))*
+           ((double)gnz/(gz1-gz0))*0.125;
+
+  // We assume that calling the cost function is cheap, so we will call it
+  // three times for every grid cell instead of storing the full 3d profile of
+  // per-cell costs in RAM
+  float* xcost = new float[gnx+2];
+  float* ycost = new float[gny+2];
+  float* zcost = new float[gnz+2];
+
+  // Prefix sum of cost, integrated over y and z. (Should we take max instead of sum?)
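+  //
+  // Worked example of the scan below (illustrative numbers, not from the
+  // Harris deck): gnx=4 slabs with costs {1,3,3,1} give the prefix sum
+  // xcost = {0,1,4,7,8}, with total cost xcost[gnx] = 8. For gpx=2, each
+  // boundary is the last cell index whose cumulative cost fraction does not
+  // exceed the rank's fair share, so rank 0 gets cells [0,2) and rank 1
+  // gets cells [2,4), each holding half of the total cost.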
+  xcost[0] = 0.;
+  for(int i = 0; i < gnx; i++) {
+    float sum = 0.;
+    for(int j = 0; j < gny; j++) {
+      for(int k = 0; k < gnz; k++) {
+        sum += c.cost(i,j,k);
+      }
+    }
+    xcost[i+1] = xcost[i] + sum;
+  }
+  xcost[gnx+1] = 1./0.; // +inf sentinel so the scans below must terminate
+
+  // Prefix sum of cost, integrated over x and z
+  ycost[0] = 0.;
+  for(int j = 0; j < gny; j++) {
+    float sum = 0.;
+    for(int i = 0; i < gnx; i++) {
+      for(int k = 0; k < gnz; k++) {
+        sum += c.cost(i,j,k);
+      }
+    }
+    ycost[j+1] = ycost[j] + sum;
+  }
+  ycost[gny+1] = 1./0.;
+
+  // Prefix sum of cost, integrated over x and y
+  zcost[0] = 0.;
+  for(int k = 0; k < gnz; k++) {
+    float sum = 0.;
+    for(int i = 0; i < gnx; i++) {
+      for(int j = 0; j < gny; j++) {
+        sum += c.cost(i,j,k);
+      }
+    }
+    zcost[k+1] = zcost[k] + sum;
+  }
+  zcost[gnz+1] = 1./0.;
+
+  // Scan for fair boundaries in x: the local domain starts at the last cell
+  // whose cumulative cost fraction does not exceed this rank's starting
+  // share px/gpx, and ends at the corresponding cell for share (px+1)/gpx
+  int nx0 = 0;
+  int i = 0;
+  while(xcost[i]/xcost[gnx] <= px/float(gpx)) {
+    nx0 = i;
+    i++;
+  }
+  int nx1 = 0;
+  i = 0;
+  while(xcost[i]/xcost[gnx] <= (px+1)/float(gpx)) {
+    nx1 = i;
+    i++;
+  }
+
+  // Scan for fair boundaries in y
+  int ny0 = 0;
+  int j = 0;
+  while(ycost[j]/ycost[gny] <= py/float(gpy)) {
+    ny0 = j;
+    j++;
+  }
+  int ny1 = 0;
+  j = 0;
+  while(ycost[j]/ycost[gny] <= (py+1)/float(gpy)) {
+    ny1 = j;
+    j++;
+  }
+
+  // Scan for fair boundaries in z
+  int nz0 = 0;
+  int k = 0;
+  while(zcost[k]/zcost[gnz] <= pz/float(gpz)) {
+    nz0 = k;
+    k++;
+  }
+  int nz1 = 0;
+  k = 0;
+  while(zcost[k]/zcost[gnz] <= (pz+1)/float(gpz)) {
+    nz1 = k;
+    k++;
+  }
+
+  delete[] xcost;
+  delete[] ycost;
+  delete[] zcost;
+
+  g->nx0 = nx0;
+  g->ny0 = ny0;
+  g->nz0 = nz0;
+
+  f = (double)nx0 / (double)gnx; g->x0 = gx0*(1-f) + gx1*f;
+  f = (double)ny0 / (double)gny; g->y0 = gy0*(1-f) + gy1*f;
+  f = (double)nz0 / (double)gnz; g->z0 = gz0*(1-f) + gz1*f;
+
+  f = (double)nx1 / (double)gnx; g->x1 = gx0*(1-f) + gx1*f;
+  f = (double)ny1 / (double)gny; g->y1 = gy0*(1-f) + gy1*f;
+  f = (double)nz1 / (double)gnz; g->z1 = gz0*(1-f) + gz1*f;
+
+  // Size the local grid
+  size_grid(g,nx1-nx0,ny1-ny0,nz1-nz0);
+
+  // Join the grid to neighbors
+  INDEX_TO_RANK(px-1,py,  pz,  rank); join_grid(g,BOUNDARY((-1), 0, 0),rank);
+  INDEX_TO_RANK(px,  py-1,pz,  rank); join_grid(g,BOUNDARY( 0,(-1), 0),rank);
+  INDEX_TO_RANK(px,  py,  pz-1,rank); join_grid(g,BOUNDARY( 0, 0,(-1)),rank);
+  INDEX_TO_RANK(px+1,py,  pz,  rank); join_grid(g,BOUNDARY( 1, 0, 0),rank);
+  INDEX_TO_RANK(px,  py+1,pz,  rank); join_grid(g,BOUNDARY( 0, 1, 0),rank);
+  INDEX_TO_RANK(px,  py,  pz+1,rank); join_grid(g,BOUNDARY( 0, 0, 1),rank);
+}
+
+void
+partition_nonuniform_absorbing_box( grid_t * g,
+                                    double gx0, double gy0, double gz0,
+                                    double gx1, double gy1, double gz1,
+                                    int gnx, int gny, int gnz,
+                                    int gpx, int gpy, int gpz,
+                                    int pbc,
+                                    Cost& c ) {
+  int px, py, pz;
+
+  partition_nonuniform_periodic_box( g,
+                                     gx0, gy0, gz0,
+                                     gx1, gy1, gz1,
+                                     gnx, gny, gnz,
+                                     gpx, gpy, gpz,
+                                     c );
+
+  // Override periodic boundary conditions
+
+  RANK_TO_INDEX( world_rank, px,py,pz );
+
+  if( px==0 && gnx>1 ) {
+    set_fbc(g,BOUNDARY((-1),0,0),absorb_fields);
+    set_pbc(g,BOUNDARY((-1),0,0),pbc);
+  }
+
+  if( px==gpx-1 && gnx>1 ) {
+    set_fbc(g,BOUNDARY( 1,0,0),absorb_fields);
+    set_pbc(g,BOUNDARY( 1,0,0),pbc);
+  }
+
+  if( py==0 && gny>1 ) {
+    set_fbc(g,BOUNDARY(0,(-1),0),absorb_fields);
+    set_pbc(g,BOUNDARY(0,(-1),0),pbc);
+  }
+
+  if( py==gpy-1 && gny>1 ) {
+    set_fbc(g,BOUNDARY(0, 1,0),absorb_fields);
+    set_pbc(g,BOUNDARY(0, 1,0),pbc);
+  }
+
+  if( pz==0 && gnz>1 ) {
+    set_fbc(g,BOUNDARY(0,0,(-1)),absorb_fields);
+    set_pbc(g,BOUNDARY(0,0,(-1)),pbc);
+  }
+
+  if( pz==gpz-1 && gnz>1 ) {
+    set_fbc(g,BOUNDARY(0,0, 1),absorb_fields);
+    set_pbc(g,BOUNDARY(0,0, 1),pbc);
+  }
+}
+
+void
+partition_nonuniform_metal_box( grid_t * g, + double gx0, double gy0, double gz0, + double gx1, double gy1, double gz1, + int gnx, int gny, int gnz, + int gpx, int gpy, int gpz, + Cost& c) { + int px, py, pz; + + partition_nonuniform_periodic_box( g, + gx0, gy0, gz0, + gx1, gy1, gz1, + gnx, gny, gnz, + gpx, gpy, gpz, + c ); + + // Override periodic boundary conditions + + RANK_TO_INDEX( world_rank, px,py,pz ); + + if( px==0 && gnx>1 ) { + set_fbc(g,BOUNDARY((-1),0,0),anti_symmetric_fields); + set_pbc(g,BOUNDARY((-1),0,0),reflect_particles); + } + + if( px==gpx-1 && gnx>1 ) { + set_fbc(g,BOUNDARY(1,0,0),anti_symmetric_fields); + set_pbc(g,BOUNDARY(1,0,0),reflect_particles); + } + + if( py==0 && gny>1 ) { + set_fbc(g,BOUNDARY(0,(-1),0),anti_symmetric_fields); + set_pbc(g,BOUNDARY(0,(-1),0),reflect_particles); + } + + if( py==gpy-1 && gny>1 ) { + set_fbc(g,BOUNDARY(0,1,0),anti_symmetric_fields); + set_pbc(g,BOUNDARY(0,1,0),reflect_particles); + } + + if( pz==0 && gnz>1 ) { + set_fbc(g,BOUNDARY(0,0,(-1)),anti_symmetric_fields); + set_pbc(g,BOUNDARY(0,0,(-1)),reflect_particles); + } + + if( pz==gpz-1 && gnz>1 ) { + set_fbc(g,BOUNDARY(0,0,1),anti_symmetric_fields); + set_pbc(g,BOUNDARY(0,0,1),reflect_particles); + } +} + diff --git a/src/vpic/dump.cc b/src/vpic/dump.cc index 6bda20da..a263989c 100644 --- a/src/vpic/dump.cc +++ b/src/vpic/dump.cc @@ -283,22 +283,6 @@ vpic_simulation::dump_hydro( const char *sp_name, #ifdef VPIC_ENABLE_HDF5 #define DUMP_DIR_FORMAT "./%s" -// TODO: rename or remove this -#define RANK_TO_INDEX2(rank, ix, iy, iz) \ - BEGIN_PRIMITIVE \ - { \ - int _ix, _iy, _iz; \ - _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ - _iy = _ix / grid->gpx; /* iy = iy+gpy*iz */ \ - _ix -= _iy * grid->gpx; /* ix = ix */ \ - _iz = _iy / grid->gpy; /* iz = iz */ \ - _iy -= _iz * grid->gpy; /* iy = iy */ \ - (ix) = _ix; \ - (iy) = _iy; \ - (iz) = _iz; \ - } \ - END_PRIMITIVE - /* define to do C-style indexing */ #define hydro(x, y, z) hydro_array->h[VOXEL(x, y, z, grid->nx, grid->ny, grid->nz)] @@ -397,19 +381,8 @@ vpic_simulation::dump_fields_hdf5( const char *fbase, int ftag ) hid_t group_id = H5Gcreate(file_id, fname, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); el1 = uptime() - el1; - //sim_log("TimeHDF5Open): " << el1 << " s"); //Easy to handle results for scripts double el2 = uptime(); - /* -// Create a variable list of field values to output. -size_t numvars = std::min(global->fdParams.output_vars.bitsum(), total_field_variables); -size_t * varlist = new size_t[numvars]; - -for(size_t i(0), c(0); ifdParams.output_vars.bitset(i)) varlist[c++] = i; - -printf("\nBEGIN_OUTPUT: numvars = %zd \n", numvars);*/ - #define fpp(x, y, z) f[VOXEL(x, y, z, grid->nx, grid->ny, grid->nz)] /* typedef struct field { @@ -420,6 +393,7 @@ printf("\nBEGIN_OUTPUT: numvars = %zd \n", numvars);*/ material_id ematx, ematy, ematz, nmat; // Material at edge centers and nodes material_id fmatx, fmaty, fmatz, cmat; // Material at face and cell centers } field_t;*/ + // Local voxel mesh resolution. 
Voxels are // indexed FORTRAN style 0:nx+1,0:ny+1,0:nz+1 // with voxels 1:nx,1:ny,1:nz being non-ghost @@ -428,57 +402,31 @@ printf("\nBEGIN_OUTPUT: numvars = %zd \n", numvars);*/ float *temp_buf = (float *)malloc(sizeof(float) * (grid->nx) * (grid->ny) * (grid->nz)); hsize_t temp_buf_index; hid_t dset_id; - //char *field_var_name[] = {"ex","ey","ez","div_e_err","cbx","cby","cbz","div_b_err","tcax","tcay","tcaz","rhob","jfx","jfy","jfz","rhof"}; plist_id = H5Pcreate(H5P_DATASET_XFER); - //Comment out for test only H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); - //H5Sselect_hyperslab(filespace, H5S_SELECT_SET, (hsize_t *) &offset, NULL, (hsize_t *) &numparticles, NULL); - - //global->topology_x hsize_t field_global_size[3], field_local_size[3], global_offset[3], global_count[3]; - field_global_size[0] = (grid->nx * grid->gpx); - field_global_size[1] = (grid->ny * grid->gpy); - field_global_size[2] = (grid->nz * grid->gpz); + field_global_size[0] = grid->gnx; + field_global_size[1] = grid->gny; + field_global_size[2] = grid->gnz; field_local_size[0] = grid->nx; field_local_size[1] = grid->ny; field_local_size[2] = grid->nz; - int gpx = grid->gpx; - int gpy = grid->gpy; - int gpz = grid->gpz; - - int mpi_rank_x, mpi_rank_y, mpi_rank_z; - //RANK_TO_INDEX2(mpi_rank, mpi_rank_x, mpi_rank_y, mpi_rank_z); - - int _ix, _iy, _iz; - _ix = (mpi_rank); - _iy = _ix / grid->gpx; - _ix -= _iy * grid->gpx; - _iz = _iy / grid->gpy; - _iy -= _iz * grid->gpy; - int ix = _ix; - int iy = _iy; - int iz = _iz; - - mpi_rank_x = ix; - mpi_rank_y = iy; - mpi_rank_z = iz; - - global_offset[0] = (grid->nx) * mpi_rank_x; - global_offset[1] = (grid->ny) * mpi_rank_y; - global_offset[2] = (grid->nz) * mpi_rank_z; + global_offset[0] = grid->nx0; + global_offset[1] = grid->ny0; + global_offset[2] = grid->nz0; - global_count[0] = (grid->nx); - global_count[1] = (grid->ny); - global_count[2] = (grid->nz); + global_count[0] = grid->nx; + global_count[1] = grid->ny; + global_count[2] = grid->nz; #ifdef DUMP_INFO_DEBUG printf("global size = %d %d %d \n", field_global_size[0], field_global_size[1], field_global_size[2]); printf("global_offset = %d %d %d \n", global_offset[0], global_offset[1], global_offset[2]); printf("global_count = %d %d %d \n", global_count[0], global_count[1], global_count[2]); - printf("mpi-rank = %d, rank index = (%d, %d, %d) \n", mpi_rank, mpi_rank_x, mpi_rank_y, mpi_rank_z); + printf("mpi-rank = %d\n", mpi_rank); fflush(stdout); #endif @@ -706,17 +654,6 @@ void vpic_simulation::dump_hydro_hdf5( const char *speciesname, //sim_log("TimeHDF5Open: " << el1 << " s"); //Easy to handle results for scripts //double el2 = uptime(); - // Create a variable list of field values to output. - //size_t numvars = std::min(global->fdParams.output_vars.bitsum(), total_field_variables); - //size_t *varlist = new size_t[numvars]; - - //for (size_t i(0), c(0); i < total_field_variables; i++) - // if (global->fdParams.output_vars.bitset(i)) - // varlist[c++] = i; - - //printf("\nBEGIN_OUTPUT: numvars = %zu \n", numvars); - - //typedef struct hydro { // float jx, jy, jz, rho; // Current and charge density => , // float px, py, pz, ke; // Momentum and K.E. 
density => , @@ -733,29 +670,21 @@ void vpic_simulation::dump_hydro_hdf5( const char *speciesname, float *temp_buf = (float *)malloc(sizeof(float) * (grid->nx) * (grid->ny) * (grid->nz)); hsize_t temp_buf_index; hid_t dset_id; - //char *field_var_name[] = {"ex","ey","ez","div_e_err","cbx","cby","cbz","div_b_err","tcax","tcay","tcaz","rhob","jfx","jfy","jfz","rhof"}; plist_id = H5Pcreate(H5P_DATASET_XFER); - //Comment out for test only H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); - //H5Sselect_hyperslab(filespace, H5S_SELECT_SET, (hsize_t *) &offset, NULL, (hsize_t *) &numparticles, NULL); - - //global->topology_x hsize_t hydro_global_size[3], hydro_local_size[3], global_offset[3], global_count[3]; - hydro_global_size[0] = (grid->nx * grid->gpx); - hydro_global_size[1] = (grid->ny * grid->gpy); - hydro_global_size[2] = (grid->nz * grid->gpz); + hydro_global_size[0] = grid->gnx; + hydro_global_size[1] = grid->gny; + hydro_global_size[2] = grid->gnz; hydro_local_size[0] = grid->nx; hydro_local_size[1] = grid->ny; hydro_local_size[2] = grid->nz; - int mpi_rank_x, mpi_rank_y, mpi_rank_z; - RANK_TO_INDEX2(mpi_rank, mpi_rank_x, mpi_rank_y, mpi_rank_z); - - global_offset[0] = (grid->nx) * mpi_rank_x; - global_offset[1] = (grid->ny) * mpi_rank_y; - global_offset[2] = (grid->nz) * mpi_rank_z; + global_offset[0] = grid->nx0; + global_offset[1] = grid->ny0; + global_offset[2] = grid->nz0; global_count[0] = (grid->nx); global_count[1] = (grid->ny); @@ -765,7 +694,7 @@ void vpic_simulation::dump_hydro_hdf5( const char *speciesname, printf("global size = %d %d %d \n", hydro_global_size[0], hydro_global_size[1], hydro_global_size[2]); printf("global_offset = %d %d %d \n", global_offset[0], global_offset[1], global_offset[2]); printf("global_count = %d %d %d \n", global_count[0], global_count[1], global_count[2]); - printf("mpi-rank = %d, rank index = (%d, %d, %d) \n", mpi_rank, mpi_rank_x, mpi_rank_y, mpi_rank_z); + printf("mpi-rank = %d\n", mpi_rank); fflush(stdout); #endif @@ -1016,11 +945,8 @@ _iy -= _iz*int(ny); /* iy = iy */ \ { int local_i = sp->p[i].i; - int ix, iy, iz, rx, ry, rz; - // Convert rank to local x/y/z - UNVOXEL(mpi_rank, rx, ry, rz, grid->gpx, grid->gpy, grid->gpz); - // Calculate local ix/iy/iz + int ix, iy, iz; UNVOXEL(local_i, ix, iy, iz, grid->nx+2, grid->ny+2, grid->nz+2); // Account for the "first" ghost cell @@ -1029,14 +955,14 @@ _iy -= _iz*int(ny); /* iy = iy */ \ iz = iz - 1; // Convert ix/iy/iz to global - int gix = ix + (grid->nx * rx); - int giy = iy + (grid->ny * ry); - int giz = iz + (grid->nz * rz); + int gix = ix + grid->nx0; + int giy = iy + grid->ny0; + int giz = iz + grid->nz0; - // calculate global grid sizes - int gnx = grid->nx * grid->gpx; - int gny = grid->ny * grid->gpy; - int gnz = grid->nz * grid->gpz; + // global grid sizes + int gnx = grid->gnx; + int gny = grid->gny; + int gnz = grid->gnz; // TODO: find a better way to account for the hard coded ghosts in VOXEL int global_i = VOXEL(gix, giy, giz, gnx-2, gny-2, gnz-2); @@ -1440,7 +1366,7 @@ vpic_simulation::global_header( const char * base, void vpic_simulation::field_dump( DumpParameters & dumpParams, - field_t *f, + field_t *f, int64_t userStep ) { long dumpStep = ( userStep == -1 ) ? (long) step() : userStep; @@ -1541,8 +1467,8 @@ vpic_simulation::field_dump( DumpParameters & dumpParams, // more efficient for standard case if ( istride == 1 && - jstride == 1 && - kstride == 1 ) + jstride == 1 && + kstride == 1 ) { for(size_t v(0); v
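
For review purposes, here is a minimal standalone sketch (not part of the
patch; the file name and the double-precision prefix sum are illustrative)
that mirrors the x-axis boundary scan of partition_nonuniform_periodic_box
for the MyCost profile used in sample/harrisSplit. It prints the cost share
each rank would receive, so the fairness of a cost function can be checked
without building or running VPIC:

// balance_check.cc -- hypothetical standalone harness, not part of this patch
#include <cmath>
#include <cstdio>
#include <vector>

// Same greedy scan as partition.cc: the boundary for rank p is the largest
// cell index whose cumulative cost fraction does not exceed p/gp.
static int boundary( const std::vector<double>& prefix, int gn, int p, int gp ) {
  int n = 0;
  for( int i = 0; i <= gn; i++ )
    if( prefix[i]/prefix[gn] <= p/(double)gp ) n = i;
  return n;
}

int main() {
  const int gnx = 64, gpx = 8; // Same x resolution as the deck, 8 ranks in x
  const double l = 4.37;       // Sheet thickness used by MyCost

  // Prefix sum of the per-slab cost, as in partition_nonuniform_periodic_box
  std::vector<double> xcost( gnx+1, 0. );
  for( int i = 0; i < gnx; i++ )
    xcost[i+1] = xcost[i] + pow( cosh( (i-32.)/l ), -2 );

  // Report the cells and cost assigned to each rank; ideally every share
  // is close to 1/gpx
  for( int p = 0; p < gpx; p++ ) {
    int n0 = boundary( xcost, gnx, p,   gpx );
    int n1 = boundary( xcost, gnx, p+1, gpx );
    printf( "rank %d: cells [%2d,%2d), cost share %.3f\n",
            p, n0, n1, (xcost[n1]-xcost[n0])/xcost[gnx] );
  }
  return 0;
}

With the cosh^-2 profile, ranks near the sheet center get narrow slabs and
ranks in the low-density wings get wide ones, which is the behavior the
commit message reports as the efficiency improvement.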