From c4cc7c0697b41e9f4d50a802b284e0137f2e025d Mon Sep 17 00:00:00 2001
From: Patrick Kilian
Date: Thu, 4 Feb 2021 16:38:41 -0700
Subject: [PATCH] Allow unequal domain decomposition into chunks of roughly
 equal cost.

Cost is supplied by the user through a cost function. In the example of the
Harris current sheet, this improves parallelization efficiency on 8 cores from
25% to 80%, which is well within the range that I get when I split in y or z
using either the old or the new code.
---
 sample/harrisSplit    | 361 ++++++++++++++++++++++++++++++++++++++++++
 src/grid/grid.h       |  35 +++-
 src/grid/partition.cc | 305 +++++++++++++++++++++++++++++++++--
 src/vpic/dump.cc      | 140 ++++------------
 src/vpic/vpic.h       |  39 +++++
 5 files changed, 755 insertions(+), 125 deletions(-)
 create mode 100644 sample/harrisSplit

diff --git a/sample/harrisSplit b/sample/harrisSplit
new file mode 100644
index 00000000..b070295e
--- /dev/null
+++ b/sample/harrisSplit
@@ -0,0 +1,361 @@
+// Magnetic reconnection in a Harris equilibrium thin current sheet
+//
+// This input deck reproduces the PIC simulations found in:
+//   William Daughton. "Nonlinear dynamics of thin current sheets." Phys.
+//   Plasmas. 9(9): 3668-3678. September 2002.
+//
+// This input deck was written by:
+//   Kevin J Bowers, Ph.D.
+//   Plasma Physics Group (X-1)
+//   Applied Physics Division
+//   Los Alamos National Lab
+//   August 2003       - original version
+//   October 2003      - heavily revised to utilize input deck syntactic sugar
+//   March/April 2004  - rewritten for domain decomposition V4PIC
+
+// Rough density estimate over the whole run of the simulation, used to split
+// the domain into chunks that have roughly the same number of particles,
+// despite the density bump inside the current sheet.
+class MyCost: public Cost {
+  private:
+    const float l = 4.37; // Rough sheet thickness at later times, in units of dx
+
+  public:
+    float cost(const int ix, const int iy, const int iz) {
+      return pow(cosh((ix-32.)/l),-2); // Peaked at ix = nx/2 = 32, the sheet center
+    }
+};
+
+// If you want to use global variables (for example, to store the dump
+// intervals for your diagnostics section), it must be done in the globals
+// section. Variables declared in the globals section will be preserved across
+// restart dumps. For example, if the globals section is:
+//   begin_globals {
+//     double variable;
+//   } end_globals
+// the double "variable" will be visible to other input deck sections as
+// "global->variable". Note: Variables declared in the globals section are set
+// to zero before the user's initialization block is executed. Up to 16K
+// of global variables can be defined.
+
+begin_globals {
+  double energies_interval;
+  double fields_interval;
+  double ehydro_interval;
+  double ihydro_interval;
+  double eparticle_interval;
+  double iparticle_interval;
+  double restart_interval;
+};
+
+begin_initialization {
+  // At this point, there is an empty grid and the random number generator is
+  // seeded with the rank. The grid, materials, and species need to be
+  // defined. Then the initial non-zero fields need to be loaded at time
+  // level 0 and the particles (position and momentum both) need to be loaded
+  // at time level 0.
+
+  double input_mass_ratio;
+  int input_seed;
+
+  // Arguments can be passed from the command line to the input deck
+  if( num_cmdline_arguments!=3 ) {
+    // Set sensible defaults
+    input_mass_ratio = 1.0;
+    input_seed       = 0;
+
+    sim_log( "Defaulting to mass_ratio of " << input_mass_ratio << " and seed of " << input_seed );
+    sim_log( "For Custom Usage: " << cmdline_argument[0] << " mass_ratio seed" );
+  }
+  else {
+    input_mass_ratio = atof(cmdline_argument[1]); // Ion mass / electron mass
+    input_seed       = atoi(cmdline_argument[2]); // Seed for the random number generator
+    sim_log( "Detected input mass_ratio of " << input_mass_ratio << " and seed of " << input_seed );
+  }
+  seed_entropy( input_seed );
+
+  // Diagnostic messages can be written (usually to stderr)
+  sim_log( "Computing simulation parameters" );
+
+  // Define the system of units for this problem (natural units)
+  double L    = 1; // Length normalization (sheet thickness)
+  double ec   = 1; // Charge normalization
+  double me   = 1; // Mass normalization
+  double c    = 1; // Speed of light
+  double eps0 = 1; // Permittivity of space
+
+  // Physics parameters
+  double mi_me   = input_mass_ratio; // Ion mass / electron mass
+  double rhoi_L  = 1;   // Ion thermal gyroradius / Sheet thickness
+  double Ti_Te   = 1;   // Ion temperature / electron temperature
+  double wpe_wce = 3;   // Electron plasma freq / electron cyclotron freq
+  double theta   = 0;   // Orientation of the simulation wrt current sheet
+  double taui    = 100; // Simulation wci's to run
+
+  // Numerical parameters
+  double Lx = 16*L;        // How big should the box be in the x direction
+  double Ly = 16*L;        // How big should the box be in the y direction
+  double Lz = 16*L;        // How big should the box be in the z direction
+  double nx = 64;          // Global resolution in the x direction
+  double ny = 64;          // Global resolution in the y direction
+  double nz = 64;          // Global resolution in the z direction
+  double nppc = 200;       // Average number of macro particles per cell (both species combined!)
+  double cfl_req = 0.99;   // How close to Courant should we try to run
+  double wpedt_max = 0.36; // How big a timestep is allowed if Courant is not too restrictive
+  double damp = 0.001;     // Level of radiation damping
+
+  // Derived quantities
+  double mi   = me*mi_me;                             // Ion mass
+  double kTe  = me*c*c/(2*wpe_wce*wpe_wce*(1+Ti_Te)); // Electron temperature
+  double kTi  = kTe*Ti_Te;                            // Ion temperature
+  double vthe = sqrt(2*kTe/me); // Electron thermal velocity (B.D. convention)
+  double vthi = sqrt(2*kTi/mi); // Ion thermal velocity (B.D. convention)
+  double wci  = vthi/(rhoi_L*L);               // Ion cyclotron frequency
+  double wce  = wci*mi_me;                     // Electron cyclotron frequency
+  double wpe  = wce*wpe_wce;                   // Electron plasma frequency
+  double wpi  = wpe/sqrt(mi_me);               // Ion plasma frequency
+  double vdre = c*c*wce/(wpe*wpe*L*(1+Ti_Te)); // Electron drift velocity
+  double vdri = -Ti_Te*vdre;                   // Ion drift velocity
+  double b0   = me*wce/ec;                     // Asymptotic magnetic field strength
+  double n0   = me*eps0*wpe*wpe/(ec*ec);       // Peak electron density (also peak ion density)
+  double Npe  = 2*n0*Ly*Lz*L*tanh(0.5*Lx/L);   // Number of physical electrons in box
+  double Npi  = Npe;                           // Number of physical ions in box
+  double Ne   = 0.5*nppc*nx*ny*nz;             // Total macro electrons in box
+  Ne = trunc_granular(Ne,nproc());             // Make it divisible by number of processors
+  double Ni   = Ne;                            // Total macro ions in box
+  double we   = Npe/Ne;                        // Weight of a macro electron
+  double wi   = Npi/Ni;                        // Weight of a macro ion
+  double gdri = 1/sqrt(1-vdri*vdri/(c*c));     // gamma of ion drift frame
+  double gdre = 1/sqrt(1-vdre*vdre/(c*c));     // gamma of electron drift frame
+  double udri = vdri*gdri;                     // 4-velocity of ion drift frame
+  double udre = vdre*gdre;                     // 4-velocity of electron drift frame
+  double uthi = sqrt(kTi/mi)/c;                // Normalized ion thermal velocity (K.B. convention)
+  double uthe = sqrt(kTe/me)/c;                // Normalized electron thermal velocity (K.B. convention)
+  double cs   = cos(theta);
+  double sn   = sin(theta);
+
+  // Determine the timestep
+  double dg = courant_length(Lx,Ly,Lz,nx,ny,nz); // Courant length
+  double dt = cfl_req*dg/c;                      // Courant limited time step
+  if( wpe*dt>wpedt_max ) dt=wpedt_max/wpe;       // Override time step if plasma frequency limited
+
+  ////////////////////////////////////////
+  // Setup high level simulation parameters
+
+  num_step             = int(0.2*taui/(wci*dt));
+  status_interval      = int(1./(wci*dt));
+  sync_shared_interval = status_interval;
+  clean_div_e_interval = status_interval;
+  clean_div_b_interval = status_interval;
+
+  global->energies_interval  = status_interval;
+  global->fields_interval    = status_interval;
+  global->ehydro_interval    = status_interval;
+  global->ihydro_interval    = status_interval;
+  global->eparticle_interval = status_interval;
+  global->iparticle_interval = status_interval;
+  global->restart_interval   = status_interval;
+
+  ///////////////////////////
+  // Setup the space and time
+
+  // Setup basic grid parameters
+  define_units( c, eps0 );
+  define_timestep( dt );
+
+  const int topo_x = 1;
+  const int topo_y = 1;
+  const int topo_z = nproc();
+  MyCost particlecounts;
+
+  // Partition a periodic box among the processors
+  define_nonuniform_periodic_grid( -0.5*Lx, 0, 0,           // Low corner
+                                    0.5*Lx, Ly, Lz,         // High corner
+                                    nx, ny, nz,             // Resolution
+                                    topo_x, topo_y, topo_z, // Topology
+                                    particlecounts );       // Split smarter
+
+  // Override some of the boundary conditions to put a particle reflecting
+  // perfect electrical conductor on the -x and +x boundaries
+  set_domain_field_bc( BOUNDARY(-1,0,0), pec_fields );
+  set_domain_field_bc( BOUNDARY( 1,0,0), pec_fields );
+  set_domain_particle_bc( BOUNDARY(-1,0,0), reflect_particles );
+  set_domain_particle_bc( BOUNDARY( 1,0,0), reflect_particles );
+
+  define_material( "vacuum", 1 );
+  // Note: define_material defaults to isotropic materials with mu=1,sigma=0
+  // Tensor electronic, magnetic and conductive materials are supported
+  // though. See "shapes" for how to define them and assign them to regions.
+  // Also, space is initially filled with the first material defined.
+ + // If you pass NULL to define field array, the standard field array will + // be used (if damp is not provided, no radiation damping will be used). + define_field_array( NULL, damp ); + + //////////////////// + // Setup the species + + // Allow 50% more local_particles in case of non-uniformity + // VPIC will pick the number of movers to use for each species + // Both species use out-of-place sorting + species_t * ion = define_species( "ion", ec, mi, 1.5*Ni/(topo_y*topo_z), -1, 40, 1 ); + species_t * electron = define_species( "electron", -ec, me, 1.5*Ne/(topo_y*topo_z), -1, 20, 1 ); + + /////////////////////////////////////////////////// + // Log diagnostic information about this simulation + + sim_log( "" ); + sim_log( "System of units" ); + sim_log( "L = " << L ); + sim_log( "ec = " << ec ); + sim_log( "me = " << me ); + sim_log( "c = " << c ); + sim_log( "eps0 = " << eps0 ); + sim_log( "" ); + sim_log( "Physics parameters" ); + sim_log( "rhoi/L = " << rhoi_L ); + sim_log( "Ti/Te = " << Ti_Te ); + sim_log( "wpe/wce = " << wpe_wce ); + sim_log( "mi/me = " << mi_me ); + sim_log( "theta = " << theta ); + sim_log( "taui = " << taui ); + sim_log( "" ); + sim_log( "Numerical parameters" ); + sim_log( "num_step = " << num_step ); + sim_log( "dt = " << dt ); + sim_log( "Lx = " << Lx << ", Lx/L = " << Lx/L ); + sim_log( "Ly = " << Ly << ", Ly/L = " << Ly/L ); + sim_log( "Lz = " << Lz << ", Lz/L = " << Lz/L ); + sim_log( "nx = " << nx << ", dx = " << Lx/nx << ", L/dx = " << L*nx/Lx ); + sim_log( "ny = " << ny << ", dy = " << Ly/ny << ", L/dy = " << L*ny/Ly ); + sim_log( "nz = " << nz << ", dz = " << Lz/nz << ", L/dz = " << L*nz/Lz ); + sim_log( "nppc = " << nppc ); + sim_log( "courant = " << c*dt/dg ); + sim_log( "damp = " << damp ); + sim_log( "" ); + sim_log( "Ion parameters" ); + sim_log( "qpi = " << ec << ", mi = " << mi << ", qpi/mi = " << ec/mi ); + sim_log( "vthi = " << vthi << ", vthi/c = " << vthi/c << ", kTi = " << kTi ); + sim_log( "vdri = " << vdri << ", vdri/c = " << vdri/c ); + sim_log( "wpi = " << wpi << ", wpi dt = " << wpi*dt << ", n0 = " << n0 ); + sim_log( "wci = " << wci << ", wci dt = " << wci*dt ); + sim_log( "rhoi = " << vthi/wci << ", L/rhoi = " << L/(vthi/wci) << ", dx/rhoi = " << (Lx/nx)/(vthi/wci) ); + sim_log( "debyei = " << vthi/wpi << ", L/debyei = " << L/(vthi/wpi) << ", dx/debyei = " << (Lx/nx)/(vthi/wpi) ); + sim_log( "Npi = " << Npi << ", Ni = " << Ni << ", Npi/Ni = " << Npi/Ni << ", wi = " << wi ); + sim_log( "" ); + sim_log( "Electron parameters" ); + sim_log( "qpe = " << -ec << ", me = " << me << ", qpe/me = " << -ec/me ); + sim_log( "vthe = " << vthe << ", vthe/c = " << vthe/c << ", kTe = " << kTe ); + sim_log( "vdre = " << vdre << ", vdre/c = " << vdre/c ); + sim_log( "wpe = " << wpe << ", wpe dt = " << wpe*dt << ", n0 = " << n0 ); + sim_log( "wce = " << wce << ", wce dt = " << wce*dt ); + sim_log( "rhoe = " << vthe/wce << ", L/rhoe = " << L/(vthe/wce) << ", dx/rhoe = " << (Lx/nx)/(vthe/wce) ); + sim_log( "debyee = " << vthe/wpe << ", L/debyee = " << L/(vthe/wpe) << ", dx/debyee = " << (Lx/nx)/(vthe/wpe) ); + sim_log( "Npe = " << Npe << ", Ne = " << Ne << ", Npe/Ne = " << Npe/Ne << ", we = " << we ); + sim_log( "" ); + sim_log( "Miscellaneous" ); + sim_log( "nptotal = " << Ni + Ne ); + sim_log( "nproc = " << nproc() ); + sim_log( "" ); + + //////////////////////////// + // Load fields and particles + + sim_log( "Loading fields" ); + + set_region_field( everywhere, 0, 0, 0, // Electric field + 0, -sn*b0*tanh(x/L), cs*b0*tanh(x/L) ); // Magnetic field + // 
Note: everywhere is a region that encompasses the entire simulation
+  // In general, regions are specified as logical equations (i.e. x>0 && x+y<2)
+
+  sim_log( "Loading particles" );
+
+  repeat( Ni/(topo_y*topo_z) ) {
+    double x, y, z, ux, uy, uz, d0;
+
+    // Pick an appropriately distributed random location for the pair
+    do {
+      x = L*atanh( uniform( rng(0), -1, 1 ) );
+    } while( x<=-0.5*Lx || x>=0.5*Lx );
+    y = uniform( rng(0), grid->y0, grid->y1 );
+    z = uniform( rng(0), grid->z0, grid->z1 );
+
+    // For the ion, pick an isothermal normalized momentum in the drift frame
+    // (this is a proper thermal equilibrium in the non-relativistic limit),
+    // boost it from the drift frame to the frame with the magnetic field
+    // along z and then rotate it into the lab frame. Then load the particle.
+    // Repeat the process for the electron.
+
+    ux = normal( rng(0), 0, uthi );
+    uy = normal( rng(0), 0, uthi );
+    uz = normal( rng(0), 0, uthi );
+    d0 = gdri*uy + sqrt(ux*ux+uy*uy+uz*uz+1)*udri;
+    uy = d0*cs - uz*sn;
+    uz = d0*sn + uz*cs;
+    inject_particle( ion, x, y, z, ux, uy, uz, wi, 0, 0 );
+
+    ux = normal( rng(0), 0, uthe );
+    uy = normal( rng(0), 0, uthe );
+    uz = normal( rng(0), 0, uthe );
+    d0 = gdre*uy + sqrt(ux*ux+uy*uy+uz*uz+1)*udre;
+    uy = d0*cs - uz*sn;
+    uz = d0*sn + uz*cs;
+    inject_particle( electron, x, y, z, ux, uy, uz, we, 0, 0 );
+  }
+  fprintf(stderr, "GREP %d electrons, %d ions on rank %d\n", electron->np, ion->np, rank());
+
+  // Upon completion of the initialization, the following occurs:
+  // - The synchronization error (tang E, norm B) is computed between domains
+  //   and tang E / norm B are synchronized by averaging where discrepancies
+  //   are encountered.
+  // - The initial divergence error of the magnetic field is computed and
+  //   one pass of cleaning is done (for good measure)
+  // - The bound charge density necessary to give the simulation an initially
+  //   clean divergence e is computed.
+  // - The particle momentum is uncentered from u_0 to u_{-1/2}
+  // - The user diagnostics are called on the initial state
+  // - The physics loop is started
+  //
+  // The physics loop consists of:
+  // - Advance particles from x_0,u_{-1/2} to x_1,u_{1/2}
+  // - User particle injection at x_{1-age}, u_{1/2} (use inject_particles)
+  // - User current injection (adjust field(x,y,z).jfx, jfy, jfz)
+  // - Advance B from B_0 to B_{1/2}
+  // - Advance E from E_0 to E_1
+  // - User field injection to E_1 (adjust field(x,y,z).ex,ey,ez,cbx,cby,cbz)
+  // - Advance B from B_{1/2} to B_1
+  // - (periodically) Divergence clean electric field
+  // - (periodically) Divergence clean magnetic field
+  // - (periodically) Synchronize shared tang e and norm b
+  // - Increment the time step
+  // - Call user diagnostics
+  // - (periodically) Print a status message
+}

+begin_diagnostics {
+  // Removed for benchmarking
+}
+
+begin_particle_injection {
+
+  // No particle injection for this simulation
+
+}
+
+begin_current_injection {
+
+  // No current injection for this simulation
+
+}
+
+begin_field_injection {
+
+  // No field injection for this simulation
+
+}
+
+begin_particle_collisions {
+
+  // No collisions for this simulation
+
+}
diff --git a/src/grid/grid.h b/src/grid/grid.h
index 3167c7e6..264c7a61 100644
--- a/src/grid/grid.h
+++ b/src/grid/grid.h
@@ -99,8 +99,8 @@ typedef struct grid {
   // 0 ... nproc-1 ... comm boundary condition
   // <0 ... locally applied boundary condition

-  int gpx, gpy, gpz = -1; // Store global processor decomposition to let us figure
-                          // out where we are in the global decomposition
+  int gnx = -1, gny = -1, gnz = -1; // Global size of domain (in cells)
+  int nx0 = -1, ny0 = -1, nz0 = -1; // First cell of the local domain in the global grid

   // Phase 3 grid data structures
   // NOTE: VOXEL INDEXING LIMITS NUMBER OF VOXELS TO 2^31 (INCLUDING
@@ -243,6 +243,37 @@ partition_metal_box( grid_t *g,
                      int gnx, int gny, int gnz,
                      int gpx, int gpy, int gpz );

+END_C_DECLS
+class Cost {
+  public:
+    virtual float cost(const int ix, const int iy, const int iz) = 0;
+};
+
+void
+partition_nonuniform_periodic_box( grid_t *g,
+                                   double gx0, double gy0, double gz0,
+                                   double gx1, double gy1, double gz1,
+                                   int gnx, int gny, int gnz,
+                                   int gpx, int gpy, int gpz,
+                                   Cost& c );
+
+void
+partition_nonuniform_absorbing_box( grid_t *g,
+                                    double gx0, double gy0, double gz0,
+                                    double gx1, double gy1, double gz1,
+                                    int gnx, int gny, int gnz,
+                                    int gpx, int gpy, int gpz,
+                                    int pbc,
+                                    Cost& c );
+
+void
+partition_nonuniform_metal_box( grid_t *g,
+                                double gx0, double gy0, double gz0,
+                                double gx1, double gy1, double gz1,
+                                int gnx, int gny, int gnz,
+                                int gpx, int gpy, int gpz,
+                                Cost& c );
+
+BEGIN_C_DECLS

 // In grid_comm.c
 // FIXME: SHOULD TAKE A RAW PORT INDEX INSTEAD OF A PORT COORDS
diff --git a/src/grid/partition.cc b/src/grid/partition.cc
index e95f7225..8f6c9d61 100644
--- a/src/grid/partition.cc
+++ b/src/grid/partition.cc
@@ -56,9 +56,9 @@ partition_periodic_box( grid_t * g,
   RANK_TO_INDEX( world_rank, px,py,pz );

-  // Capture global processor decomposition
-  g->gpx = gpx;
-  g->gpy = gpy;
-  g->gpz = gpz;
+  // Capture global domain resolution
+  g->gnx = gnx;
+  g->gny = gny;
+  g->gnz = gnz;

   g->dx = (gx1-gx0)/(double)gnx;
   g->dy = (gy1-gy0)/(double)gny;
@@ -74,16 +74,28 @@ partition_periodic_box( grid_t * g,
          ((double)gny/(gy1-gy0))*
          ((double)gnz/(gz1-gz0))*0.125;

-  f = (double) px   /(double)gpx; g->x0 = gx0*(1-f) + gx1*f;
-  f = (double) py   /(double)gpy; g->y0 = gy0*(1-f) + gy1*f;
-  f = (double) pz   /(double)gpz; g->z0 = gz0*(1-f) + gz1*f;
+  int nx0 = (gnx * px) / gpx;
+  int ny0 = (gny * py) / gpy;
+  int nz0 = (gnz * pz) / gpz;

-  f = (double)(px+1)/(double)gpx; g->x1 = gx0*(1-f) + gx1*f;
-  f = (double)(py+1)/(double)gpy; g->y1 = gy0*(1-f) + gy1*f;
-  f = (double)(pz+1)/(double)gpz; g->z1 = gz0*(1-f) + gz1*f;
+  int nx1 = (gnx * (px+1)) / gpx;
+  int ny1 = (gny * (py+1)) / gpy;
+  int nz1 = (gnz * (pz+1)) / gpz;
+
+  g->nx0 = nx0;
+  g->ny0 = ny0;
+  g->nz0 = nz0;
+
+  f = (double)nx0 / (double)gnx; g->x0 = gx0*(1-f) + gx1*f;
+  f = (double)ny0 / (double)gny; g->y0 = gy0*(1-f) + gy1*f;
+  f = (double)nz0 / (double)gnz; g->z0 = gz0*(1-f) + gz1*f;
+
+  f = (double)nx1 / (double)gnx; g->x1 = gx0*(1-f) + gx1*f;
+  f = (double)ny1 / (double)gny; g->y1 = gy0*(1-f) + gy1*f;
+  f = (double)nz1 / (double)gnz; g->z1 = gz0*(1-f) + gz1*f;

   // Size the local grid
-  size_grid(g,gnx/gpx,gny/gpy,gnz/gpz);
+  size_grid(g,nx1-nx0,ny1-ny0,nz1-nz0);

   // Join the grid to neighbors
   INDEX_TO_RANK(px-1,py,  pz,  rank); join_grid(g,BOUNDARY((-1), 0, 0),rank);
@@ -113,30 +125,30 @@ partition_absorbing_box( grid_t * g,

   RANK_TO_INDEX( world_rank, px,py,pz );

-  if( px==0 && gnx>1 ) { 
+  if( px==0 && gnx>1 ) {
     set_fbc(g,BOUNDARY((-1),0,0),absorb_fields);
     set_pbc(g,BOUNDARY((-1),0,0),pbc);
-  } 
+  }

   if( px==gpx-1 && gnx>1 ) {
     set_fbc(g,BOUNDARY( 1,0,0),absorb_fields);
     set_pbc(g,BOUNDARY( 1,0,0),pbc);
   }

-  if( py==0 && gny>1 ) { 
+  if( py==0 && gny>1 ) {
     set_fbc(g,BOUNDARY(0,(-1),0),absorb_fields);
     set_pbc(g,BOUNDARY(0,(-1),0),pbc);
-  } 
+  }

   if( py==gpy-1 && gny>1 ) {
     set_fbc(g,BOUNDARY(0, 1,0),absorb_fields);
     set_pbc(g,BOUNDARY(0, 1,0),pbc);
   }

-  if( pz==0 && gnz>1 ) { 
+  if( pz==0 && gnz>1 ) {
     set_fbc(g,BOUNDARY(0,0,(-1)),absorb_fields);
     set_pbc(g,BOUNDARY(0,0,(-1)),pbc);
-  } 
+  }

   if( pz==gpz-1 && gnz>1 ) {
     set_fbc(g,BOUNDARY(0,0, 1),absorb_fields);
@@ -195,3 +207,264 @@ partition_metal_box( grid_t * g,
     set_pbc(g,BOUNDARY(0,0,1),reflect_particles);
   }
 }
+
+void
+partition_nonuniform_periodic_box( grid_t *g,
+                                   double gx0, double gy0, double gz0,
+                                   double gx1, double gy1, double gz1,
+                                   int gnx, int gny, int gnz,
+                                   int gpx, int gpy, int gpz,
+                                   Cost& c ) {
+  double f;
+  int rank, px, py, pz;
+
+  // Make sure the grid can be setup
+
+  if( !g ) ERROR(( "NULL grid" ));
+
+  if( gpx<1 || gpy<1 || gpz<1 || gpx*gpy*gpz!=world_size )
+    ERROR(( "Bad domain decomposition (%ix%ix%i)", gpx, gpy, gpz ));
+
+  if( gnx<1 || gny<1 || gnz<1 || gnx%gpx!=0 || gny%gpy!=0 || gnz%gpz!=0 )
+    ERROR(( "Bad resolution (%ix%ix%i) for domain decomposition (%ix%ix%i)",
+            gnx, gny, gnz, gpx, gpy, gpz ));
+
+  // Setup basic variables
+  RANK_TO_INDEX( world_rank, px,py,pz );
+
+  // Capture global domain resolution
+  g->gnx = gnx;
+  g->gny = gny;
+  g->gnz = gnz;
+
+  g->dx = (gx1-gx0)/(double)gnx;
+  g->dy = (gy1-gy0)/(double)gny;
+  g->dz = (gz1-gz0)/(double)gnz;
+  g->dV = ((gx1-gx0)/(double)gnx)*
+          ((gy1-gy0)/(double)gny)*
+          ((gz1-gz0)/(double)gnz);
+
+  g->rdx = (double)gnx/(gx1-gx0);
+  g->rdy = (double)gny/(gy1-gy0);
+  g->rdz = (double)gnz/(gz1-gz0);
+  g->r8V = ((double)gnx/(gx1-gx0))*
+           ((double)gny/(gy1-gy0))*
+           ((double)gnz/(gz1-gz0))*0.125;
+
+  // We assume that calling the cost function is cheap, so we will call it
+  // three times for every grid cell instead of storing the full 3d profile of
+  // per-cell costs in RAM
+  float* xcost = new float[gnx+2];
+  float* ycost = new float[gny+2];
+  float* zcost = new float[gnz+2];
+
+  // Prefix sum of cost, integrated over y and z. (Should we take max instead of sum?)
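+  //
+  // Worked example of the scan below (illustrative numbers, not from the
+  // Harris deck): gnx=4 slabs with costs {1,3,3,1} give the prefix sum
+  // xcost = {0,1,4,7,8}, with total cost xcost[gnx] = 8. For gpx=2, each
+  // boundary is the last cell index whose cumulative cost fraction does not
+  // exceed the rank's fair share, so rank 0 gets cells [0,2) and rank 1
+  // gets cells [2,4), each holding half of the total cost.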
+  xcost[0] = 0.;
+  for(int i = 0; i < gnx; i++) {
+    float sum = 0.;
+    for(int j = 0; j < gny; j++) {
+      for(int k = 0; k < gnz; k++) {
+        sum += c.cost(i,j,k);
+      }
+    }
+    xcost[i+1] = xcost[i] + sum;
+  }
+  xcost[gnx+1] = 1./0.; // +inf sentinel so the scans below must terminate
+
+  // Prefix sum of cost, integrated over x and z
+  ycost[0] = 0.;
+  for(int j = 0; j < gny; j++) {
+    float sum = 0.;
+    for(int i = 0; i < gnx; i++) {
+      for(int k = 0; k < gnz; k++) {
+        sum += c.cost(i,j,k);
+      }
+    }
+    ycost[j+1] = ycost[j] + sum;
+  }
+  ycost[gny+1] = 1./0.;
+
+  // Prefix sum of cost, integrated over x and y
+  zcost[0] = 0.;
+  for(int k = 0; k < gnz; k++) {
+    float sum = 0.;
+    for(int i = 0; i < gnx; i++) {
+      for(int j = 0; j < gny; j++) {
+        sum += c.cost(i,j,k);
+      }
+    }
+    zcost[k+1] = zcost[k] + sum;
+  }
+  zcost[gnz+1] = 1./0.;
+
+  // Scan for fair boundaries in x: the local domain starts at the last cell
+  // whose cumulative cost fraction does not exceed this rank's starting
+  // share px/gpx, and ends at the corresponding cell for share (px+1)/gpx
+  int nx0 = 0;
+  int i = 0;
+  while(xcost[i]/xcost[gnx] <= px/float(gpx)) {
+    nx0 = i;
+    i++;
+  }
+  int nx1 = 0;
+  i = 0;
+  while(xcost[i]/xcost[gnx] <= (px+1)/float(gpx)) {
+    nx1 = i;
+    i++;
+  }
+
+  // Scan for fair boundaries in y
+  int ny0 = 0;
+  int j = 0;
+  while(ycost[j]/ycost[gny] <= py/float(gpy)) {
+    ny0 = j;
+    j++;
+  }
+  int ny1 = 0;
+  j = 0;
+  while(ycost[j]/ycost[gny] <= (py+1)/float(gpy)) {
+    ny1 = j;
+    j++;
+  }
+
+  // Scan for fair boundaries in z
+  int nz0 = 0;
+  int k = 0;
+  while(zcost[k]/zcost[gnz] <= pz/float(gpz)) {
+    nz0 = k;
+    k++;
+  }
+  int nz1 = 0;
+  k = 0;
+  while(zcost[k]/zcost[gnz] <= (pz+1)/float(gpz)) {
+    nz1 = k;
+    k++;
+  }
+
+  delete[] xcost;
+  delete[] ycost;
+  delete[] zcost;
+
+  g->nx0 = nx0;
+  g->ny0 = ny0;
+  g->nz0 = nz0;
+
+  f = (double)nx0 / (double)gnx; g->x0 = gx0*(1-f) + gx1*f;
+  f = (double)ny0 / (double)gny; g->y0 = gy0*(1-f) + gy1*f;
+  f = (double)nz0 / (double)gnz; g->z0 = gz0*(1-f) + gz1*f;
+
+  f = (double)nx1 / (double)gnx; g->x1 = gx0*(1-f) + gx1*f;
+  f = (double)ny1 / (double)gny; g->y1 = gy0*(1-f) + gy1*f;
+  f = (double)nz1 / (double)gnz; g->z1 = gz0*(1-f) + gz1*f;
+
+  // Size the local grid
+  size_grid(g,nx1-nx0,ny1-ny0,nz1-nz0);
+
+  // Join the grid to neighbors
+  INDEX_TO_RANK(px-1,py,  pz,  rank); join_grid(g,BOUNDARY((-1), 0, 0),rank);
+  INDEX_TO_RANK(px,  py-1,pz,  rank); join_grid(g,BOUNDARY( 0,(-1), 0),rank);
+  INDEX_TO_RANK(px,  py,  pz-1,rank); join_grid(g,BOUNDARY( 0, 0,(-1)),rank);
+  INDEX_TO_RANK(px+1,py,  pz,  rank); join_grid(g,BOUNDARY( 1, 0, 0),rank);
+  INDEX_TO_RANK(px,  py+1,pz,  rank); join_grid(g,BOUNDARY( 0, 1, 0),rank);
+  INDEX_TO_RANK(px,  py,  pz+1,rank); join_grid(g,BOUNDARY( 0, 0, 1),rank);
+}
+
+void
+partition_nonuniform_absorbing_box( grid_t * g,
+                                    double gx0, double gy0, double gz0,
+                                    double gx1, double gy1, double gz1,
+                                    int gnx, int gny, int gnz,
+                                    int gpx, int gpy, int gpz,
+                                    int pbc,
+                                    Cost& c ) {
+  int px, py, pz;
+
+  partition_nonuniform_periodic_box( g,
+                                     gx0, gy0, gz0,
+                                     gx1, gy1, gz1,
+                                     gnx, gny, gnz,
+                                     gpx, gpy, gpz,
+                                     c );
+
+  // Override periodic boundary conditions
+
+  RANK_TO_INDEX( world_rank, px,py,pz );
+
+  if( px==0 && gnx>1 ) {
+    set_fbc(g,BOUNDARY((-1),0,0),absorb_fields);
+    set_pbc(g,BOUNDARY((-1),0,0),pbc);
+  }
+
+  if( px==gpx-1 && gnx>1 ) {
+    set_fbc(g,BOUNDARY( 1,0,0),absorb_fields);
+    set_pbc(g,BOUNDARY( 1,0,0),pbc);
+  }
+
+  if( py==0 && gny>1 ) {
+    set_fbc(g,BOUNDARY(0,(-1),0),absorb_fields);
+    set_pbc(g,BOUNDARY(0,(-1),0),pbc);
+  }
+
+  if( py==gpy-1 && gny>1 ) {
+    set_fbc(g,BOUNDARY(0, 1,0),absorb_fields);
+    set_pbc(g,BOUNDARY(0, 1,0),pbc);
+  }
+
+  if( pz==0 && gnz>1 ) {
+    set_fbc(g,BOUNDARY(0,0,(-1)),absorb_fields);
+    set_pbc(g,BOUNDARY(0,0,(-1)),pbc);
+  }
+
+  if( pz==gpz-1 && gnz>1 ) {
+    set_fbc(g,BOUNDARY(0,0, 1),absorb_fields);
+    set_pbc(g,BOUNDARY(0,0, 1),pbc);
+  }
+}
+
+void
+partition_nonuniform_metal_box( grid_t * g, + double gx0, double gy0, double gz0, + double gx1, double gy1, double gz1, + int gnx, int gny, int gnz, + int gpx, int gpy, int gpz, + Cost& c) { + int px, py, pz; + + partition_nonuniform_periodic_box( g, + gx0, gy0, gz0, + gx1, gy1, gz1, + gnx, gny, gnz, + gpx, gpy, gpz, + c ); + + // Override periodic boundary conditions + + RANK_TO_INDEX( world_rank, px,py,pz ); + + if( px==0 && gnx>1 ) { + set_fbc(g,BOUNDARY((-1),0,0),anti_symmetric_fields); + set_pbc(g,BOUNDARY((-1),0,0),reflect_particles); + } + + if( px==gpx-1 && gnx>1 ) { + set_fbc(g,BOUNDARY(1,0,0),anti_symmetric_fields); + set_pbc(g,BOUNDARY(1,0,0),reflect_particles); + } + + if( py==0 && gny>1 ) { + set_fbc(g,BOUNDARY(0,(-1),0),anti_symmetric_fields); + set_pbc(g,BOUNDARY(0,(-1),0),reflect_particles); + } + + if( py==gpy-1 && gny>1 ) { + set_fbc(g,BOUNDARY(0,1,0),anti_symmetric_fields); + set_pbc(g,BOUNDARY(0,1,0),reflect_particles); + } + + if( pz==0 && gnz>1 ) { + set_fbc(g,BOUNDARY(0,0,(-1)),anti_symmetric_fields); + set_pbc(g,BOUNDARY(0,0,(-1)),reflect_particles); + } + + if( pz==gpz-1 && gnz>1 ) { + set_fbc(g,BOUNDARY(0,0,1),anti_symmetric_fields); + set_pbc(g,BOUNDARY(0,0,1),reflect_particles); + } +} + diff --git a/src/vpic/dump.cc b/src/vpic/dump.cc index 6bda20da..a263989c 100644 --- a/src/vpic/dump.cc +++ b/src/vpic/dump.cc @@ -283,22 +283,6 @@ vpic_simulation::dump_hydro( const char *sp_name, #ifdef VPIC_ENABLE_HDF5 #define DUMP_DIR_FORMAT "./%s" -// TODO: rename or remove this -#define RANK_TO_INDEX2(rank, ix, iy, iz) \ - BEGIN_PRIMITIVE \ - { \ - int _ix, _iy, _iz; \ - _ix = (rank); /* ix = ix+gpx*( iy+gpy*iz ) */ \ - _iy = _ix / grid->gpx; /* iy = iy+gpy*iz */ \ - _ix -= _iy * grid->gpx; /* ix = ix */ \ - _iz = _iy / grid->gpy; /* iz = iz */ \ - _iy -= _iz * grid->gpy; /* iy = iy */ \ - (ix) = _ix; \ - (iy) = _iy; \ - (iz) = _iz; \ - } \ - END_PRIMITIVE - /* define to do C-style indexing */ #define hydro(x, y, z) hydro_array->h[VOXEL(x, y, z, grid->nx, grid->ny, grid->nz)] @@ -397,19 +381,8 @@ vpic_simulation::dump_fields_hdf5( const char *fbase, int ftag ) hid_t group_id = H5Gcreate(file_id, fname, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); el1 = uptime() - el1; - //sim_log("TimeHDF5Open): " << el1 << " s"); //Easy to handle results for scripts double el2 = uptime(); - /* -// Create a variable list of field values to output. -size_t numvars = std::min(global->fdParams.output_vars.bitsum(), total_field_variables); -size_t * varlist = new size_t[numvars]; - -for(size_t i(0), c(0); ifdParams.output_vars.bitset(i)) varlist[c++] = i; - -printf("\nBEGIN_OUTPUT: numvars = %zd \n", numvars);*/ - #define fpp(x, y, z) f[VOXEL(x, y, z, grid->nx, grid->ny, grid->nz)] /* typedef struct field { @@ -420,6 +393,7 @@ printf("\nBEGIN_OUTPUT: numvars = %zd \n", numvars);*/ material_id ematx, ematy, ematz, nmat; // Material at edge centers and nodes material_id fmatx, fmaty, fmatz, cmat; // Material at face and cell centers } field_t;*/ + // Local voxel mesh resolution. 
Voxels are // indexed FORTRAN style 0:nx+1,0:ny+1,0:nz+1 // with voxels 1:nx,1:ny,1:nz being non-ghost @@ -428,57 +402,31 @@ printf("\nBEGIN_OUTPUT: numvars = %zd \n", numvars);*/ float *temp_buf = (float *)malloc(sizeof(float) * (grid->nx) * (grid->ny) * (grid->nz)); hsize_t temp_buf_index; hid_t dset_id; - //char *field_var_name[] = {"ex","ey","ez","div_e_err","cbx","cby","cbz","div_b_err","tcax","tcay","tcaz","rhob","jfx","jfy","jfz","rhof"}; plist_id = H5Pcreate(H5P_DATASET_XFER); - //Comment out for test only H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); - //H5Sselect_hyperslab(filespace, H5S_SELECT_SET, (hsize_t *) &offset, NULL, (hsize_t *) &numparticles, NULL); - - //global->topology_x hsize_t field_global_size[3], field_local_size[3], global_offset[3], global_count[3]; - field_global_size[0] = (grid->nx * grid->gpx); - field_global_size[1] = (grid->ny * grid->gpy); - field_global_size[2] = (grid->nz * grid->gpz); + field_global_size[0] = grid->gnx; + field_global_size[1] = grid->gny; + field_global_size[2] = grid->gnz; field_local_size[0] = grid->nx; field_local_size[1] = grid->ny; field_local_size[2] = grid->nz; - int gpx = grid->gpx; - int gpy = grid->gpy; - int gpz = grid->gpz; - - int mpi_rank_x, mpi_rank_y, mpi_rank_z; - //RANK_TO_INDEX2(mpi_rank, mpi_rank_x, mpi_rank_y, mpi_rank_z); - - int _ix, _iy, _iz; - _ix = (mpi_rank); - _iy = _ix / grid->gpx; - _ix -= _iy * grid->gpx; - _iz = _iy / grid->gpy; - _iy -= _iz * grid->gpy; - int ix = _ix; - int iy = _iy; - int iz = _iz; - - mpi_rank_x = ix; - mpi_rank_y = iy; - mpi_rank_z = iz; - - global_offset[0] = (grid->nx) * mpi_rank_x; - global_offset[1] = (grid->ny) * mpi_rank_y; - global_offset[2] = (grid->nz) * mpi_rank_z; + global_offset[0] = grid->nx0; + global_offset[1] = grid->ny0; + global_offset[2] = grid->nz0; - global_count[0] = (grid->nx); - global_count[1] = (grid->ny); - global_count[2] = (grid->nz); + global_count[0] = grid->nx; + global_count[1] = grid->ny; + global_count[2] = grid->nz; #ifdef DUMP_INFO_DEBUG printf("global size = %d %d %d \n", field_global_size[0], field_global_size[1], field_global_size[2]); printf("global_offset = %d %d %d \n", global_offset[0], global_offset[1], global_offset[2]); printf("global_count = %d %d %d \n", global_count[0], global_count[1], global_count[2]); - printf("mpi-rank = %d, rank index = (%d, %d, %d) \n", mpi_rank, mpi_rank_x, mpi_rank_y, mpi_rank_z); + printf("mpi-rank = %d\n", mpi_rank); fflush(stdout); #endif @@ -706,17 +654,6 @@ void vpic_simulation::dump_hydro_hdf5( const char *speciesname, //sim_log("TimeHDF5Open: " << el1 << " s"); //Easy to handle results for scripts //double el2 = uptime(); - // Create a variable list of field values to output. - //size_t numvars = std::min(global->fdParams.output_vars.bitsum(), total_field_variables); - //size_t *varlist = new size_t[numvars]; - - //for (size_t i(0), c(0); i < total_field_variables; i++) - // if (global->fdParams.output_vars.bitset(i)) - // varlist[c++] = i; - - //printf("\nBEGIN_OUTPUT: numvars = %zu \n", numvars); - - //typedef struct hydro { // float jx, jy, jz, rho; // Current and charge density => , // float px, py, pz, ke; // Momentum and K.E. 
density => , @@ -733,29 +670,21 @@ void vpic_simulation::dump_hydro_hdf5( const char *speciesname, float *temp_buf = (float *)malloc(sizeof(float) * (grid->nx) * (grid->ny) * (grid->nz)); hsize_t temp_buf_index; hid_t dset_id; - //char *field_var_name[] = {"ex","ey","ez","div_e_err","cbx","cby","cbz","div_b_err","tcax","tcay","tcaz","rhob","jfx","jfy","jfz","rhof"}; plist_id = H5Pcreate(H5P_DATASET_XFER); - //Comment out for test only H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE); - //H5Sselect_hyperslab(filespace, H5S_SELECT_SET, (hsize_t *) &offset, NULL, (hsize_t *) &numparticles, NULL); - - //global->topology_x hsize_t hydro_global_size[3], hydro_local_size[3], global_offset[3], global_count[3]; - hydro_global_size[0] = (grid->nx * grid->gpx); - hydro_global_size[1] = (grid->ny * grid->gpy); - hydro_global_size[2] = (grid->nz * grid->gpz); + hydro_global_size[0] = grid->gnx; + hydro_global_size[1] = grid->gny; + hydro_global_size[2] = grid->gnz; hydro_local_size[0] = grid->nx; hydro_local_size[1] = grid->ny; hydro_local_size[2] = grid->nz; - int mpi_rank_x, mpi_rank_y, mpi_rank_z; - RANK_TO_INDEX2(mpi_rank, mpi_rank_x, mpi_rank_y, mpi_rank_z); - - global_offset[0] = (grid->nx) * mpi_rank_x; - global_offset[1] = (grid->ny) * mpi_rank_y; - global_offset[2] = (grid->nz) * mpi_rank_z; + global_offset[0] = grid->nx0; + global_offset[1] = grid->ny0; + global_offset[2] = grid->nz0; global_count[0] = (grid->nx); global_count[1] = (grid->ny); @@ -765,7 +694,7 @@ void vpic_simulation::dump_hydro_hdf5( const char *speciesname, printf("global size = %d %d %d \n", hydro_global_size[0], hydro_global_size[1], hydro_global_size[2]); printf("global_offset = %d %d %d \n", global_offset[0], global_offset[1], global_offset[2]); printf("global_count = %d %d %d \n", global_count[0], global_count[1], global_count[2]); - printf("mpi-rank = %d, rank index = (%d, %d, %d) \n", mpi_rank, mpi_rank_x, mpi_rank_y, mpi_rank_z); + printf("mpi-rank = %d\n", mpi_rank); fflush(stdout); #endif @@ -1016,11 +945,8 @@ _iy -= _iz*int(ny); /* iy = iy */ \ { int local_i = sp->p[i].i; - int ix, iy, iz, rx, ry, rz; - // Convert rank to local x/y/z - UNVOXEL(mpi_rank, rx, ry, rz, grid->gpx, grid->gpy, grid->gpz); - // Calculate local ix/iy/iz + int ix, iy, iz; UNVOXEL(local_i, ix, iy, iz, grid->nx+2, grid->ny+2, grid->nz+2); // Account for the "first" ghost cell @@ -1029,14 +955,14 @@ _iy -= _iz*int(ny); /* iy = iy */ \ iz = iz - 1; // Convert ix/iy/iz to global - int gix = ix + (grid->nx * rx); - int giy = iy + (grid->ny * ry); - int giz = iz + (grid->nz * rz); + int gix = ix + grid->nx0; + int giy = iy + grid->ny0; + int giz = iz + grid->nz0; - // calculate global grid sizes - int gnx = grid->nx * grid->gpx; - int gny = grid->ny * grid->gpy; - int gnz = grid->nz * grid->gpz; + // global grid sizes + int gnx = grid->gnx; + int gny = grid->gny; + int gnz = grid->gnz; // TODO: find a better way to account for the hard coded ghosts in VOXEL int global_i = VOXEL(gix, giy, giz, gnx-2, gny-2, gnz-2); @@ -1440,7 +1366,7 @@ vpic_simulation::global_header( const char * base, void vpic_simulation::field_dump( DumpParameters & dumpParams, - field_t *f, + field_t *f, int64_t userStep ) { long dumpStep = ( userStep == -1 ) ? (long) step() : userStep; @@ -1541,8 +1467,8 @@ vpic_simulation::field_dump( DumpParameters & dumpParams, // more efficient for standard case if ( istride == 1 && - jstride == 1 && - kstride == 1 ) + jstride == 1 && + kstride == 1 ) { for(size_t v(0); v
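
For review purposes, here is a minimal standalone sketch (not part of the
patch; the file name and the double-precision prefix sum are illustrative)
that mirrors the x-axis boundary scan of partition_nonuniform_periodic_box
for the MyCost profile used in sample/harrisSplit. It prints the cost share
each rank would receive, so the fairness of a cost function can be checked
without building or running VPIC:

// balance_check.cc -- hypothetical standalone harness, not part of this patch
#include <cmath>
#include <cstdio>
#include <vector>

// Same greedy scan as partition.cc: the boundary for rank p is the largest
// cell index whose cumulative cost fraction does not exceed p/gp.
static int boundary( const std::vector<double>& prefix, int gn, int p, int gp ) {
  int n = 0;
  for( int i = 0; i <= gn; i++ )
    if( prefix[i]/prefix[gn] <= p/(double)gp ) n = i;
  return n;
}

int main() {
  const int gnx = 64, gpx = 8; // Same x resolution as the deck, 8 ranks in x
  const double l = 4.37;       // Sheet thickness used by MyCost

  // Prefix sum of the per-slab cost, as in partition_nonuniform_periodic_box
  std::vector<double> xcost( gnx+1, 0. );
  for( int i = 0; i < gnx; i++ )
    xcost[i+1] = xcost[i] + pow( cosh( (i-32.)/l ), -2 );

  // Report the cells and cost assigned to each rank; ideally every share
  // is close to 1/gpx
  for( int p = 0; p < gpx; p++ ) {
    int n0 = boundary( xcost, gnx, p,   gpx );
    int n1 = boundary( xcost, gnx, p+1, gpx );
    printf( "rank %d: cells [%2d,%2d), cost share %.3f\n",
            p, n0, n1, (xcost[n1]-xcost[n0])/xcost[gnx] );
  }
  return 0;
}

With the cosh^-2 profile, ranks near the sheet center get narrow slabs and
ranks in the low-density wings get wide ones, which is the behavior the
commit message reports as the efficiency improvement.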