From b099d1f83c1ca9a965263b9195e02a389f72cc8e Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 22 Feb 2024 16:39:20 +0000 Subject: [PATCH 01/52] ASMExtrudedStarPC: include vertical edge and face star patches --- firedrake/preconditioners/asm.py | 148 +++++++++++++++++++------------ 1 file changed, 93 insertions(+), 55 deletions(-) diff --git a/firedrake/preconditioners/asm.py b/firedrake/preconditioners/asm.py index 9c44664133..32e474696b 100644 --- a/firedrake/preconditioners/asm.py +++ b/firedrake/preconditioners/asm.py @@ -65,9 +65,11 @@ def initialize(self, pc): opts["sub_pc_factor_shift_type"] = "NONE" # If an ordering type is provided, PCASM should not sort patch indices, otherwise it can. - sentinel = object() - ordering = PETSc.Options().getString(self.prefix + "mat_ordering_type", default=sentinel) - asmpc.setASMSortIndices(ordering is sentinel) + mat_type = P.getType() + if not mat_type.endswith("sbaij"): + sentinel = object() + ordering = PETSc.Options().getString(self.prefix + "mat_ordering_type", default=sentinel) + asmpc.setASMSortIndices(ordering is sentinel) lgmap = V.dof_dset.lgmap # Translate to global numbers @@ -90,14 +92,14 @@ def initialize(self, pc): sum(W.value_size * W.dof_dset.total_size for W in V)) asmpc.setUp() else: - raise ValueError(f"Unknown backend type f{backend}") + raise ValueError(f"Unknown backend type {backend}") asmpc.setFromOptions() self.asmpc = asmpc @abc.abstractmethod def get_patches(self, V): - ''' Get the patches used for PETSc PSASM + ''' Get the patches used for PETSc PCASM :param V: the :class:`~.FunctionSpace`. @@ -110,6 +112,12 @@ def view(self, pc, viewer=None): self.asmpc.view(viewer=viewer) def update(self, pc): + # This is required to update an inplace ILU/ICC factorization + try: + for sub in self.asmpc.getASMSubKSP(): + sub.getOperators()[0].setUnfactored() + except PETSc.Error: + pass self.asmpc.setUp() def apply(self, pc, x, y): @@ -302,7 +310,7 @@ def get_patches(self, V): def order_points(mesh_dm, points, ordering_type, prefix): - '''Order a the points (topological entities) of a patch based + '''Order the points (topological entities) of a patch based on the adjacency graph of the mesh. :arg mesh_dm: the `mesh.topology_dm` @@ -312,6 +320,8 @@ def order_points(mesh_dm, points, ordering_type, prefix): :returns: the permuted array of points ''' + # Order points by decreasing topological dimension (interiors, faces, edges, vertices) + points = points[::-1] if ordering_type == "natural": return points subgraph = [numpy.intersect1d(points, mesh_dm.getAdjacency(p), return_indices=True)[1] for p in points] @@ -319,9 +329,12 @@ def order_points(mesh_dm, points, ordering_type, prefix): ja = numpy.concatenate(subgraph).astype(PETSc.IntType) A = PETSc.Mat().createAIJ((len(points), )*2, csr=(ia, ja, numpy.ones(ja.shape, PETSc.RealType)), comm=PETSc.COMM_SELF) A.setOptionsPrefix(prefix) - rperm, _ = A.getOrdering(ordering_type) + rperm, cperm = A.getOrdering(ordering_type) + indices = points[rperm.getIndices()] A.destroy() - return points[rperm.getIndices()] + rperm.destroy() + cperm.destroy() + return indices def get_basemesh_nodes(W): @@ -395,54 +408,79 @@ def get_patches(self, V): # Build index sets for the patches ises = [] - start, end = mesh_dm.getDepthStratum(depth) - pstart, _ = mesh_dm.getChart() - for seed in range(start, end): - # Only build patches over owned DoFs - if mesh_dm.getLabelValue("pyop2_ghost", seed) != -1: + # Build a base_depth-star on the base mesh and extrude it by an + # interval_depth-star on the interval mesh such that the depths sum to depth + # and 0 <= base_depth <= dim, 0 <= interval_depth <= 1. + # + # Vertex-stars: depth = 0 = 0 + 0. + # 0 + 0 -> vertex-star = (2D vertex-star) x (1D vertex-star) + # + # Edge-stars: depth = 1 = 1 + 0 = 0 + 1. + # 1 + 0 -> horizontal edge-star = (2D edge-star) x (1D vertex-star) + # 0 + 1 -> vertical edge-star = (2D vertex-star) x (1D interior) + # + # Face-stars: depth = 2 = 2 + 0 = 1 + 1. + # 2 + 0 -> horizontal face-star = (2D interior) x (1D vertex-star) + # 1 + 1 -> vertical face-star = (2D edge-star) x (1D interior) + for base_depth in range(depth+1): + interval_depth = depth - base_depth + if interval_depth > 1: continue - # Create point list from mesh DM - points, _ = mesh_dm.getTransitiveClosure(seed, useCone=False) - points = order_points(mesh_dm, points, ordering, self.prefix) - points -= pstart # offset by chart start - for k in range(nlayers): - if k == 0: - planes = [1, 0] - elif k == nlayers - 1: - planes = [-1, 0] - else: - planes = [-1, 1, 0] - - indices = [] - # Get DoF indices for patch - for i, W in enumerate(V): - iset = V_ises[i] - for plane in planes: - for p in points: - # How to walk up one layer - blayer_offset = basemeshlayeroffsets[i][p] - if blayer_offset <= 0: - # In this case we don't have any dofs on - # this entity. - continue - # Offset in the global array for the bottom of - # the column - off = basemeshoff[i][p] - # Number of dofs in the interior of the - # vertical interval cell on top of this base - # entity - dof = basemeshdof[i][p] - # Hard-code taking the star - if plane == 0: - begin = off + k * blayer_offset - end = off + k * blayer_offset + dof - else: - begin = off + min(k, k+plane) * blayer_offset + dof - end = off + max(k, k+plane) * blayer_offset - zlice = slice(W.value_size * begin, W.value_size * end) - indices.extend(iset[zlice]) + start, end = mesh_dm.getDepthStratum(base_depth) + pstart, _ = mesh_dm.getChart() + for seed in range(start, end): + # Only build patches over owned DoFs + if mesh_dm.getLabelValue("pyop2_ghost", seed) != -1: + continue - iset = PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) - ises.append(iset) + # Create point list from mesh DM + points, _ = mesh_dm.getTransitiveClosure(seed, useCone=False) + points = order_points(mesh_dm, points, ordering, self.prefix) + points -= pstart # offset by chart start + for k in range(nlayers-interval_depth): + if interval_depth == 1: + # extrude by 1D interior + planes = [1] + elif k == 0: + # extrude by 1D vertex-star on the bottom + planes = [1, 0] + elif k == nlayers - 1: + # extrude by 1D vertex-star on the top + planes = [-1, 0] + else: + # extrude by 1D vertex-star + planes = [-1, 1, 0] + + indices = [] + # Get DoF indices for patch + for i, W in enumerate(V): + iset = V_ises[i] + for plane in planes: + for p in points: + # How to walk up one layer + blayer_offset = basemeshlayeroffsets[i][p] + if blayer_offset <= 0: + # In this case we don't have any dofs on + # this entity. + continue + # Offset in the global array for the bottom of + # the column + off = basemeshoff[i][p] + # Number of dofs in the interior of the + # vertical interval cell on top of this base + # entity + dof = basemeshdof[i][p] + # Hard-code taking the star + if plane == 0: + begin = off + k * blayer_offset + end = off + k * blayer_offset + dof + else: + begin = off + min(k, k+plane) * blayer_offset + dof + end = off + max(k, k+plane) * blayer_offset + zlice = slice(W.value_size * begin, W.value_size * end) + indices.extend(iset[zlice]) + + iset = PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) + ises.append(iset) return ises From 584b51310048a445ababac61a0f04820aed9a60e Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 22 Feb 2024 17:44:48 +0000 Subject: [PATCH 02/52] ASMExtrudedStar: test edge/face-star equivalence with Jacobi on NCE(1)/NCF(1) --- tests/multigrid/test_hiptmair.py | 63 +++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/tests/multigrid/test_hiptmair.py b/tests/multigrid/test_hiptmair.py index d27002ca0e..35d1195c63 100644 --- a/tests/multigrid/test_hiptmair.py +++ b/tests/multigrid/test_hiptmair.py @@ -3,9 +3,11 @@ import pytest -def mesh_hierarchy(cell): +@pytest.fixture(params=["tetrahedron", "hexahedron"]) +def mesh_hierarchy(request): nx = 5 nlevels = 2 + cell = request.param if cell == "tetrahedron": base = UnitCubeMesh(nx, nx, nx) return MeshHierarchy(base, nlevels) @@ -15,12 +17,27 @@ def mesh_hierarchy(cell): return ExtrudedMeshHierarchy(basemh, height=1, base_layer=nx) -def run_riesz_map(V, mat_type): - relax = { +def run_riesz_map(V, mat_type, max_it): + jacobi = { "mat_type": mat_type, "ksp_type": "preonly", "pc_type": "jacobi", } + potential = jacobi + if V.mesh().extruded and mat_type == "aij": + # Test equivalence of Jacobi and ASM on edge/face-stars + formdegree = V.finat_element.formdegree + relax = { + "ksp_type": "preonly", + "pc_type": "python", + "pc_python_type": "firedrake.ASMExtrudedStarPC", + "pc_star_construct_dim": formdegree, + "pc_star_sub_sub_ksp_type": "preonly", + "pc_star_sub_sub_pc_type": "jacobi", + } + else: + relax = jacobi + coarse = { "ksp_type": "preonly", "pc_type": "cholesky", @@ -34,9 +51,11 @@ def run_riesz_map(V, mat_type): } parameters = { "mat_type": mat_type, - "snes_type": "ksponly", + "ksp_max_it": max_it, "ksp_type": "cg", "ksp_norm_type": "natural", + "ksp_monitor": None, + "ksp_convergence_test": "skip", "pc_type": "mg", "mg_coarse": coarse, "mg_levels": { @@ -45,7 +64,7 @@ def run_riesz_map(V, mat_type): "pc_type": "python", "pc_python_type": "firedrake.HiptmairPC", "hiptmair_mg_levels": relax, - "hiptmair_mg_coarse": relax, + "hiptmair_mg_coarse": potential, }, } @@ -70,24 +89,32 @@ def run_riesz_map(V, mat_type): problem = LinearVariationalProblem(a, L, uh, bcs=bcs, form_compiler_parameters={"mode": "vanilla"}) solver = LinearVariationalSolver(problem, solver_parameters=parameters, appctx=appctx) solver.solve() - return solver.snes.ksp.getIterationNumber() + return errornorm(u_exact, uh) @pytest.mark.skipcomplexnoslate -@pytest.mark.parametrize(["family", "cell"], - [("N1curl", "tetrahedron"), ("NCE", "hexahedron")]) -def test_hiptmair_hcurl(family, cell): - mesh = mesh_hierarchy(cell)[-1] +@pytest.mark.parametrize("mat_type", ["aij", "matfree"]) +def test_hiptmair_hcurl(mesh_hierarchy, mat_type): + mesh = mesh_hierarchy[-1] + if mesh.ufl_cell().is_simplex(): + family = "N1curl" + max_it = 14 + else: + family = "NCE" + max_it = 5 V = FunctionSpace(mesh, family, degree=1) - assert run_riesz_map(V, "aij") <= 15 - assert run_riesz_map(V, "matfree") <= 15 + assert run_riesz_map(V, mat_type, max_it) < 1E-6 @pytest.mark.skipcomplexnoslate -@pytest.mark.parametrize(["family", "cell"], - [("RT", "tetrahedron"), ("NCF", "hexahedron")]) -def test_hiptmair_hdiv(family, cell): - mesh = mesh_hierarchy(cell)[-1] +@pytest.mark.parametrize("mat_type", ["aij", "matfree"]) +def test_hiptmair_hdiv(mesh_hierarchy, mat_type): + mesh = mesh_hierarchy[-1] + if mesh.ufl_cell().is_simplex(): + family = "N1div" + max_it = 14 + else: + family = "NCF" + max_it = 7 V = FunctionSpace(mesh, family, degree=1) - assert run_riesz_map(V, "aij") <= 12 - assert run_riesz_map(V, "matfree") <= 12 + assert run_riesz_map(V, mat_type, max_it) < 1E-6 From 54266200ed00ed0c78e581ab2342f96598cac1c6 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 23 Feb 2024 09:13:41 +0000 Subject: [PATCH 03/52] Update firedrake/preconditioners/asm.py --- firedrake/preconditioners/asm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/preconditioners/asm.py b/firedrake/preconditioners/asm.py index 32e474696b..268d4b8070 100644 --- a/firedrake/preconditioners/asm.py +++ b/firedrake/preconditioners/asm.py @@ -410,7 +410,7 @@ def get_patches(self, V): ises = [] # Build a base_depth-star on the base mesh and extrude it by an # interval_depth-star on the interval mesh such that the depths sum to depth - # and 0 <= base_depth <= dim, 0 <= interval_depth <= 1. + # and 0 <= interval_depth <= 1. # # Vertex-stars: depth = 0 = 0 + 0. # 0 + 0 -> vertex-star = (2D vertex-star) x (1D vertex-star) From 8b743b4e484a6a682c9d1d94c1edb0100b6ad938 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 23 Feb 2024 16:13:38 +0000 Subject: [PATCH 04/52] remove try ... except, as it was not required --- firedrake/preconditioners/asm.py | 10 +++------- firedrake/preconditioners/facet_split.py | 3 +-- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/firedrake/preconditioners/asm.py b/firedrake/preconditioners/asm.py index 32e474696b..1091712a18 100644 --- a/firedrake/preconditioners/asm.py +++ b/firedrake/preconditioners/asm.py @@ -112,13 +112,9 @@ def view(self, pc, viewer=None): self.asmpc.view(viewer=viewer) def update(self, pc): - # This is required to update an inplace ILU/ICC factorization - try: - for sub in self.asmpc.getASMSubKSP(): - sub.getOperators()[0].setUnfactored() - except PETSc.Error: - pass - self.asmpc.setUp() + # This is required to update an inplace ILU factorization + for sub in self.asmpc.getASMSubKSP(): + sub.getOperators()[0].setUnfactored() def apply(self, pc, x, y): self.asmpc.apply(x, y) diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py index a15d2af662..9585d9bc62 100644 --- a/firedrake/preconditioners/facet_split.py +++ b/firedrake/preconditioners/facet_split.py @@ -238,8 +238,7 @@ def restricted_dofs(celem, felem): def get_permutation_map(V, W): - perm = numpy.empty((V.dof_count, ), dtype=PETSc.IntType) - perm.fill(-1) + perm = numpy.full((V.dof_count,), -1, dtype=PETSc.IntType) vdat = V.make_dat(val=perm) offset = 0 From 5da9b7916efb97c435e25af316e4324a43bc0fde Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 26 Feb 2024 14:13:47 +0000 Subject: [PATCH 05/52] FDMPC: support MATIS --- firedrake/preconditioners/fdm.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 40d2ae7b15..3f4c96f3f9 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -291,15 +291,24 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) + P.setLGMap(Vrow.dof_dset.lgmap, Vcol.dof_dset.lgmap) + P.setPreallocationNNZ((dnz, onz)) - P.setOption(PETSc.Mat.Option.IGNORE_OFF_PROC_ENTRIES, False) - P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) - P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) + + #P.setOption(PETSc.Mat.Option.IGNORE_OFF_PROC_ENTRIES, False) + P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, False) + #P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) + P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) if ptype.endswith("sbaij"): P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) P.setUp() + self.set_values(P, Vrow, Vcol, addv, mat_type=ptype) + P.assemble() + if on_diag: + P.shift(1) + # append callables to zero entries, insert element matrices, and apply BCs assembly_callables.append(P.zeroEntries) assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv, mat_type=ptype)) @@ -307,9 +316,10 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati own = Vrow.dof_dset.layout_vec.getLocalSize() bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) - if len(bdofs) > 0: - vals = numpy.ones(bdofs.shape, dtype=PETSc.RealType) - assembly_callables.append(partial(P.setValuesRCV, bdofs, bdofs, vals, addv)) + + assembly_callables.append(P.assemble) + assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) + #if len(bdofs) > 0: gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): From 2f70fb1f6fe3e881b07d39d6180888d152997434 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 26 Feb 2024 19:01:50 +0000 Subject: [PATCH 06/52] non_ghosted_lgmaps --- firedrake/preconditioners/fdm.py | 35 +++++- firedrake/preconditioners/pmg.py | 183 ++++++++++++++++++------------- 2 files changed, 142 insertions(+), 76 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 3f4c96f3f9..3fb202c079 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -190,6 +190,31 @@ def initialize(self, pc): else: fdmpc.setFromOptions() + def get_non_ghosted_lgmap(self, V): + ncell = V.mesh().cell_set.size + non_ghost_cell_nodes = numpy.unique(V.cell_node_list[:ncell].flatten()) + + lgmap = V.dof_dset.lgmap + comm = lgmap.getComm() + print_rank = -1 + if comm.rank == print_rank: + cn = V.cell_node_list + cn = lgmap.apply(cn).reshape(cn.shape) + print("number of cells", comm.rank, ncell, flush=True) + print("cell_node_list", cn, flush=True) + + if comm.rank == print_rank: + print("indices before", comm.rank, lgmap.indices, flush=True) + + indices = [ind if i in non_ghost_cell_nodes else -1 for i, ind in enumerate(lgmap.indices)] + if comm.rank == print_rank: + print("indices xxx", comm.rank, indices, flush=True) + + #indices[numpy.ghost_cell_nodes] = -1 + if comm.rank == print_rank: + print("indices after", comm.rank, indices, flush=True) + return PETSc.LGMap().create(indices, bsize=lgmap.block_size, comm=lgmap.getComm()) + @PETSc.Log.EventDecorator("FDMPrealloc") def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensation, use_amat): """ @@ -233,6 +258,8 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # Create data structures needed for assembly self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} + self.non_ghosted_lgmaps = {Vsub: self.get_non_ghosted_lgmap(Vsub) for Vsub in V} + self.indices = {Vsub: op2.Dat(Vsub.dof_dset, self.lgmaps[Vsub].indices) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} @@ -291,8 +318,9 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) - P.setLGMap(Vrow.dof_dset.lgmap, Vcol.dof_dset.lgmap) + P.setLGMap(self.non_ghosted_lgmaps[Vrow], + self.non_ghosted_lgmaps[Vcol]) P.setPreallocationNNZ((dnz, onz)) #P.setOption(PETSc.Mat.Option.IGNORE_OFF_PROC_ENTRIES, False) @@ -319,7 +347,6 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati assembly_callables.append(P.assemble) assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) - #if len(bdofs) > 0: gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): @@ -429,6 +456,7 @@ def condense(self, A, J, bcs, fcp, pc_type="icc"): Pmats = dict(Smats) C0 = self.assemble_reference_tensor(V0) R0 = self.assemble_reference_tensor(V0, transpose=True) + A0 = TripleProductKernel(R0, self._element_mass_matrix, C0) K0 = InteriorSolveKernel(A0, J00, fcp=fcp, pc_type=pc_type) K1 = ImplicitSchurComplementKernel(K0) @@ -704,6 +732,9 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): # Interpolation of basis and exterior derivative onto broken spaces C1 = self.assemble_reference_tensor(Vcol) R1 = self.assemble_reference_tensor(Vrow, transpose=True) + + + # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b element_kernel = TripleProductKernel(R1, M, C1) schur_kernel = self.schur_kernel.get(Vrow) if on_diag else None diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 82b538aea6..49cdb42aad 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -452,9 +452,11 @@ def configure_pmg(self, pc, pdm): # the p-MG's coarse problem. So we need to set an option # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. - opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") - opts["mg_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 - + max_levels = odm.getRefineLevel() + 1 + if max_levels > 1: + opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") + if opts.getString("mg_coarse_pc_type") == "mg": + opts["mg_coarse_pc_mg_levels"] = min(opts.getInt("mg_coarse_pc_mg_levels", max_levels), max_levels) return ppc def apply(self, pc, x, y): @@ -495,10 +497,13 @@ def configure_pmg(self, snes, pdm): # the p-MG's coarse problem. So we need to set an option # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. - opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") - opts["fas_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 - opts["fas_coarse_snes_fas_levels"] = odm.getRefineLevel() + 1 - + max_levels = odm.getRefineLevel() + 1 + if max_levels > 1: + opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") + if opts.getString("fas_coarse_pc_type") == "mg": + opts["fas_coarse_pc_mg_levels"] = min(opts.getInt("fas_coarse_pc_mg_levels", max_levels), max_levels) + if opts.getString("fas_coarse_snes_type") == "fas": + opts["fas_coarse_snes_fas_levels"] = min(opts.getInt("fas_coarse_snes_fas_levels", max_levels), max_levels) return psnes def step(self, snes, x, f, y): @@ -607,22 +612,18 @@ def evaluate_dual(source, target, derivative=None): A read-only :class:`numpy.ndarray` with the evaluation of the target dual basis on the (derivative of the) source primal basis. """ - primal = source.get_nodal_basis() + if derivative is None: + primal = source.get_nodal_basis() + else: + from FIAT.demkowicz import project_derivative + primal = project_derivative(source, derivative) + + coeffs = primal.get_coeffs() dual = target.get_dual_set() - A = dual.to_riesz(primal) - B = numpy.transpose(primal.get_coeffs()) - if derivative == "grad": - dmats = primal.get_dmats() - assert len(dmats) == 1 - alpha = (1,) - for i in range(len(alpha)): - for j in range(alpha[i]): - B = numpy.dot(dmats[i], B) - elif derivative in ("curl", "div"): - raise NotImplementedError(f"Dual evaluation of {derivative} is not yet implemented.") - elif derivative is not None: - raise ValueError(f"Invalid derivaitve type {derivative}.") - return get_readonly_view(numpy.dot(A, B)) + dual_mat = dual.to_riesz(primal) + A = dual_mat.reshape((dual_mat.shape[0], -1)) + B = coeffs.reshape((-1, A.shape[1])) + return get_readonly_view(numpy.dot(A, B.T)) @cached({}, key=generate_key_evaluate_dual) @@ -1168,16 +1169,16 @@ class StandaloneInterpolationMatrix(object): """ Interpolation matrix for a single standalone space. """ - _cache_work = {} - def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs): + def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs, derivative=False): self.uc = self.work_function(Vc) self.uf = self.work_function(Vf) self.Vc = self.uc.function_space() self.Vf = self.uf.function_space() self.Vc_bcs = Vc_bcs self.Vf_bcs = Vf_bcs + self.derivative = derivative def work_function(self, V): if isinstance(V, firedrake.Function): @@ -1195,7 +1196,6 @@ def _weight(self): kernel_code = f""" void weight(PetscScalar *restrict w){{ for(PetscInt i=0; i<{size}; i++) w[i] += 1.0; - return; }} """ kernel = op2.Kernel(kernel_code, "weight", requires_zeroed_output_arguments=True) @@ -1205,7 +1205,8 @@ def _weight(self): return weight @cached_property - def _kernels(self): + def _parloops(self): + self._prolong_write = True try: # We generate custom prolongation and restriction kernels mainly because: # 1. Code generation for the transpose of prolongation is not readily available @@ -1217,6 +1218,7 @@ def _kernels(self): self.uf.dat(op2.INC, uf_map), self.uc.dat(op2.READ, uc_map), self._weight.dat(op2.READ, uf_map)] + self._prolong_write = False except ValueError: # The elements do not have the expected tensor product structure # Fall back to aij kernels @@ -1237,14 +1239,10 @@ def _kernels(self): return prolong, restrict def _prolong(self): - with self.uf.dat.vec_wo as uf: - uf.set(0.0E0) - self._kernels[0]() + self._parloops[0]() def _restrict(self): - with self.uc.dat.vec_wo as uc: - uc.set(0.0E0) - self._kernels[1]() + self._parloops[1]() def view(self, mat, viewer=None): if viewer is None: @@ -1272,8 +1270,7 @@ def getInfo(self, mat, info=None): else: raise ValueError("Unknown info type %s" % info) - @staticmethod - def make_blas_kernels(Vf, Vc): + def make_blas_kernels(self, Vf, Vc): """ Interpolation and restriction kernels between CG / DG tensor product spaces on quads and hexes. @@ -1283,6 +1280,9 @@ def make_blas_kernels(Vf, Vc): and using the fact that the 2D / 3D tabulation is the tensor product J = kron(Jhat, kron(Jhat, Jhat)) """ + if self.derivative: + # TODO hook up tabulate_exterior_derivative from fdm.py + raise ValueError felem = Vf.ufl_element() celem = Vc.ufl_element() fmapping = felem.mapping().lower() @@ -1412,14 +1412,47 @@ def make_blas_kernels(Vf, Vc): ldargs=BLASLAPACK_LIB.split(), requires_zeroed_output_arguments=True) return prolong_kernel, restrict_kernel, coefficients + def mat_mult_kernel(self, A, value_size, transpose=False): + if transpose: + A = A.T + name = "matmult" + code = f""" + void {name}(PetscScalar *restrict y, const PetscScalar *restrict x + {", const PetscScalar *restrict w" if transpose else ""} + ){{ + const PetscScalar A[] = {{ {", ".join(map(float.hex, A.flat))} }}; + {IntType_c} i0, i1; + for ({IntType_c} k = 0; k < {value_size}; k++) {{ + for ({IntType_c} i = 0; i < {A.shape[0]}; i++) {{ + i0 = i * {value_size} + k; + {"" if transpose else "y[i0] = 0;"} + for ({IntType_c} j = 0; j < {A.shape[1]}; j++) {{ + i1 = j * {value_size} + k; + y[i0] += A[i * {A.shape[1]} + j] * {"(x[i1] * w[i1])" if transpose else "x[i1]"}; + }} + }} + }} + }}""" + return op2.Kernel(code, name, requires_zeroed_output_arguments=transpose) + def make_kernels(self, Vf, Vc): """ Interpolation and restriction kernels between arbitrary elements. This is temporary while we wait for dual evaluation in FInAT. """ - prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, self.uc) - matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, firedrake.TestFunction(Vc)) + if Vf.finat_element.formdegree == Vc.finat_element.formdegree + self.derivative and Vf.shape == Vc.shape: + derivative = None + if self.derivative: + derivative = {ufl.HCurl: "grad", ufl.HDiv: "curl", ufl.L2: "div"}[Vf.ufl_element().sobolev_space] + A = evaluate_dual(Vc.finat_element.fiat_equivalent, Vf.finat_element.fiat_equivalent, derivative=derivative) + return self.mat_mult_kernel(A, Vf.value_size), self.mat_mult_kernel(A, Vf.value_size, transpose=True), [] + + expr = self.uc + if self.derivative: + expr = ufl.exterior_derivative(expr) + prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, expr) + matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, ufl.derivative(expr, self.uc)) # The way we transpose the prolongation kernel is suboptimal. # A local matrix is generated each time the kernel is executed. @@ -1449,48 +1482,45 @@ def make_kernels(self, Vf, Vc): ) return prolong_kernel, restrict_kernel, coefficients - def multTranspose(self, mat, rf, rc): - """ - Implement restriction: restrict residual on fine grid rf to coarse grid rc. - """ - with self.uf.dat.vec_wo as uf: - rf.copy(uf) - for bc in self.Vf_bcs: - bc.zero(self.uf) - - self._restrict() + def _copy(self, x, y): + if x.handle != y.handle: + x.copy(y) - for bc in self.Vc_bcs: - bc.zero(self.uc) - with self.uc.dat.vec_ro as uc: - uc.copy(rc) + def _op(self, action, x_bcs, y_bcs, x, y, X, Y, W=None, inc=False): + out = Y if W is None else W + if inc: + self._copy(Y, out) + with y.dat.vec_wo as v: + if W is None or inc: + v.zeroEntries() + else: + self._copy(Y, v) + with x.dat.vec_wo as v: + self._copy(X, v) + for bc in x_bcs: + bc.zero(x) + action() + for bc in y_bcs: + bc.zero(y) + with y.dat.vec_ro as v: + if inc: + out.axpy(1.0, v) + else: + self._copy(v, out) - def mult(self, mat, xc, xf, inc=False): - """ - Implement prolongation: prolong correction on coarse grid xc to fine grid xf. - """ - with self.uc.dat.vec_wo as uc: - xc.copy(uc) - for bc in self.Vc_bcs: - bc.zero(self.uc) + def mult(self, mat, X, Y): + """Prolong correction on coarse grid X to fine grid Y.""" + self._op(self._prolong, self.Vc_bcs, self.Vf_bcs, self.uc, self.uf, X, Y) - self._prolong() + def multAdd(self, mat, X, Y, W): + self._op(self._prolong, self.Vc_bcs, self.Vf_bcs, self.uc, self.uf, X, Y, W=W, inc=self._prolong_write) - for bc in self.Vf_bcs: - bc.zero(self.uf) - if inc: - with self.uf.dat.vec_ro as uf: - xf.axpy(1.0, uf) - else: - with self.uf.dat.vec_ro as uf: - uf.copy(xf) + def multTranspose(self, mat, X, Y): + """Restrict residual on fine grid X to coarse grid Y.""" + self._op(self._restrict, self.Vf_bcs, self.Vc_bcs, self.uf, self.uc, X, Y) - def multAdd(self, mat, x, y, w): - if y.handle == w.handle: - self.mult(mat, x, w, inc=True) - else: - self.mult(mat, x, w) - w.axpy(1.0, y) + def multTransposeAdd(self, mat, X, Y, W): + self._op(self._restrict, self.Vf_bcs, self.Vc_bcs, self.uf, self.uc, X, Y, W=W) class MixedInterpolationMatrix(StandaloneInterpolationMatrix): @@ -1512,7 +1542,12 @@ def _standalones(self): return standalones @cached_property - def _kernels(self): + def _parloops(self): + # FIXME we cannot be lazy, as we do not know a priori if prolongation has write access + # Therefore we must initiliaze all standalone parloops in order to know the prolongation access + for s in self._standalones: + s._parloops + self._prolong_write = any(s._prolong_write for s in self._standalones) prolong = lambda: [s._prolong() for s in self._standalones] restrict = lambda: [s._restrict() for s in self._standalones] return prolong, restrict From 1c65209211e376601bd864a8b37e4cc8e73bc166 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 26 Feb 2024 19:01:50 +0000 Subject: [PATCH 07/52] non_ghosted_lgmaps --- firedrake/preconditioners/fdm.py | 26 ++++- firedrake/preconditioners/pmg.py | 183 ++++++++++++++++++------------- 2 files changed, 133 insertions(+), 76 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 3f4c96f3f9..af9a065e98 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -190,6 +190,22 @@ def initialize(self, pc): else: fdmpc.setFromOptions() + def get_non_ghosted_lgmap(self, V): + lgmap = V.dof_dset.lgmap + indices = lgmap.indices.copy() + + ncell = V.mesh().cell_set.size + non_ghost_cell_nodes = numpy.unique(V.cell_node_list[:ncell].flatten()) + ghost_cell_nodes = numpy.setdiff1d(numpy.arange(len(indices)), + non_ghost_cell_nodes, + assume_unique=True) + indices[ghost_cell_nodes] = -1 + + #old_indices = [ind if i in non_ghost_cell_nodes else -1 for i, ind in enumerate(lgmap.indices)] + #assert numpy.allclose(indices, old_indices) + + return PETSc.LGMap().create(indices, bsize=lgmap.block_size, comm=lgmap.getComm()) + @PETSc.Log.EventDecorator("FDMPrealloc") def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensation, use_amat): """ @@ -233,6 +249,8 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # Create data structures needed for assembly self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} + self.non_ghosted_lgmaps = {Vsub: self.get_non_ghosted_lgmap(Vsub) for Vsub in V} + self.indices = {Vsub: op2.Dat(Vsub.dof_dset, self.lgmaps[Vsub].indices) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} @@ -291,8 +309,9 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) - P.setLGMap(Vrow.dof_dset.lgmap, Vcol.dof_dset.lgmap) + P.setLGMap(self.non_ghosted_lgmaps[Vrow], + self.non_ghosted_lgmaps[Vcol]) P.setPreallocationNNZ((dnz, onz)) #P.setOption(PETSc.Mat.Option.IGNORE_OFF_PROC_ENTRIES, False) @@ -319,7 +338,6 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati assembly_callables.append(P.assemble) assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) - #if len(bdofs) > 0: gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): @@ -429,6 +447,7 @@ def condense(self, A, J, bcs, fcp, pc_type="icc"): Pmats = dict(Smats) C0 = self.assemble_reference_tensor(V0) R0 = self.assemble_reference_tensor(V0, transpose=True) + A0 = TripleProductKernel(R0, self._element_mass_matrix, C0) K0 = InteriorSolveKernel(A0, J00, fcp=fcp, pc_type=pc_type) K1 = ImplicitSchurComplementKernel(K0) @@ -704,6 +723,9 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): # Interpolation of basis and exterior derivative onto broken spaces C1 = self.assemble_reference_tensor(Vcol) R1 = self.assemble_reference_tensor(Vrow, transpose=True) + + + # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b element_kernel = TripleProductKernel(R1, M, C1) schur_kernel = self.schur_kernel.get(Vrow) if on_diag else None diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 82b538aea6..49cdb42aad 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -452,9 +452,11 @@ def configure_pmg(self, pc, pdm): # the p-MG's coarse problem. So we need to set an option # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. - opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") - opts["mg_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 - + max_levels = odm.getRefineLevel() + 1 + if max_levels > 1: + opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") + if opts.getString("mg_coarse_pc_type") == "mg": + opts["mg_coarse_pc_mg_levels"] = min(opts.getInt("mg_coarse_pc_mg_levels", max_levels), max_levels) return ppc def apply(self, pc, x, y): @@ -495,10 +497,13 @@ def configure_pmg(self, snes, pdm): # the p-MG's coarse problem. So we need to set an option # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. - opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") - opts["fas_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 - opts["fas_coarse_snes_fas_levels"] = odm.getRefineLevel() + 1 - + max_levels = odm.getRefineLevel() + 1 + if max_levels > 1: + opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") + if opts.getString("fas_coarse_pc_type") == "mg": + opts["fas_coarse_pc_mg_levels"] = min(opts.getInt("fas_coarse_pc_mg_levels", max_levels), max_levels) + if opts.getString("fas_coarse_snes_type") == "fas": + opts["fas_coarse_snes_fas_levels"] = min(opts.getInt("fas_coarse_snes_fas_levels", max_levels), max_levels) return psnes def step(self, snes, x, f, y): @@ -607,22 +612,18 @@ def evaluate_dual(source, target, derivative=None): A read-only :class:`numpy.ndarray` with the evaluation of the target dual basis on the (derivative of the) source primal basis. """ - primal = source.get_nodal_basis() + if derivative is None: + primal = source.get_nodal_basis() + else: + from FIAT.demkowicz import project_derivative + primal = project_derivative(source, derivative) + + coeffs = primal.get_coeffs() dual = target.get_dual_set() - A = dual.to_riesz(primal) - B = numpy.transpose(primal.get_coeffs()) - if derivative == "grad": - dmats = primal.get_dmats() - assert len(dmats) == 1 - alpha = (1,) - for i in range(len(alpha)): - for j in range(alpha[i]): - B = numpy.dot(dmats[i], B) - elif derivative in ("curl", "div"): - raise NotImplementedError(f"Dual evaluation of {derivative} is not yet implemented.") - elif derivative is not None: - raise ValueError(f"Invalid derivaitve type {derivative}.") - return get_readonly_view(numpy.dot(A, B)) + dual_mat = dual.to_riesz(primal) + A = dual_mat.reshape((dual_mat.shape[0], -1)) + B = coeffs.reshape((-1, A.shape[1])) + return get_readonly_view(numpy.dot(A, B.T)) @cached({}, key=generate_key_evaluate_dual) @@ -1168,16 +1169,16 @@ class StandaloneInterpolationMatrix(object): """ Interpolation matrix for a single standalone space. """ - _cache_work = {} - def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs): + def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs, derivative=False): self.uc = self.work_function(Vc) self.uf = self.work_function(Vf) self.Vc = self.uc.function_space() self.Vf = self.uf.function_space() self.Vc_bcs = Vc_bcs self.Vf_bcs = Vf_bcs + self.derivative = derivative def work_function(self, V): if isinstance(V, firedrake.Function): @@ -1195,7 +1196,6 @@ def _weight(self): kernel_code = f""" void weight(PetscScalar *restrict w){{ for(PetscInt i=0; i<{size}; i++) w[i] += 1.0; - return; }} """ kernel = op2.Kernel(kernel_code, "weight", requires_zeroed_output_arguments=True) @@ -1205,7 +1205,8 @@ def _weight(self): return weight @cached_property - def _kernels(self): + def _parloops(self): + self._prolong_write = True try: # We generate custom prolongation and restriction kernels mainly because: # 1. Code generation for the transpose of prolongation is not readily available @@ -1217,6 +1218,7 @@ def _kernels(self): self.uf.dat(op2.INC, uf_map), self.uc.dat(op2.READ, uc_map), self._weight.dat(op2.READ, uf_map)] + self._prolong_write = False except ValueError: # The elements do not have the expected tensor product structure # Fall back to aij kernels @@ -1237,14 +1239,10 @@ def _kernels(self): return prolong, restrict def _prolong(self): - with self.uf.dat.vec_wo as uf: - uf.set(0.0E0) - self._kernels[0]() + self._parloops[0]() def _restrict(self): - with self.uc.dat.vec_wo as uc: - uc.set(0.0E0) - self._kernels[1]() + self._parloops[1]() def view(self, mat, viewer=None): if viewer is None: @@ -1272,8 +1270,7 @@ def getInfo(self, mat, info=None): else: raise ValueError("Unknown info type %s" % info) - @staticmethod - def make_blas_kernels(Vf, Vc): + def make_blas_kernels(self, Vf, Vc): """ Interpolation and restriction kernels between CG / DG tensor product spaces on quads and hexes. @@ -1283,6 +1280,9 @@ def make_blas_kernels(Vf, Vc): and using the fact that the 2D / 3D tabulation is the tensor product J = kron(Jhat, kron(Jhat, Jhat)) """ + if self.derivative: + # TODO hook up tabulate_exterior_derivative from fdm.py + raise ValueError felem = Vf.ufl_element() celem = Vc.ufl_element() fmapping = felem.mapping().lower() @@ -1412,14 +1412,47 @@ def make_blas_kernels(Vf, Vc): ldargs=BLASLAPACK_LIB.split(), requires_zeroed_output_arguments=True) return prolong_kernel, restrict_kernel, coefficients + def mat_mult_kernel(self, A, value_size, transpose=False): + if transpose: + A = A.T + name = "matmult" + code = f""" + void {name}(PetscScalar *restrict y, const PetscScalar *restrict x + {", const PetscScalar *restrict w" if transpose else ""} + ){{ + const PetscScalar A[] = {{ {", ".join(map(float.hex, A.flat))} }}; + {IntType_c} i0, i1; + for ({IntType_c} k = 0; k < {value_size}; k++) {{ + for ({IntType_c} i = 0; i < {A.shape[0]}; i++) {{ + i0 = i * {value_size} + k; + {"" if transpose else "y[i0] = 0;"} + for ({IntType_c} j = 0; j < {A.shape[1]}; j++) {{ + i1 = j * {value_size} + k; + y[i0] += A[i * {A.shape[1]} + j] * {"(x[i1] * w[i1])" if transpose else "x[i1]"}; + }} + }} + }} + }}""" + return op2.Kernel(code, name, requires_zeroed_output_arguments=transpose) + def make_kernels(self, Vf, Vc): """ Interpolation and restriction kernels between arbitrary elements. This is temporary while we wait for dual evaluation in FInAT. """ - prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, self.uc) - matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, firedrake.TestFunction(Vc)) + if Vf.finat_element.formdegree == Vc.finat_element.formdegree + self.derivative and Vf.shape == Vc.shape: + derivative = None + if self.derivative: + derivative = {ufl.HCurl: "grad", ufl.HDiv: "curl", ufl.L2: "div"}[Vf.ufl_element().sobolev_space] + A = evaluate_dual(Vc.finat_element.fiat_equivalent, Vf.finat_element.fiat_equivalent, derivative=derivative) + return self.mat_mult_kernel(A, Vf.value_size), self.mat_mult_kernel(A, Vf.value_size, transpose=True), [] + + expr = self.uc + if self.derivative: + expr = ufl.exterior_derivative(expr) + prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, expr) + matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, ufl.derivative(expr, self.uc)) # The way we transpose the prolongation kernel is suboptimal. # A local matrix is generated each time the kernel is executed. @@ -1449,48 +1482,45 @@ def make_kernels(self, Vf, Vc): ) return prolong_kernel, restrict_kernel, coefficients - def multTranspose(self, mat, rf, rc): - """ - Implement restriction: restrict residual on fine grid rf to coarse grid rc. - """ - with self.uf.dat.vec_wo as uf: - rf.copy(uf) - for bc in self.Vf_bcs: - bc.zero(self.uf) - - self._restrict() + def _copy(self, x, y): + if x.handle != y.handle: + x.copy(y) - for bc in self.Vc_bcs: - bc.zero(self.uc) - with self.uc.dat.vec_ro as uc: - uc.copy(rc) + def _op(self, action, x_bcs, y_bcs, x, y, X, Y, W=None, inc=False): + out = Y if W is None else W + if inc: + self._copy(Y, out) + with y.dat.vec_wo as v: + if W is None or inc: + v.zeroEntries() + else: + self._copy(Y, v) + with x.dat.vec_wo as v: + self._copy(X, v) + for bc in x_bcs: + bc.zero(x) + action() + for bc in y_bcs: + bc.zero(y) + with y.dat.vec_ro as v: + if inc: + out.axpy(1.0, v) + else: + self._copy(v, out) - def mult(self, mat, xc, xf, inc=False): - """ - Implement prolongation: prolong correction on coarse grid xc to fine grid xf. - """ - with self.uc.dat.vec_wo as uc: - xc.copy(uc) - for bc in self.Vc_bcs: - bc.zero(self.uc) + def mult(self, mat, X, Y): + """Prolong correction on coarse grid X to fine grid Y.""" + self._op(self._prolong, self.Vc_bcs, self.Vf_bcs, self.uc, self.uf, X, Y) - self._prolong() + def multAdd(self, mat, X, Y, W): + self._op(self._prolong, self.Vc_bcs, self.Vf_bcs, self.uc, self.uf, X, Y, W=W, inc=self._prolong_write) - for bc in self.Vf_bcs: - bc.zero(self.uf) - if inc: - with self.uf.dat.vec_ro as uf: - xf.axpy(1.0, uf) - else: - with self.uf.dat.vec_ro as uf: - uf.copy(xf) + def multTranspose(self, mat, X, Y): + """Restrict residual on fine grid X to coarse grid Y.""" + self._op(self._restrict, self.Vf_bcs, self.Vc_bcs, self.uf, self.uc, X, Y) - def multAdd(self, mat, x, y, w): - if y.handle == w.handle: - self.mult(mat, x, w, inc=True) - else: - self.mult(mat, x, w) - w.axpy(1.0, y) + def multTransposeAdd(self, mat, X, Y, W): + self._op(self._restrict, self.Vf_bcs, self.Vc_bcs, self.uf, self.uc, X, Y, W=W) class MixedInterpolationMatrix(StandaloneInterpolationMatrix): @@ -1512,7 +1542,12 @@ def _standalones(self): return standalones @cached_property - def _kernels(self): + def _parloops(self): + # FIXME we cannot be lazy, as we do not know a priori if prolongation has write access + # Therefore we must initiliaze all standalone parloops in order to know the prolongation access + for s in self._standalones: + s._parloops + self._prolong_write = any(s._prolong_write for s in self._standalones) prolong = lambda: [s._prolong() for s in self._standalones] restrict = lambda: [s._restrict() for s in self._standalones] return prolong, restrict From 3e585a07a4d90f58a4b807d7f8177fc6d6e5acdc Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 27 Feb 2024 12:05:52 +0000 Subject: [PATCH 08/52] disable hex mesh space checks --- firedrake/functionspaceimpl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py index a2ece362d8..3d4f8db2c5 100644 --- a/firedrake/functionspaceimpl.py +++ b/firedrake/functionspaceimpl.py @@ -49,8 +49,8 @@ def check_element(element, top=True): ValueError If the element is illegal. """ - if element.cell.cellname() == "hexahedron" and \ - element.family() not in ["Q", "DQ"]: + if False and element.cell.cellname() == "hexahedron" and \ + element.family() not in ["Q", "DQ", "DPC", "BrokenElement", "Real"]: raise NotImplementedError("Currently can only use 'Q' and/or 'DQ' elements on hexahedral meshes, not", element.family()) if type(element) in (finat.ufl.BrokenElement, finat.ufl.RestrictedElement, finat.ufl.HDivElement, finat.ufl.HCurlElement): From e810ef1e0f48735774d69514362479c35254d81b Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 27 Feb 2024 14:25:17 +0000 Subject: [PATCH 09/52] Transfer nullspace to MatNest blocks --- firedrake/preconditioners/facet_split.py | 24 ++++++++++++++-------- firedrake/preconditioners/fdm.py | 26 +++++++++++++++++------- 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py index 9585d9bc62..222ebafde2 100644 --- a/firedrake/preconditioners/facet_split.py +++ b/firedrake/preconditioners/facet_split.py @@ -87,6 +87,17 @@ def restrict(ele, restriction_domain): self.perm = PETSc.IS().createGeneral(indices, comm=PETSc.COMM_SELF) self.iperm = self.perm.invertPermutation() + def _permute_nullspace(nsp): + if not (nsp.handle and self.iperm): + return nsp + pvecs = [] + for vec in nsp.getVecs(): + pvec = vec.duplicate() + self._vec_permute(vec, pvec, self.iperm) + pvecs.append(pvec) + + return PETSc.NullSpace().create(constant=nsp.hasConstant(), vectors=pvecs, comm=nsp.getComm()) + if mat_type != "submatrix": self.mixed_op = allocate_matrix(mixed_operator, bcs=mixed_bcs, @@ -99,15 +110,6 @@ def restrict(ele, restriction_domain): self._assemble_mixed_op() mixed_opmat = self.mixed_op.petscmat - def _permute_nullspace(nsp): - if not (nsp.handle and self.iperm): - return nsp - vecs = [vec.duplicate() for vec in nsp.getVecs()] - for vec in vecs: - self.work_vec_x.getArray(readonly=False)[:] = vec.getArray(readonly=True)[:] - self._vec_permute(self.work_vec_x, vec, self.iperm) - return PETSc.NullSpace().create(constant=nsp.hasConstant(), vectors=vecs, comm=nsp.getComm()) - mixed_opmat.setNullSpace(_permute_nullspace(P.getNullSpace())) mixed_opmat.setNearNullSpace(_permute_nullspace(P.getNearNullSpace())) mixed_opmat.setTransposeNullSpace(_permute_nullspace(P.getTransposeNullSpace())) @@ -116,6 +118,10 @@ def _permute_nullspace(nsp): self._global_iperm = PETSc.IS().createGeneral(global_indices, comm=P.getComm()) self._permute_op = partial(PETSc.Mat().createSubMatrixVirtual, P, self._global_iperm, self._global_iperm) mixed_opmat = self._permute_op() + + mixed_opmat.setNullSpace(_permute_nullspace(P.getNullSpace())) + mixed_opmat.setNearNullSpace(_permute_nullspace(P.getNearNullSpace())) + mixed_opmat.setTransposeNullSpace(_permute_nullspace(P.getTransposeNullSpace())) else: mixed_opmat = P diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index af9a065e98..171318e8b7 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -177,9 +177,8 @@ def initialize(self, pc): # Assemble the FDM preconditioner with sparse local matrices Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation, use_amat) - Pmat.setNullSpace(Amat.getNullSpace()) - Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) - Pmat.setNearNullSpace(Amat.getNearNullSpace()) + + self._assemble_P() fdmpc.setOperators(A=Amat, P=Pmat) @@ -191,6 +190,7 @@ def initialize(self, pc): fdmpc.setFromOptions() def get_non_ghosted_lgmap(self, V): + # TODO extruded meshes lgmap = V.dof_dset.lgmap indices = lgmap.indices.copy() @@ -200,10 +200,6 @@ def get_non_ghosted_lgmap(self, V): non_ghost_cell_nodes, assume_unique=True) indices[ghost_cell_nodes] = -1 - - #old_indices = [ind if i in non_ghost_cell_nodes else -1 for i, ind in enumerate(lgmap.indices)] - #assert numpy.allclose(indices, old_indices) - return PETSc.LGMap().create(indices, bsize=lgmap.block_size, comm=lgmap.getComm()) @PETSc.Log.EventDecorator("FDMPrealloc") @@ -345,10 +341,26 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati diagonal_terms.append(partial(P.setDiagonal, diag, addv=addv)) Pmats[Vrow, Vcol] = P + def sub_nullspace(nsp, iset): + if not nsp.handle: + return nsp + return PETSc.NullSpace().create(constant=nsp.hasConstant(), + vectors=[vec.getSubVector(iset) for vec in nsp.getVecs()], + comm=nsp.getComm()) + if len(V) == 1: Pmat = Pmats[V, V] + Pmat.setNullSpace(Amat.getNullSpace()) + Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) + Pmat.setNearNullSpace(Amat.getNearNullSpace()) + else: Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) + for Vrow, iset in zip(V, Pmat.getNestISs()[0]): + Pmats[Vrow, Vrow].setNullSpace(sub_nullspace(Amat.getNullSpace(), iset)) + Pmats[Vrow, Vrow].setTransposeNullSpace(sub_nullspace(Amat.getTransposeNullSpace(), iset)) + Pmats[Vrow, Vrow].setNearNullSpace(sub_nullspace(Amat.getNearNullSpace(), iset)) + assembly_callables.append(Pmat.assemble) assembly_callables.extend(diagonal_terms) return Amat, Pmat, assembly_callables From e2357ee56295ca9ab42a6f0b035a7118ac89c809 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Tue, 27 Feb 2024 17:32:39 +0000 Subject: [PATCH 10/52] dmhooks: handle the single function space case --- firedrake/dmhooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/dmhooks.py b/firedrake/dmhooks.py index 067243e4e7..c1cca65b56 100644 --- a/firedrake/dmhooks.py +++ b/firedrake/dmhooks.py @@ -343,7 +343,7 @@ def create_field_decomposition(dm, *args, **kwargs): for d in dms: add_hook(parent, setup=partial(push_parent, d, parent), teardown=partial(pop_parent, d, parent), call_setup=True) - if ctx is not None: + if ctx is not None and len(W) > 1: ctxs = ctx.split([(i, ) for i in range(len(W))]) for d, c in zip(dms, ctxs): add_hook(parent, setup=partial(push_appctx, d, c), teardown=partial(pop_appctx, d, c), From 58a970f7b9f4d405831a17778ffe058ece292ddd Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Tue, 27 Feb 2024 12:04:57 +0000 Subject: [PATCH 11/52] fdm: fix assembly --- firedrake/preconditioners/fdm.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 3fb202c079..6e8cfeb71e 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -323,9 +323,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati self.non_ghosted_lgmaps[Vcol]) P.setPreallocationNNZ((dnz, onz)) - #P.setOption(PETSc.Mat.Option.IGNORE_OFF_PROC_ENTRIES, False) - P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, False) - #P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) + P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) if ptype.endswith("sbaij"): @@ -333,9 +331,13 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati P.setUp() self.set_values(P, Vrow, Vcol, addv, mat_type=ptype) - P.assemble() + # populate diagonal entries if on_diag: - P.shift(1) + n = len(self.non_ghosted_lgmaps[Vrow].indices) + i = numpy.arange(n,dtype=PETSc.IntType).reshape(-1,1) + v = numpy.ones(n,dtype=PETSc.ScalarType).reshape(-1,1) + P.setValuesLocalRCV(i,i,v,addv=addv) + P.assemble() # append callables to zero entries, insert element matrices, and apply BCs assembly_callables.append(P.zeroEntries) @@ -344,7 +346,6 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati own = Vrow.dof_dset.layout_vec.getLocalSize() bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) - assembly_callables.append(P.assemble) assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) From b3e17658b61798a52615a642ac3b3628cb920c96 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 28 Feb 2024 12:03:57 +0000 Subject: [PATCH 12/52] lgmaps: add non_ghost_cells kwarg and support extruded meshes --- firedrake/functionspaceimpl.py | 32 ++++++++++++++++++++++++++++++-- firedrake/preconditioners/fdm.py | 25 ++++--------------------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py index 3d4f8db2c5..5351782dec 100644 --- a/firedrake/functionspaceimpl.py +++ b/firedrake/functionspaceimpl.py @@ -740,7 +740,7 @@ def boundary_nodes(self, sub_domain): return self._shared_data.boundary_nodes(self, sub_domain) @PETSc.Log.EventDecorator() - def local_to_global_map(self, bcs, lgmap=None): + def local_to_global_map(self, bcs, lgmap=None, non_ghost_cells=False): r"""Return a map from process local dof numbering to global dof numbering. If BCs is provided, mask out those dofs which match the BC nodes.""" @@ -748,7 +748,7 @@ def local_to_global_map(self, bcs, lgmap=None): # not just on the bcs, but also the parent space, and anything # this space has been recursively split out from [e.g. inside # fieldsplit] - if bcs is None or len(bcs) == 0: + if not non_ghost_cells and (bcs is None or len(bcs) == 0): return lgmap or self.dof_dset.lgmap for bc in bcs: fs = bc.function_space() @@ -783,6 +783,34 @@ def local_to_global_map(self, bcs, lgmap=None): nodes.append(tmp + i) else: nodes.append(bc.nodes) + if non_ghost_cells: + mesh = self.mesh() + ncell = mesh.cell_set.size + if mesh.cell_set._extruded: + if mesh.variable_layers: + cell_nodes = [] + layers = mesh.layers[:, 1] - mesh.layers[:, 0] - 1 + offsets = numpy.outer(numpy.arange(max(layers), dtype=self.offset.dtype), self.offset) + for nlayers, base_nodes in zip(layers, self.cell_node_list[:ncell]): + cell_nodes.append(base_nodes[None, :] + offsets[:nlayers, :]) + cell_nodes = numpy.concatenate(cell_nodes) + else: + nlayers = mesh.layers - 1 + offsets = numpy.outer(numpy.arange(nlayers, dtype=self.offset.dtype), self.offset) + cell_nodes = self.cell_node_list[None, :ncell, :] + offsets[:, None, :] + else: + cell_nodes = self.cell_node_list[:ncell] + + non_ghost_cell_nodes = numpy.unique(cell_nodes.flatten()) + cdim = self.dof_dset.cdim + if not unblocked and cdim > 1: + non_ghost_cell_nodes *= cdim + non_ghost_cell_nodes = numpy.add.outer(non_ghost_cell_nodes, + numpy.arange(cdim, dtype=non_ghost_cell_nodes.dtype)) + ghost_cell_nodes = numpy.setdiff1d(numpy.arange(len(indices), dtype=non_ghost_cell_nodes.dtype), + non_ghost_cell_nodes, + assume_unique=True) + nodes.append(ghost_cell_nodes) nodes = numpy.unique(numpy.concatenate(nodes)) indices[nodes] = -1 return PETSc.LGMap().create(indices, bsize=bsize, comm=lgmap.comm) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index f0ccf8b4a7..573b434377 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -177,8 +177,6 @@ def initialize(self, pc): # Assemble the FDM preconditioner with sparse local matrices Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation, use_amat) - - self._assemble_P() fdmpc.setOperators(A=Amat, P=Pmat) @@ -189,19 +187,6 @@ def initialize(self, pc): else: fdmpc.setFromOptions() - def get_non_ghosted_lgmap(self, V): - # TODO extruded meshes - lgmap = V.dof_dset.lgmap - indices = lgmap.indices.copy() - - ncell = V.mesh().cell_set.size - non_ghost_cell_nodes = numpy.unique(V.cell_node_list[:ncell].flatten()) - ghost_cell_nodes = numpy.setdiff1d(numpy.arange(len(indices)), - non_ghost_cell_nodes, - assume_unique=True) - indices[ghost_cell_nodes] = -1 - return PETSc.LGMap().create(indices, bsize=lgmap.block_size, comm=lgmap.getComm()) - @PETSc.Log.EventDecorator("FDMPrealloc") def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensation, use_amat): """ @@ -245,7 +230,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # Create data structures needed for assembly self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} - self.non_ghosted_lgmaps = {Vsub: self.get_non_ghosted_lgmap(Vsub) for Vsub in V} + self.non_ghosted_lgmaps = {Vsub: Vsub.local_to_global_map([], lgmap=Vsub.dof_dset.lgmap, non_ghost_cells=True) for Vsub in V} self.indices = {Vsub: op2.Dat(Vsub.dof_dset, self.lgmaps[Vsub].indices) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) @@ -321,9 +306,9 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # populate diagonal entries if on_diag: n = len(self.non_ghosted_lgmaps[Vrow].indices) - i = numpy.arange(n,dtype=PETSc.IntType).reshape(-1,1) - v = numpy.ones(n,dtype=PETSc.ScalarType).reshape(-1,1) - P.setValuesLocalRCV(i,i,v,addv=addv) + i = numpy.arange(n, dtype=PETSc.IntType).reshape(-1, 1) + v = numpy.ones(i.shape, dtype=PETSc.ScalarType) + P.setValuesLocalRCV(i, i, v, addv=addv) P.assemble() # append callables to zero entries, insert element matrices, and apply BCs @@ -737,8 +722,6 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): C1 = self.assemble_reference_tensor(Vcol) R1 = self.assemble_reference_tensor(Vrow, transpose=True) - - # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b element_kernel = TripleProductKernel(R1, M, C1) schur_kernel = self.schur_kernel.get(Vrow) if on_diag else None From 82a0daa858f5ea90ab67323f02026438e9bbb3b7 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 28 Feb 2024 12:18:02 +0000 Subject: [PATCH 13/52] FDMPC: cache preallocator kernel and use it on Pmat to freeze nonzero pattern --- firedrake/preconditioners/fdm.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 573b434377..e5dcc73f05 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -302,7 +302,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) P.setUp() - self.set_values(P, Vrow, Vcol, addv, mat_type=ptype) + self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") # populate diagonal entries if on_diag: n = len(self.non_ghosted_lgmaps[Vrow].indices) @@ -744,13 +744,19 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): coefficients_acc, *indices_acc) self.assemblers.setdefault(key, assembler) - if A.getType() == "preallocator": - # Determine the global sparsity pattern by inserting a constant sparse element matrix - args = assembler.arguments[:2] - kernel = ElementKernel(PETSc.Mat(), name="preallocate").kernel(mat_type=mat_type, on_diag=on_diag) - assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, - *(op2.PassthroughArg(op2.OpaqueType("Mat"), arg.data) for arg in args), - *indices_acc) + if A.getType() == "preallocator" or mat_type == "preallocator": + key = key + ("preallocator",) + try: + assembler = self.assemblers[key] + except KeyError: + # Determine the global sparsity pattern by inserting a constant sparse element matrix + args = assembler.arguments[:2] + kernel = ElementKernel(PETSc.Mat(), name="preallocate").kernel(mat_type=mat_type, on_diag=on_diag, addv=addv) + assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, + *(op2.PassthroughArg(op2.OpaqueType("Mat"), arg.data) for arg in args), + *indices_acc) + self.assemblers.setdefault(key, assembler) + assembler.arguments[0].data = A.handle assembler() From 36bb6ab165269f0f5042ac6df0d8f7b2506ab3c8 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 28 Feb 2024 17:55:31 +0000 Subject: [PATCH 14/52] UNUSED_NONZERO_LOCATION_ERR --- firedrake/preconditioners/fdm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index e5dcc73f05..81789f6f31 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -296,6 +296,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati P.setPreallocationNNZ((dnz, onz)) P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, ptype != "is") P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) if ptype.endswith("sbaij"): From f2e60c26e435113e903864077ab7f188b119b518 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 28 Feb 2024 18:22:44 +0000 Subject: [PATCH 15/52] cleanup --- firedrake/functionspaceimpl.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py index 5351782dec..abcd21f33c 100644 --- a/firedrake/functionspaceimpl.py +++ b/firedrake/functionspaceimpl.py @@ -785,21 +785,9 @@ def local_to_global_map(self, bcs, lgmap=None, non_ghost_cells=False): nodes.append(bc.nodes) if non_ghost_cells: mesh = self.mesh() - ncell = mesh.cell_set.size + cell_nodes = self.cell_node_list[:mesh.cell_set.size] if mesh.cell_set._extruded: - if mesh.variable_layers: - cell_nodes = [] - layers = mesh.layers[:, 1] - mesh.layers[:, 0] - 1 - offsets = numpy.outer(numpy.arange(max(layers), dtype=self.offset.dtype), self.offset) - for nlayers, base_nodes in zip(layers, self.cell_node_list[:ncell]): - cell_nodes.append(base_nodes[None, :] + offsets[:nlayers, :]) - cell_nodes = numpy.concatenate(cell_nodes) - else: - nlayers = mesh.layers - 1 - offsets = numpy.outer(numpy.arange(nlayers, dtype=self.offset.dtype), self.offset) - cell_nodes = self.cell_node_list[None, :ncell, :] + offsets[:, None, :] - else: - cell_nodes = self.cell_node_list[:ncell] + cell_nodes = extrude_cell_node_list(cell_nodes, self.offset, mesh.layers) non_ghost_cell_nodes = numpy.unique(cell_nodes.flatten()) cdim = self.dof_dset.cdim @@ -1192,3 +1180,16 @@ class FunctionSpaceCargo: topological: FunctionSpace parent: Optional[WithGeometryBase] + + +def extrude_cell_node_list(cell_node_list, offset, layers): + if layers.size == 1: + nlayers = layers - 1 + offsets = numpy.outer(numpy.arange(nlayers, dtype=offset.dtype), offset) + cell_nodes = cell_node_list[:, None, :] + offsets[None, :, :] + else: + nlayers = layers[:, 1] - layers[:, 0] - 1 + offsets = numpy.outer(numpy.arange(max(nlayers), dtype=offset.dtype), offset) + cell_nodes = numpy.concatenate([base_nodes[None, :] + offsets[:n, :] + for n, base_nodes in zip(nlayers, cell_node_list)]) + return cell_nodes From 2deb254c934dcda19be8d9d2f3031200d3585443 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 29 Feb 2024 23:34:29 +0000 Subject: [PATCH 16/52] set values local --- firedrake/functionspaceimpl.py | 1 - firedrake/preconditioners/fdm.py | 62 +++++++++++++++++++++----------- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py index abcd21f33c..099a3864ee 100644 --- a/firedrake/functionspaceimpl.py +++ b/firedrake/functionspaceimpl.py @@ -788,7 +788,6 @@ def local_to_global_map(self, bcs, lgmap=None, non_ghost_cells=False): cell_nodes = self.cell_node_list[:mesh.cell_set.size] if mesh.cell_set._extruded: cell_nodes = extrude_cell_node_list(cell_nodes, self.offset, mesh.layers) - non_ghost_cell_nodes = numpy.unique(cell_nodes.flatten()) cdim = self.dof_dset.cdim if not unblocked and cdim > 1: diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 81789f6f31..51005c5eb9 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -15,6 +15,7 @@ from firedrake.cofunction import Cofunction from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions from firedrake.utils import cached_property +from firedrake.functionspaceimpl import extrude_cell_node_list from firedrake_citations import Citations from ufl.algorithms.ad import expand_derivatives from ufl.algorithms.expand_indices import expand_indices @@ -229,10 +230,32 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati self.fises = PETSc.IS().createBlock(Vbig.value_size, fdofs, comm=PETSc.COMM_SELF) # Create data structures needed for assembly + def mask_local_indices(indices): + local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) + local_indices[indices < 0] = -1 + return local_indices + + def get_lgmap(V, broken=False): + lgmap = V.dof_dset.lgmap + if broken: + mesh = V.mesh() + ncell = mesh.cell_set.size + indices = V.cell_node_list[:ncell] + if V.extruded: + indices = extrude_cell_node_list(indices, V.offset, mesh.layers) + lgmap.apply(indices, result=indices) + return PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) + else: + return V.local_to_global_map([], lgmap=lgmap, non_ghost_cells=True) + + self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub) for Vsub in V} self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} - self.non_ghosted_lgmaps = {Vsub: Vsub.local_to_global_map([], lgmap=Vsub.dof_dset.lgmap, non_ghost_cells=True) for Vsub in V} + + self.indices = {Vsub: op2.Dat(Vsub.dof_dset, + mask_local_indices(self.lgmaps[Vsub].indices), + dtype=PETSc.IntType) for Vsub in V} - self.indices = {Vsub: op2.Dat(Vsub.dof_dset, self.lgmaps[Vsub].indices) for Vsub in V} + #self.indices = {Vsub: op2.Dat(Vsub.dof_dset, numpy.arange(Vsub.dof_dset.set.size, dtype=PETSc.IntType), dtype=PETSc.IntType) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} self.kernels = [] @@ -274,12 +297,15 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati on_diag = Vrow == Vcol sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ + rlgmap = self.non_ghosted_lgmaps[Vrow] + clgmap = self.non_ghosted_lgmaps[Vcol] preallocator = PETSc.Mat().create(comm=self.comm) preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) preallocator.setSizes(sizes) - preallocator.setUp() + preallocator.setLGMap(rlgmap, clgmap) preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) + preallocator.setUp() self.set_values(preallocator, Vrow, Vcol, addv, mat_type=ptype) preallocator.assemble() dnz, onz = get_preallocation(preallocator, sizes[0][0]) @@ -290,13 +316,11 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) - - P.setLGMap(self.non_ghosted_lgmaps[Vrow], - self.non_ghosted_lgmaps[Vcol]) + P.setLGMap(rlgmap, clgmap) P.setPreallocationNNZ((dnz, onz)) P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) - P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, ptype != "is") + #P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, ptype != "is") P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) if ptype.endswith("sbaij"): @@ -306,7 +330,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") # populate diagonal entries if on_diag: - n = len(self.non_ghosted_lgmaps[Vrow].indices) + n = len(self.indices[Vrow].data_ro_with_halos) i = numpy.arange(n, dtype=PETSc.IntType).reshape(-1, 1) v = numpy.ones(i.shape, dtype=PETSc.ScalarType) P.setValuesLocalRCV(i, i, v, addv=addv) @@ -316,11 +340,9 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati assembly_callables.append(P.zeroEntries) assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv, mat_type=ptype)) if on_diag: - own = Vrow.dof_dset.layout_vec.getLocalSize() - bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] - Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) + bdofs = numpy.flatnonzero(self.indices[Vrow].data_ro < 0).astype(PETSc.IntType)[:, None] assembly_callables.append(P.assemble) - assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) + assembly_callables.append(partial(P.zeroRowsLocal, bdofs, 1.0)) gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): @@ -768,7 +790,7 @@ class ElementKernel: code = dedent(""" PetscErrorCode %(name)s(const Mat A, const Mat B, %(indices)s) { PetscFunctionBeginUser; - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); PetscFunctionReturn(PETSC_SUCCESS); }""") @@ -806,10 +828,10 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): for (PetscInt j = ai[i]; j < ai[i + 1]; j++) indices[j] -= (indices[j] < rindices[i]) * (indices[j] + 1);""" code += dedent(""" - static inline PetscErrorCode MatSetValuesSparse(const Mat A, const Mat B, - const PetscInt *restrict rindices, - const PetscInt *restrict cindices, - InsertMode addv) { + static inline PetscErrorCode MatSetValuesLocalSparse(const Mat A, const Mat B, + const PetscInt *restrict rindices, + const PetscInt *restrict cindices, + InsertMode addv) { PetscBool done; PetscInt m, ncols, istart, *indices; const PetscInt *ai, *aj; @@ -823,7 +845,7 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): istart = ai[i]; ncols = ai[i + 1] - istart; %(select_cols)s - PetscCall(MatSetValues(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv)); + PetscCall(MatSetValuesLocal(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv)); } PetscCall(MatSeqAIJRestoreArrayRead(B, &vals)); PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); @@ -848,7 +870,7 @@ class TripleProductKernel(ElementKernel): PetscCall(MatProductGetMats(B, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); PetscCall(MatProductNumeric(B)); - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); PetscFunctionReturn(PETSC_SUCCESS); }""") @@ -870,7 +892,7 @@ class SchurComplementKernel(ElementKernel): PetscCall(MatProductGetMats(A11, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); %(condense)s - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); PetscFunctionReturn(PETSC_SUCCESS); }""") From 49390c77a340634da0d408b1dcc2f4152f2ce90c Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 1 Mar 2024 14:37:10 +0000 Subject: [PATCH 17/52] fix F4 typo --- firedrake/preconditioners/fdm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 51005c5eb9..804db02757 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -250,7 +250,6 @@ def get_lgmap(V, broken=False): self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub) for Vsub in V} self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} - self.indices = {Vsub: op2.Dat(Vsub.dof_dset, mask_local_indices(self.lgmaps[Vsub].indices), dtype=PETSc.IntType) for Vsub in V} From 9fc3d8525fc3bf2370ad9e557385ef8d07639870 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 1 Mar 2024 20:20:12 +0000 Subject: [PATCH 18/52] Correct broken LGMap --- firedrake/preconditioners/fdm.py | 46 ++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 804db02757..ad5b46c212 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -230,17 +230,12 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati self.fises = PETSc.IS().createBlock(Vbig.value_size, fdofs, comm=PETSc.COMM_SELF) # Create data structures needed for assembly - def mask_local_indices(indices): - local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) - local_indices[indices < 0] = -1 - return local_indices - def get_lgmap(V, broken=False): lgmap = V.dof_dset.lgmap if broken: mesh = V.mesh() ncell = mesh.cell_set.size - indices = V.cell_node_list[:ncell] + indices = V.cell_node_list[:ncell].copy() if V.extruded: indices = extrude_cell_node_list(indices, V.offset, mesh.layers) lgmap.apply(indices, result=indices) @@ -248,13 +243,27 @@ def get_lgmap(V, broken=False): else: return V.local_to_global_map([], lgmap=lgmap, non_ghost_cells=True) - self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub) for Vsub in V} - self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} - self.indices = {Vsub: op2.Dat(Vsub.dof_dset, - mask_local_indices(self.lgmaps[Vsub].indices), - dtype=PETSc.IntType) for Vsub in V} + def mask_local_indices(V, lgmap, broken=False): + if broken: + indices = V.cell_node_list.copy() + if V.extruded: + indices = extrude_cell_node_list(indices, V.offset, V.mesh().layers) + mask = indices.flatten() + lgmap.apply(mask, result=mask) + V = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element())) + else: + mask = lgmap.indices + + indices = numpy.arange(len(mask), dtype=PETSc.IntType) + indices[mask == -1] = -1 + indices_dat = op2.Dat(V.dof_dset, indices, dtype=PETSc.IntType) + indices_acc = indices_dat(op2.READ, V.cell_node_map()) + return indices_acc - #self.indices = {Vsub: op2.Dat(Vsub.dof_dset, numpy.arange(Vsub.dof_dset.set.size, dtype=PETSc.IntType), dtype=PETSc.IntType) for Vsub in V} + broken = pmat_type == "is" + self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub, broken) for Vsub in V} + self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} + self.indices = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], broken) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} self.kernels = [] @@ -319,7 +328,7 @@ def get_lgmap(V, broken=False): P.setPreallocationNNZ((dnz, onz)) P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) - #P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, ptype != "is") + P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, ptype != "is") P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) if ptype.endswith("sbaij"): @@ -329,7 +338,7 @@ def get_lgmap(V, broken=False): self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") # populate diagonal entries if on_diag: - n = len(self.indices[Vrow].data_ro_with_halos) + n = len(self.non_ghosted_lgmaps[Vrow].indices) i = numpy.arange(n, dtype=PETSc.IntType).reshape(-1, 1) v = numpy.ones(i.shape, dtype=PETSc.ScalarType) P.setValuesLocalRCV(i, i, v, addv=addv) @@ -339,9 +348,12 @@ def get_lgmap(V, broken=False): assembly_callables.append(P.zeroEntries) assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv, mat_type=ptype)) if on_diag: - bdofs = numpy.flatnonzero(self.indices[Vrow].data_ro < 0).astype(PETSc.IntType)[:, None] + own = Vrow.dof_dset.layout_vec.getLocalSize() + bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] + Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) assembly_callables.append(P.assemble) - assembly_callables.append(partial(P.zeroRowsLocal, bdofs, 1.0)) + assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) + # assembly_callables.append(P.view) gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): @@ -757,7 +769,7 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): TripleProductKernel(R0, M, C0)) self.kernels.append(element_kernel) spaces = (Vrow, Vcol)[on_diag:] - indices_acc = tuple(self.indices[V](op2.READ, V.cell_node_map()) for V in spaces) + indices_acc = tuple(self.indices[V] for V in spaces) coefficients = self.coefficients["cell"] coefficients_acc = coefficients.dat(op2.READ, coefficients.cell_node_map()) kernel = element_kernel.kernel(on_diag=on_diag, addv=addv) From 6b05029d4039db29ee6d378f960b3f08089432f1 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Sat, 2 Mar 2024 13:08:04 +0000 Subject: [PATCH 19/52] remove PetscFunctionBeginUser --- firedrake/preconditioners/fdm.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index ad5b46c212..094fb34ba0 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -376,12 +376,12 @@ def sub_nullspace(nsp, iset): else: Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) + assembly_callables.append(Pmat.assemble) for Vrow, iset in zip(V, Pmat.getNestISs()[0]): Pmats[Vrow, Vrow].setNullSpace(sub_nullspace(Amat.getNullSpace(), iset)) Pmats[Vrow, Vrow].setTransposeNullSpace(sub_nullspace(Amat.getTransposeNullSpace(), iset)) Pmats[Vrow, Vrow].setNearNullSpace(sub_nullspace(Amat.getNearNullSpace(), iset)) - assembly_callables.append(Pmat.assemble) assembly_callables.extend(diagonal_terms) return Amat, Pmat, assembly_callables @@ -800,7 +800,6 @@ class ElementKernel: By default, it inserts the same matrix on each cell.""" code = dedent(""" PetscErrorCode %(name)s(const Mat A, const Mat B, %(indices)s) { - PetscFunctionBeginUser; PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); PetscFunctionReturn(PETSC_SUCCESS); }""") @@ -826,7 +825,6 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): PetscInt m; const PetscInt *ai; PetscScalar *vals; - PetscFunctionBeginUser; PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); PetscCall(MatSeqAIJGetArrayWrite(A, &vals)); PetscCall(PetscMemcpy(vals, values, ai[m] * sizeof(*vals))); @@ -847,7 +845,6 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): PetscInt m, ncols, istart, *indices; const PetscInt *ai, *aj; const PetscScalar *vals; - PetscFunctionBeginUser; PetscCall(MatGetRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); PetscCall(PetscMalloc1(ai[m], &indices)); for (PetscInt j = 0; j < ai[m]; j++) indices[j] = cindices[aj[j]]; @@ -877,7 +874,6 @@ class TripleProductKernel(ElementKernel): const PetscScalar *restrict coefficients, %(indices)s) { Mat C; - PetscFunctionBeginUser; PetscCall(MatProductGetMats(B, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); PetscCall(MatProductNumeric(B)); @@ -899,7 +895,6 @@ class SchurComplementKernel(ElementKernel): const Mat A11, const Mat A10, const Mat A01, const Mat A00, const PetscScalar *restrict coefficients, %(indices)s) { Mat C; - PetscFunctionBeginUser; PetscCall(MatProductGetMats(A11, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); %(condense)s @@ -994,7 +989,6 @@ class SchurComplementBlockCholesky(SchurComplementKernel): const PetscInt *ai; PetscScalar *vals, *U; Mat X; - PetscFunctionBeginUser; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A01)); PetscCall(MatProductNumeric(A00)); @@ -1061,7 +1055,6 @@ class SchurComplementBlockLU(SchurComplementKernel): const PetscInt *ai; PetscScalar *vals, *work, *L, *U; Mat X; - PetscFunctionBeginUser; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A10)); PetscCall(MatProductNumeric(A01)); @@ -1164,7 +1157,6 @@ class SchurComplementBlockInverse(SchurComplementKernel): PetscInt m, irow, bsize, *ipiv; const PetscInt *ai; PetscScalar *vals, *work, *ainv, swork; - PetscFunctionBeginUser; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A10)); PetscCall(MatProductNumeric(A01)); @@ -1287,7 +1279,6 @@ def matmult_kernel_code(a, prefix="form", fcp=None, matshell=False): static PetscErrorCode %(prefix)s(Mat A, Vec X, Vec Y) { PetscScalar **appctx, *y; const PetscScalar *x; - PetscFunctionBeginUser; PetscCall(MatShellGetContext(A, &appctx)); PetscCall(VecZeroEntries(Y)); PetscCall(VecGetArray(Y, &y)); @@ -1314,7 +1305,6 @@ class InteriorSolveKernel(ElementKernel): PetscInt m; Mat A, B, C; Vec X, Y; - PetscFunctionBeginUser; PetscCall(KSPGetOperators(ksp, &A, &B)); PetscCall(MatShellSetContext(A, &appctx)); PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); @@ -1381,7 +1371,6 @@ class ImplicitSchurComplementKernel(ElementKernel): PetscInt i; Mat A, B, C; Vec X, Y; - PetscFunctionBeginUser; PetscCall(KSPGetOperators(ksp, &A, &B)); PetscCall(MatShellSetContext(A, &appctx)); PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); @@ -1815,7 +1804,6 @@ def load_setSubMatCSR(comm, triu=False): PetscInt m, ncols, irow, icol; PetscInt *cols, *indices; PetscScalar *vals; - PetscFunctionBeginUser; PetscCall(MatGetSize(B, &m, NULL)); PetscCall(MatSeqAIJGetMaxRowNonzeros(B, &ncols)); PetscCall(PetscMalloc1(ncols, &indices)); From f9f35d8f0588fe60470f01678eb301d39e8a1189 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Sun, 3 Mar 2024 11:06:55 +0000 Subject: [PATCH 20/52] merge master --- firedrake/preconditioners/pmg.py | 183 +++++++++++++------------------ 1 file changed, 74 insertions(+), 109 deletions(-) diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 35ff04b7ee..0d808eed6c 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -452,11 +452,9 @@ def configure_pmg(self, pc, pdm): # the p-MG's coarse problem. So we need to set an option # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. - max_levels = odm.getRefineLevel() + 1 - if max_levels > 1: - opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") - if opts.getString("mg_coarse_pc_type") == "mg": - opts["mg_coarse_pc_mg_levels"] = min(opts.getInt("mg_coarse_pc_mg_levels", max_levels), max_levels) + opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") + opts["mg_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 + return ppc def apply(self, pc, x, y): @@ -497,13 +495,10 @@ def configure_pmg(self, snes, pdm): # the p-MG's coarse problem. So we need to set an option # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. - max_levels = odm.getRefineLevel() + 1 - if max_levels > 1: - opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") - if opts.getString("fas_coarse_pc_type") == "mg": - opts["fas_coarse_pc_mg_levels"] = min(opts.getInt("fas_coarse_pc_mg_levels", max_levels), max_levels) - if opts.getString("fas_coarse_snes_type") == "fas": - opts["fas_coarse_snes_fas_levels"] = min(opts.getInt("fas_coarse_snes_fas_levels", max_levels), max_levels) + opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") + opts["fas_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 + opts["fas_coarse_snes_fas_levels"] = odm.getRefineLevel() + 1 + return psnes def step(self, snes, x, f, y): @@ -612,18 +607,22 @@ def evaluate_dual(source, target, derivative=None): A read-only :class:`numpy.ndarray` with the evaluation of the target dual basis on the (derivative of the) source primal basis. """ - if derivative is None: - primal = source.get_nodal_basis() - else: - from FIAT.demkowicz import project_derivative - primal = project_derivative(source, derivative) - - coeffs = primal.get_coeffs() + primal = source.get_nodal_basis() dual = target.get_dual_set() - dual_mat = dual.to_riesz(primal) - A = dual_mat.reshape((dual_mat.shape[0], -1)) - B = coeffs.reshape((-1, A.shape[1])) - return get_readonly_view(numpy.dot(A, B.T)) + A = dual.to_riesz(primal) + B = numpy.transpose(primal.get_coeffs()) + if derivative == "grad": + dmats = primal.get_dmats() + assert len(dmats) == 1 + alpha = (1,) + for i in range(len(alpha)): + for j in range(alpha[i]): + B = numpy.dot(dmats[i], B) + elif derivative in ("curl", "div"): + raise NotImplementedError(f"Dual evaluation of {derivative} is not yet implemented.") + elif derivative is not None: + raise ValueError(f"Invalid derivaitve type {derivative}.") + return get_readonly_view(numpy.dot(A, B)) @cached({}, key=generate_key_evaluate_dual) @@ -1169,16 +1168,16 @@ class StandaloneInterpolationMatrix(object): """ Interpolation matrix for a single standalone space. """ + _cache_work = {} - def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs, derivative=False): + def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs): self.uc = self.work_function(Vc) self.uf = self.work_function(Vf) self.Vc = self.uc.function_space() self.Vf = self.uf.function_space() self.Vc_bcs = Vc_bcs self.Vf_bcs = Vf_bcs - self.derivative = derivative def work_function(self, V): if isinstance(V, firedrake.Function): @@ -1196,6 +1195,7 @@ def _weight(self): kernel_code = f""" void weight(PetscScalar *restrict w){{ for(PetscInt i=0; i<{size}; i++) w[i] += 1.0; + return; }} """ kernel = op2.Kernel(kernel_code, "weight", requires_zeroed_output_arguments=True) @@ -1205,8 +1205,7 @@ def _weight(self): return weight @cached_property - def _parloops(self): - self._prolong_write = True + def _kernels(self): try: # We generate custom prolongation and restriction kernels mainly because: # 1. Code generation for the transpose of prolongation is not readily available @@ -1218,7 +1217,6 @@ def _parloops(self): self.uf.dat(op2.INC, uf_map), self.uc.dat(op2.READ, uc_map), self._weight.dat(op2.READ, uf_map)] - self._prolong_write = False except ValueError: # The elements do not have the expected tensor product structure # Fall back to aij kernels @@ -1239,10 +1237,14 @@ def _parloops(self): return prolong, restrict def _prolong(self): - self._parloops[0]() + with self.uf.dat.vec_wo as uf: + uf.set(0.0E0) + self._kernels[0]() def _restrict(self): - self._parloops[1]() + with self.uc.dat.vec_wo as uc: + uc.set(0.0E0) + self._kernels[1]() def view(self, mat, viewer=None): if viewer is None: @@ -1270,7 +1272,8 @@ def getInfo(self, mat, info=None): else: raise ValueError("Unknown info type %s" % info) - def make_blas_kernels(self, Vf, Vc): + @staticmethod + def make_blas_kernels(Vf, Vc): """ Interpolation and restriction kernels between CG / DG tensor product spaces on quads and hexes. @@ -1280,9 +1283,6 @@ def make_blas_kernels(self, Vf, Vc): and using the fact that the 2D / 3D tabulation is the tensor product J = kron(Jhat, kron(Jhat, Jhat)) """ - if self.derivative: - # TODO hook up tabulate_exterior_derivative from fdm.py - raise ValueError felem = Vf.ufl_element() celem = Vc.ufl_element() fmapping = felem.mapping().lower() @@ -1412,47 +1412,14 @@ def make_blas_kernels(self, Vf, Vc): ldargs=BLASLAPACK_LIB.split(), requires_zeroed_output_arguments=True) return prolong_kernel, restrict_kernel, coefficients - def mat_mult_kernel(self, A, value_size, transpose=False): - if transpose: - A = A.T - name = "matmult" - code = f""" - void {name}(PetscScalar *restrict y, const PetscScalar *restrict x - {", const PetscScalar *restrict w" if transpose else ""} - ){{ - const PetscScalar A[] = {{ {", ".join(map(float.hex, A.flat))} }}; - {IntType_c} i0, i1; - for ({IntType_c} k = 0; k < {value_size}; k++) {{ - for ({IntType_c} i = 0; i < {A.shape[0]}; i++) {{ - i0 = i * {value_size} + k; - {"" if transpose else "y[i0] = 0;"} - for ({IntType_c} j = 0; j < {A.shape[1]}; j++) {{ - i1 = j * {value_size} + k; - y[i0] += A[i * {A.shape[1]} + j] * {"(x[i1] * w[i1])" if transpose else "x[i1]"}; - }} - }} - }} - }}""" - return op2.Kernel(code, name, requires_zeroed_output_arguments=transpose) - def make_kernels(self, Vf, Vc): """ Interpolation and restriction kernels between arbitrary elements. This is temporary while we wait for dual evaluation in FInAT. """ - if Vf.finat_element.formdegree == Vc.finat_element.formdegree + self.derivative and Vf.shape == Vc.shape: - derivative = None - if self.derivative: - derivative = {ufl.HCurl: "grad", ufl.HDiv: "curl", ufl.L2: "div"}[Vf.ufl_element().sobolev_space] - A = evaluate_dual(Vc.finat_element.fiat_equivalent, Vf.finat_element.fiat_equivalent, derivative=derivative) - return self.mat_mult_kernel(A, Vf.value_size), self.mat_mult_kernel(A, Vf.value_size, transpose=True), [] - - expr = self.uc - if self.derivative: - expr = ufl.exterior_derivative(expr) - prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, expr) - matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, ufl.derivative(expr, self.uc)) + prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, self.uc) + matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, firedrake.TestFunction(Vc)) # The way we transpose the prolongation kernel is suboptimal. # A local matrix is generated each time the kernel is executed. @@ -1482,45 +1449,48 @@ def make_kernels(self, Vf, Vc): ) return prolong_kernel, restrict_kernel, coefficients - def _copy(self, x, y): - if x.handle != y.handle: - x.copy(y) + def multTranspose(self, mat, rf, rc): + """ + Implement restriction: restrict residual on fine grid rf to coarse grid rc. + """ + with self.uf.dat.vec_wo as uf: + rf.copy(uf) + for bc in self.Vf_bcs: + bc.zero(self.uf) + + self._restrict() - def _op(self, action, x_bcs, y_bcs, x, y, X, Y, W=None, inc=False): - out = Y if W is None else W - if inc: - self._copy(Y, out) - with y.dat.vec_wo as v: - if W is None or inc: - v.zeroEntries() - else: - self._copy(Y, v) - with x.dat.vec_wo as v: - self._copy(X, v) - for bc in x_bcs: - bc.zero(x) - action() - for bc in y_bcs: - bc.zero(y) - with y.dat.vec_ro as v: - if inc: - out.axpy(1.0, v) - else: - self._copy(v, out) + for bc in self.Vc_bcs: + bc.zero(self.uc) + with self.uc.dat.vec_ro as uc: + uc.copy(rc) - def mult(self, mat, X, Y): - """Prolong correction on coarse grid X to fine grid Y.""" - self._op(self._prolong, self.Vc_bcs, self.Vf_bcs, self.uc, self.uf, X, Y) + def mult(self, mat, xc, xf, inc=False): + """ + Implement prolongation: prolong correction on coarse grid xc to fine grid xf. + """ + with self.uc.dat.vec_wo as uc: + xc.copy(uc) + for bc in self.Vc_bcs: + bc.zero(self.uc) - def multAdd(self, mat, X, Y, W): - self._op(self._prolong, self.Vc_bcs, self.Vf_bcs, self.uc, self.uf, X, Y, W=W, inc=self._prolong_write) + self._prolong() - def multTranspose(self, mat, X, Y): - """Restrict residual on fine grid X to coarse grid Y.""" - self._op(self._restrict, self.Vf_bcs, self.Vc_bcs, self.uf, self.uc, X, Y) + for bc in self.Vf_bcs: + bc.zero(self.uf) + if inc: + with self.uf.dat.vec_ro as uf: + xf.axpy(1.0, uf) + else: + with self.uf.dat.vec_ro as uf: + uf.copy(xf) - def multTransposeAdd(self, mat, X, Y, W): - self._op(self._restrict, self.Vf_bcs, self.Vc_bcs, self.uf, self.uc, X, Y, W=W) + def multAdd(self, mat, x, y, w): + if y.handle == w.handle: + self.mult(mat, x, w, inc=True) + else: + self.mult(mat, x, w) + w.axpy(1.0, y) class MixedInterpolationMatrix(StandaloneInterpolationMatrix): @@ -1542,12 +1512,7 @@ def _standalones(self): return standalones @cached_property - def _parloops(self): - # FIXME we cannot be lazy, as we do not know a priori if prolongation has write access - # Therefore we must initiliaze all standalone parloops in order to know the prolongation access - for s in self._standalones: - s._parloops - self._prolong_write = any(s._prolong_write for s in self._standalones) + def _kernels(self): prolong = lambda: [s._prolong() for s in self._standalones] restrict = lambda: [s._restrict() for s in self._standalones] return prolong, restrict From 766317e304ac49e59ab22a628154224e2f3e3124 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 4 Mar 2024 12:57:03 +0000 Subject: [PATCH 21/52] cleanup --- firedrake/functionspaceimpl.py | 8 +-- firedrake/preconditioners/facet_split.py | 7 +-- firedrake/preconditioners/fdm.py | 63 +++++++++++++----------- 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py index 099a3864ee..7bf7bb2a3f 100644 --- a/firedrake/functionspaceimpl.py +++ b/firedrake/functionspaceimpl.py @@ -793,8 +793,8 @@ def local_to_global_map(self, bcs, lgmap=None, non_ghost_cells=False): if not unblocked and cdim > 1: non_ghost_cell_nodes *= cdim non_ghost_cell_nodes = numpy.add.outer(non_ghost_cell_nodes, - numpy.arange(cdim, dtype=non_ghost_cell_nodes.dtype)) - ghost_cell_nodes = numpy.setdiff1d(numpy.arange(len(indices), dtype=non_ghost_cell_nodes.dtype), + numpy.arange(cdim, dtype=cell_nodes.dtype)) + ghost_cell_nodes = numpy.setdiff1d(numpy.arange(len(indices), dtype=cell_nodes.dtype), non_ghost_cell_nodes, assume_unique=True) nodes.append(ghost_cell_nodes) @@ -1182,7 +1182,9 @@ class FunctionSpaceCargo: def extrude_cell_node_list(cell_node_list, offset, layers): - if layers.size == 1: + if offset is None: + cell_nodes = cell_node_list + elif layers.size == 1: nlayers = layers - 1 offsets = numpy.outer(numpy.arange(nlayers, dtype=offset.dtype), offset) cell_nodes = cell_node_list[:, None, :] + offsets[None, :, :] diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py index 8bcff5d3f2..b35c5b8379 100644 --- a/firedrake/preconditioners/facet_split.py +++ b/firedrake/preconditioners/facet_split.py @@ -258,8 +258,5 @@ def get_permutation_map(V, W): for (PetscInt i=0; i<{size}; i++) v[i] = w[i]; }}""" kernel = op2.Kernel(kernel_code, "permutation", requires_zeroed_output_arguments=False) - op2.par_loop(kernel, V.mesh().cell_set, - vdat(op2.WRITE, pmap), wdat(op2.READ, W.cell_node_map())) - - own = V.dof_dset.layout_vec.sizes[0] - return perm[:own] + op2.par_loop(kernel, V.mesh().cell_set, vdat(op2.WRITE, pmap), wdat(op2.READ, W.cell_node_map())) + return vdat.data_ro diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index d532b48ea3..a51cf68442 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -13,9 +13,9 @@ from firedrake.functionspace import FunctionSpace, MixedFunctionSpace from firedrake.function import Function from firedrake.cofunction import Cofunction +from firedrake.parloops import par_loop from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions from firedrake.utils import cached_property -from firedrake.functionspaceimpl import extrude_cell_node_list from firedrake_citations import Citations from ufl.algorithms.ad import expand_derivatives from ufl.algorithms.expand_indices import expand_indices @@ -229,40 +229,43 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati self.fises = PETSc.IS().createBlock(Vbig.value_size, fdofs, comm=PETSc.COMM_SELF) # Create data structures needed for assembly + def get_broken_indices(V, indices): + W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element())) + w = Function(W, dtype=indices.dtype) + v = Function(V, val=indices) + domain = "{[i]: 0 <= i < v.dofs}" + instructions = """ + for i + w[i] = v[i] + end + """ + par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)}) + return w + def get_lgmap(V, broken=False): lgmap = V.dof_dset.lgmap if broken: - mesh = V.mesh() - ncell = mesh.cell_set.size - indices = V.cell_node_list[:ncell].copy() - if V.extruded: - indices = extrude_cell_node_list(indices, V.offset, mesh.layers) - lgmap.apply(indices, result=indices) + indices = get_broken_indices(V, lgmap.indices).dat.data_ro return PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) else: return V.local_to_global_map([], lgmap=lgmap, non_ghost_cells=True) def mask_local_indices(V, lgmap, broken=False): + mask = lgmap.indices if broken: - indices = V.cell_node_list.copy() - if V.extruded: - indices = extrude_cell_node_list(indices, V.offset, V.mesh().layers) - mask = indices.flatten() - lgmap.apply(mask, result=mask) - V = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element())) - else: - mask = lgmap.indices - + w = get_broken_indices(V, mask) + V = w.function_space() + mask = w.dat.data_ro_with_halos indices = numpy.arange(len(mask), dtype=PETSc.IntType) indices[mask == -1] = -1 - indices_dat = op2.Dat(V.dof_dset, indices, dtype=PETSc.IntType) + indices_dat = V.make_dat(val=indices) indices_acc = indices_dat(op2.READ, V.cell_node_map()) return indices_acc broken = pmat_type == "is" self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub, broken) for Vsub in V} self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} - self.indices = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], broken) for Vsub in V} + self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], broken) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} self.kernels = [] @@ -352,7 +355,6 @@ def mask_local_indices(V, lgmap, broken=False): Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) assembly_callables.append(P.assemble) assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) - # assembly_callables.append(P.view) gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): @@ -361,25 +363,26 @@ def mask_local_indices(V, lgmap, broken=False): Pmats[Vrow, Vcol] = P def sub_nullspace(nsp, iset): - if not nsp.handle: + if not nsp.handle or iset is None: return nsp return PETSc.NullSpace().create(constant=nsp.hasConstant(), vectors=[vec.getSubVector(iset) for vec in nsp.getVecs()], comm=nsp.getComm()) + def set_nullspaces(P, A, iset=None): + set_nsps = (P.setNullSpace, P.setTransposeNullSpace, P.setNearNullSpace) + get_nsps = (A.getNullSpace, A.getTransposeNullSpace, A.getNearNullSpace) + for setNP, getNA in zip(set_nsps, get_nsps): + setNP(sub_nullspace(getNA(), iset)) + if len(V) == 1: Pmat = Pmats[V, V] - Pmat.setNullSpace(Amat.getNullSpace()) - Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) - Pmat.setNearNullSpace(Amat.getNearNullSpace()) - + set_nullspaces(Pmat, Amat) else: Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) - assembly_callables.append(Pmat.assemble) for Vrow, iset in zip(V, Pmat.getNestISs()[0]): - Pmats[Vrow, Vrow].setNullSpace(sub_nullspace(Amat.getNullSpace(), iset)) - Pmats[Vrow, Vrow].setTransposeNullSpace(sub_nullspace(Amat.getTransposeNullSpace(), iset)) - Pmats[Vrow, Vrow].setNearNullSpace(sub_nullspace(Amat.getNearNullSpace(), iset)) + set_nullspaces(Pmats[Vrow, Vrow], Amat, iset=iset) + assembly_callables.append(Pmat.assemble) assembly_callables.extend(diagonal_terms) return Amat, Pmat, assembly_callables @@ -765,8 +768,8 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): TripleProductKernel(R0, M, C1), TripleProductKernel(R0, M, C0)) self.kernels.append(element_kernel) - spaces = (Vrow, Vcol)[on_diag:] - indices_acc = tuple(self.indices[V] for V in spaces) + spaces = (Vrow,) if on_diag else (Vrow, Vcol) + indices_acc = tuple(self.indices_acc[V] for V in spaces) coefficients = self.coefficients["cell"] coefficients_acc = coefficients.dat(op2.READ, coefficients.cell_node_map()) kernel = element_kernel.kernel(on_diag=on_diag, addv=addv) From 8aae9ce4053c1799cff1c3d7402d7f844f746a39 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 4 Mar 2024 14:13:39 +0000 Subject: [PATCH 22/52] FDMPC: avoid MatAXPY --- firedrake/preconditioners/fdm.py | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index a51cf68442..bf91302ee1 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -898,6 +898,7 @@ class SchurComplementKernel(ElementKernel): PetscCall(MatProductGetMats(A11, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); %(condense)s + PetscCall(MatSetValuesLocalSparse(A, A11, %(rows)s, %(cols)s, %(addv)d)); PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); PetscFunctionReturn(PETSC_SUCCESS); }""") @@ -918,7 +919,9 @@ def __init__(self, *kernels, name=None): istart += k * kdofs self.blocks = sorted(degree for degree in self.slices if degree > 1) - super().__init__(self.condense(), name=name) + result = self.condense() + result.axpy(1.0, self.submats[0]) + super().__init__(result, name=name) self.mats.extend(self.submats) self.rules["condense"] = self.condense_code @@ -930,16 +933,15 @@ class SchurComplementPattern(SchurComplementKernel): """Kernel builder to pad with zeros the Schur complement sparsity pattern.""" condense_code = dedent(""" PetscCall(MatProductNumeric(A11)); - PetscCall(MatAYPX(B, 0.0, A11, SUBSET_NONZERO_PATTERN)); + PetscCall(MatScale(B, 0.0)); """) def condense(self, result=None): """Pad with zeros the statically condensed pattern""" - structure = PETSc.Mat.Structure.SUBSET if result else None if result is None: _, A10, A01, A00 = self.submats result = A10.matMatMult(A00, A01, result=result) - result.aypx(0.0, self.submats[0], structure=structure) + result.scale(0.0) return result @@ -964,18 +966,15 @@ class SchurComplementDiagonal(SchurComplementKernel): PetscCall(MatSeqAIJRestoreArray(A00, &vals)); PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); """) def condense(self, result=None): - structure = PETSc.Mat.Structure.SUBSET if result else None A11, A10, A01, A00 = self.submats self.work[0] = A00.getDiagonal(result=self.work[0]) self.work[0].reciprocal() self.work[0].scale(-1) A01.diagonalScale(L=self.work[0]) result = A10.matMult(A01, result=result) - result.axpy(1.0, A11, structure=structure) return result @@ -1016,11 +1015,10 @@ class SchurComplementBlockCholesky(SchurComplementKernel): PetscCall(MatProductGetMats(B, &X, NULL, NULL)); PetscCall(MatProductNumeric(X)); PetscCall(MatProductNumeric(B)); - PetscCall(MatAYPX(B, -1.0, A11, SUBSET_NONZERO_PATTERN)); + PetscCall(MatScale(B, -1.0)); """) def condense(self, result=None): - structure = PETSc.Mat.Structure.SUBSET if result else None # asssume that A10 = A01^T A11, _, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() @@ -1041,7 +1039,7 @@ def condense(self, result=None): A00.assemble() self.work[0] = A00.matMult(A01, result=self.work[0]) result = self.work[0].transposeMatMult(self.work[0], result=result) - result.aypx(-1.0, A11, structure=structure) + result.scale(-1.0) return result @@ -1113,13 +1111,11 @@ class SchurComplementBlockLU(SchurComplementKernel): PetscCall(MatSeqAIJRestoreArray(A00, &vals)); PetscCall(PetscFree3(ipiv, perm, work)); - // B = A11 - A10 * inv(L^T) * X + // B = - A10 * inv(L^T) * X PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); """) def condense(self, result=None): - structure = PETSc.Mat.Structure.SUBSET if result else None A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() Q = numpy.ones(R.shape, dtype=R.dtype) @@ -1144,7 +1140,6 @@ def condense(self, result=None): A00.assemble() A00.scale(-1.0) result = A10.matMatMult(A00, self.work[0], result=result) - result.axpy(1.0, A11, structure=structure) return result @@ -1192,11 +1187,9 @@ class SchurComplementBlockInverse(SchurComplementKernel): PetscCall(MatScale(A00, -1.0)); PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); """) def condense(self, result=None): - structure = PETSc.Mat.Structure.SUBSET if result else None A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() @@ -1215,7 +1208,6 @@ def condense(self, result=None): A00.assemble() A00.scale(-1.0) result = A10.matMatMult(A00, A01, result=result) - result.axpy(1.0, A11, structure=structure) return result From 0b1acb1cc159fbf86cf3f60e602a0521dc4339e2 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 4 Mar 2024 17:53:19 +0000 Subject: [PATCH 23/52] FDMPC: setISAllowRepeated --- firedrake/preconditioners/fdm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index bf91302ee1..6d2c753601 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -313,6 +313,7 @@ def mask_local_indices(V, lgmap, broken=False): preallocator = PETSc.Mat().create(comm=self.comm) preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) preallocator.setSizes(sizes) + preallocator.setISAllowRepeated(broken) preallocator.setLGMap(rlgmap, clgmap) preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) preallocator.setUp() @@ -326,6 +327,7 @@ def mask_local_indices(V, lgmap, broken=False): P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) + P.setISAllowRepeated(broken) P.setLGMap(rlgmap, clgmap) P.setPreallocationNNZ((dnz, onz)) @@ -355,6 +357,7 @@ def mask_local_indices(V, lgmap, broken=False): Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) assembly_callables.append(P.assemble) assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) + # assembly_callables.append(P.view) gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): From 2ce569a4a710f4ff2e3f6fd25c34ad376d438cc5 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 4 Mar 2024 20:12:24 +0000 Subject: [PATCH 24/52] code review suggestions --- firedrake/preconditioners/fdm.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 6d2c753601..0f26f51baa 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -804,7 +804,7 @@ class ElementKernel: code = dedent(""" PetscErrorCode %(name)s(const Mat A, const Mat B, %(indices)s) { PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") def __init__(self, A, name=None): @@ -833,7 +833,7 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): PetscCall(PetscMemcpy(vals, values, ai[m] * sizeof(*vals))); PetscCall(MatSeqAIJRestoreArrayWrite(A, &vals)); PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") if mat_type != "matfree": select_cols = """ @@ -861,7 +861,7 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): PetscCall(MatSeqAIJRestoreArrayRead(B, &vals)); PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); PetscCall(PetscFree(indices)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""" % {"select_cols": select_cols if mat_type.endswith("sbaij") else ""}) code += self.code % dict(self.rules, name=self.name, indices=", ".join("const PetscInt *restrict %s" % s for s in indices), @@ -881,7 +881,7 @@ class TripleProductKernel(ElementKernel): PetscCall(MatSetValuesArray(C, coefficients)); PetscCall(MatProductNumeric(B)); PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") def __init__(self, L, C, R, name=None): @@ -903,7 +903,7 @@ class SchurComplementKernel(ElementKernel): %(condense)s PetscCall(MatSetValuesLocalSparse(A, A11, %(rows)s, %(cols)s, %(addv)d)); PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") def __init__(self, *kernels, name=None): @@ -936,7 +936,7 @@ class SchurComplementPattern(SchurComplementKernel): """Kernel builder to pad with zeros the Schur complement sparsity pattern.""" condense_code = dedent(""" PetscCall(MatProductNumeric(A11)); - PetscCall(MatScale(B, 0.0)); + PetscCall(MatZeroEntries(B)); """) def condense(self, result=None): @@ -944,7 +944,7 @@ def condense(self, result=None): if result is None: _, A10, A01, A00 = self.submats result = A10.matMatMult(A00, A01, result=result) - result.scale(0.0) + result.zeroEntries() return result @@ -1281,7 +1281,7 @@ def matmult_kernel_code(a, prefix="form", fcp=None, matshell=False): %(matmult_call)s PetscCall(VecRestoreArrayRead(X, &x)); PetscCall(VecRestoreArray(Y, &y)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""" % {"prefix": prefix, "matmult_call": matmult_call("x", "y")}) return matmult_struct, matmult_call, ctx_struct, ctx_pack @@ -1312,7 +1312,7 @@ class InteriorSolveKernel(ElementKernel): PetscCall(KSPSolve(ksp, Y, X)); PetscCall(VecDestroy(&X)); PetscCall(VecDestroy(&Y)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") def __init__(self, kernel, form, name=None, prefix="interior_", fcp=None, pc_type="icc"): @@ -1392,7 +1392,7 @@ class ImplicitSchurComplementKernel(ElementKernel): for (i = 0; i < %(size)d; i++) y[i] = 0.0; %(A_call)s for (i = 0; i < %(fsize)d; i++) yf[i] += y[fdofs[i]]; - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") def __init__(self, kernel, name=None): @@ -1814,7 +1814,7 @@ def load_setSubMatCSR(comm, triu=False): PetscCall(MatRestoreRow(B, i, &ncols, &cols, &vals)); }} PetscCall(PetscFree(indices)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }} """) argtypes = [ctypes.c_voidp, ctypes.c_voidp, From 8f4dd6db67e2f4c593e7fd3549886ade1f0ec200 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 5 Mar 2024 10:12:57 +0000 Subject: [PATCH 25/52] cleanup --- firedrake/functionspaceimpl.py | 34 ++------------------------------ firedrake/preconditioners/fdm.py | 23 +++++++++++---------- 2 files changed, 15 insertions(+), 42 deletions(-) diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py index 7bf7bb2a3f..3d4f8db2c5 100644 --- a/firedrake/functionspaceimpl.py +++ b/firedrake/functionspaceimpl.py @@ -740,7 +740,7 @@ def boundary_nodes(self, sub_domain): return self._shared_data.boundary_nodes(self, sub_domain) @PETSc.Log.EventDecorator() - def local_to_global_map(self, bcs, lgmap=None, non_ghost_cells=False): + def local_to_global_map(self, bcs, lgmap=None): r"""Return a map from process local dof numbering to global dof numbering. If BCs is provided, mask out those dofs which match the BC nodes.""" @@ -748,7 +748,7 @@ def local_to_global_map(self, bcs, lgmap=None, non_ghost_cells=False): # not just on the bcs, but also the parent space, and anything # this space has been recursively split out from [e.g. inside # fieldsplit] - if not non_ghost_cells and (bcs is None or len(bcs) == 0): + if bcs is None or len(bcs) == 0: return lgmap or self.dof_dset.lgmap for bc in bcs: fs = bc.function_space() @@ -783,21 +783,6 @@ def local_to_global_map(self, bcs, lgmap=None, non_ghost_cells=False): nodes.append(tmp + i) else: nodes.append(bc.nodes) - if non_ghost_cells: - mesh = self.mesh() - cell_nodes = self.cell_node_list[:mesh.cell_set.size] - if mesh.cell_set._extruded: - cell_nodes = extrude_cell_node_list(cell_nodes, self.offset, mesh.layers) - non_ghost_cell_nodes = numpy.unique(cell_nodes.flatten()) - cdim = self.dof_dset.cdim - if not unblocked and cdim > 1: - non_ghost_cell_nodes *= cdim - non_ghost_cell_nodes = numpy.add.outer(non_ghost_cell_nodes, - numpy.arange(cdim, dtype=cell_nodes.dtype)) - ghost_cell_nodes = numpy.setdiff1d(numpy.arange(len(indices), dtype=cell_nodes.dtype), - non_ghost_cell_nodes, - assume_unique=True) - nodes.append(ghost_cell_nodes) nodes = numpy.unique(numpy.concatenate(nodes)) indices[nodes] = -1 return PETSc.LGMap().create(indices, bsize=bsize, comm=lgmap.comm) @@ -1179,18 +1164,3 @@ class FunctionSpaceCargo: topological: FunctionSpace parent: Optional[WithGeometryBase] - - -def extrude_cell_node_list(cell_node_list, offset, layers): - if offset is None: - cell_nodes = cell_node_list - elif layers.size == 1: - nlayers = layers - 1 - offsets = numpy.outer(numpy.arange(nlayers, dtype=offset.dtype), offset) - cell_nodes = cell_node_list[:, None, :] + offsets[None, :, :] - else: - nlayers = layers[:, 1] - layers[:, 0] - 1 - offsets = numpy.outer(numpy.arange(max(nlayers), dtype=offset.dtype), offset) - cell_nodes = numpy.concatenate([base_nodes[None, :] + offsets[:n, :] - for n, base_nodes in zip(nlayers, cell_node_list)]) - return cell_nodes diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 0f26f51baa..57d686f8c5 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -125,14 +125,11 @@ def initialize(self, pc): # Transform the problem into the space with FDM shape functions V = J.arguments()[-1].function_space() - element = V.ufl_element() - e_fdm = element.reconstruct(variant=self._variant) - - if element == e_fdm: - V_fdm, J_fdm, bcs_fdm = (V, J, bcs) + V_fdm = V.reconstruct(variant=self._variant) + if V == V_fdm: + J_fdm, bcs_fdm = (J, bcs) else: # Reconstruct Jacobian and bcs with variant element - V_fdm = FunctionSpace(V.mesh(), e_fdm) J_fdm = J(*(t.reconstruct(function_space=V_fdm) for t in J.arguments())) bcs_fdm = [] for bc in bcs: @@ -242,13 +239,19 @@ def get_broken_indices(V, indices): par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)}) return w - def get_lgmap(V, broken=False): + def get_lgmap(V, mat_type, broken=False): lgmap = V.dof_dset.lgmap + if pmat_type != "is": + return lgmap if broken: indices = get_broken_indices(V, lgmap.indices).dat.data_ro - return PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) else: - return V.local_to_global_map([], lgmap=lgmap, non_ghost_cells=True) + indices = lgmap.indices.copy() + local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) + cell_node_map = get_broken_indices(V, local_indices) + ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map.dat.data_ro), assume_unique=True) + indices[ghost] = -1 + return PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) def mask_local_indices(V, lgmap, broken=False): mask = lgmap.indices @@ -263,7 +266,7 @@ def mask_local_indices(V, lgmap, broken=False): return indices_acc broken = pmat_type == "is" - self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub, broken) for Vsub in V} + self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub, pmat_type, broken) for Vsub in V} self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], broken) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) From 470fc591fd9f19a5b674166b36b19bc5ac0680f8 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 5 Mar 2024 10:35:47 +0000 Subject: [PATCH 26/52] Wrap PCBDDC --- firedrake/preconditioners/__init__.py | 1 + firedrake/preconditioners/bddc.py | 49 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 firedrake/preconditioners/bddc.py diff --git a/firedrake/preconditioners/__init__.py b/firedrake/preconditioners/__init__.py index cd75ae7380..491a73657b 100644 --- a/firedrake/preconditioners/__init__.py +++ b/firedrake/preconditioners/__init__.py @@ -12,3 +12,4 @@ from firedrake.preconditioners.fdm import * # noqa: F401 from firedrake.preconditioners.hiptmair import * # noqa: F401 from firedrake.preconditioners.facet_split import * # noqa: F401 +from firedrake.preconditioners.bddc import * # noqa: F401 diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py new file mode 100644 index 0000000000..441ab3cd0d --- /dev/null +++ b/firedrake/preconditioners/bddc.py @@ -0,0 +1,49 @@ +from firedrake.preconditioners.base import PCBase +from firedrake.preconditioners.patch import bcdofs +from firedrake.petsc import PETSc +from firedrake.dmhooks import get_appctx +import numpy + + +__all__ = ("BDDCPC",) + + +class BDDCPC(PCBase): + """PC for PETSc PCBDDC""" + + _prefix = "bddc" + + def initialize(self, pc): + # Get context from pc + _, P = pc.getOperators() + dm = pc.getDM() + self.prefix = pc.getOptionsPrefix() + self._prefix + + # Create new PC object as BDDC type + bddcpc = PETSc.PC().create(comm=pc.comm) + bddcpc.incrementTabLevel(1, parent=pc) + bddcpc.setOptionsPrefix(self.prefix) + bddcpc.setOperators(*pc.getOperators()) + bddcpc.setType(PETSc.PC.Type.BDDC) + + ctx = get_appctx(dm) + bcs = tuple(ctx._problem.bcs) + if len(bcs) > 0: + bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) + bndr = PETSc.IS().createGeneral(bc_nodes, comm=pc.comm) + bddcpc.setBDDCDirichletBoundariesLocal(bndr) + + bddcpc.setFromOptions() + self.pc = bddcpc + + def view(self, pc, viewer=None): + self.pc.view(viewer=viewer) + + def update(self, pc): + pass + + def apply(self, pc, x, y): + self.pc.apply(x, y) + + def applyTranspose(self, pc, x, y): + self.pc.applyTranspose(x, y) From 8cd47527103888ff1a9ca10266fe3368242ffbd5 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 5 Mar 2024 11:51:17 +0000 Subject: [PATCH 27/52] BDDC: use global bc nodes --- firedrake/preconditioners/bddc.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 441ab3cd0d..a2ca36ccb1 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -1,7 +1,7 @@ from firedrake.preconditioners.base import PCBase from firedrake.preconditioners.patch import bcdofs from firedrake.petsc import PETSc -from firedrake.dmhooks import get_appctx +from firedrake.dmhooks import get_function_space, get_appctx import numpy @@ -19,6 +19,8 @@ def initialize(self, pc): dm = pc.getDM() self.prefix = pc.getOptionsPrefix() + self._prefix + V = get_function_space(dm) + # Create new PC object as BDDC type bddcpc = PETSc.PC().create(comm=pc.comm) bddcpc.incrementTabLevel(1, parent=pc) @@ -30,8 +32,9 @@ def initialize(self, pc): bcs = tuple(ctx._problem.bcs) if len(bcs) > 0: bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) + V.dof_dset.lgmap.apply(bc_nodes, result=bc_nodes) bndr = PETSc.IS().createGeneral(bc_nodes, comm=pc.comm) - bddcpc.setBDDCDirichletBoundariesLocal(bndr) + bddcpc.setBDDCDirichletBoundaries(bndr) bddcpc.setFromOptions() self.pc = bddcpc From b2342c5e7ef328780183a4576a0cae11fdf3d6ca Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 5 Mar 2024 12:15:22 +0000 Subject: [PATCH 28/52] BDDCPC: attach discrete gradient and divergence mat --- firedrake/preconditioners/bddc.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index a2ca36ccb1..fece725721 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -1,10 +1,13 @@ from firedrake.preconditioners.base import PCBase +from firedrake.preconditioners.fdm import tabulate_exterior_derivative from firedrake.preconditioners.patch import bcdofs from firedrake.petsc import PETSc from firedrake.dmhooks import get_function_space, get_appctx +from firedrake.ufl_expr import TestFunction, TrialFunction +from ufl import inner, div, dx, HCurl, HDiv +from pyop2.utils import as_tuple import numpy - __all__ = ("BDDCPC",) @@ -36,6 +39,27 @@ def initialize(self, pc): bndr = PETSc.IS().createGeneral(bc_nodes, comm=pc.comm) bddcpc.setBDDCDirichletBoundaries(bndr) + appctx = self.get_appctx(pc) + sobolev_space = V.ufl_element().sobolev_space + if sobolev_space == HCurl: + if "discrete_gradient" in appctx: + gradient = appctx["discrete_gradient"] + else: + Q = V.reconstruct(family="Lagrange") + gradient = tabulate_exterior_derivative(Q, V) + bddcpc.setBDDCDiscreteGradient(gradient) + + elif sobolev_space == HDiv: + if "divergence_mat" in appctx: + B = appctx["divergence_mat"] + else: + from firedrake.assemble import assemble + degree = max(as_tuple(V.ufl_element().degree())) + Q = V.reconstruct(family="DG", degree=degree-1, variant=None) + b = inner(div(TrialFunction(V)), TestFunction(Q)) * dx + B = assemble(b, mat_type="matfree") + bddcpc.setBDDCDivergenceMat(B.petscmat) + bddcpc.setFromOptions() self.pc = bddcpc From 193e3b67fb1abea3e947e1de13fbfb62c4174633 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 5 Mar 2024 13:45:12 +0000 Subject: [PATCH 29/52] add missing underscore to prefix --- firedrake/preconditioners/bddc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index fece725721..1393748864 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -14,7 +14,7 @@ class BDDCPC(PCBase): """PC for PETSc PCBDDC""" - _prefix = "bddc" + _prefix = "bddc_" def initialize(self, pc): # Get context from pc From b0c2a897343dc9d5d84e8f5666a428c7370c2459 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 5 Mar 2024 17:00:48 +0000 Subject: [PATCH 30/52] normalize subVectors --- firedrake/preconditioners/bddc.py | 12 +++++------- firedrake/preconditioners/fdm.py | 5 ++++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 1393748864..2ab41743cd 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -1,5 +1,4 @@ from firedrake.preconditioners.base import PCBase -from firedrake.preconditioners.fdm import tabulate_exterior_derivative from firedrake.preconditioners.patch import bcdofs from firedrake.petsc import PETSc from firedrake.dmhooks import get_function_space, get_appctx @@ -42,17 +41,16 @@ def initialize(self, pc): appctx = self.get_appctx(pc) sobolev_space = V.ufl_element().sobolev_space if sobolev_space == HCurl: - if "discrete_gradient" in appctx: - gradient = appctx["discrete_gradient"] - else: + gradient = appctx.get("discrete_gradient", None) + if gradient is None: + from firedrake.preconditioners.fdm import tabulate_exterior_derivative Q = V.reconstruct(family="Lagrange") gradient = tabulate_exterior_derivative(Q, V) bddcpc.setBDDCDiscreteGradient(gradient) elif sobolev_space == HDiv: - if "divergence_mat" in appctx: - B = appctx["divergence_mat"] - else: + B = appctx.get("divergence_mat", None) + if B is None: from firedrake.assemble import assemble degree = max(as_tuple(V.ufl_element().degree())) Q = V.reconstruct(family="DG", degree=degree-1, variant=None) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 57d686f8c5..23555acbd5 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -371,8 +371,11 @@ def mask_local_indices(V, lgmap, broken=False): def sub_nullspace(nsp, iset): if not nsp.handle or iset is None: return nsp + vectors = [vec.getSubVector(iset) for vec in nsp.getVecs()] + for v in vectors: + v.normalize() return PETSc.NullSpace().create(constant=nsp.hasConstant(), - vectors=[vec.getSubVector(iset) for vec in nsp.getVecs()], + vectors=vectors, comm=nsp.getComm()) def set_nullspaces(P, A, iset=None): From c29c0f4aecb9b4fdd295e06af702e76cc8e66f58 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 5 Mar 2024 17:27:34 +0000 Subject: [PATCH 31/52] BDDCPC: Disable computation of disconected components of subdomain interfaces --- firedrake/preconditioners/bddc.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 2ab41743cd..7393786190 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -30,6 +30,11 @@ def initialize(self, pc): bddcpc.setOperators(*pc.getOperators()) bddcpc.setType(PETSc.PC.Type.BDDC) + # Disable computation of disconected components of subdomain interfaces + opts = PETSc.Options(bddcpc.getOptionsPrefix()) + if "pc_bddc_use_local_mat_graph" not in opts: + opts["pc_bddc_use_local_mat_graph"] = False + ctx = get_appctx(dm) bcs = tuple(ctx._problem.bcs) if len(bcs) > 0: From c240e3492300c629de443699164ac334662e75a1 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 8 Mar 2024 10:58:19 +0000 Subject: [PATCH 32/52] refactor --- firedrake/preconditioners/fdm.py | 132 +++++++++++---------- firedrake/preconditioners/pmg.py | 189 ++++++++++++++++++------------- 2 files changed, 181 insertions(+), 140 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 23555acbd5..3167e8d429 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -266,12 +266,12 @@ def mask_local_indices(V, lgmap, broken=False): return indices_acc broken = pmat_type == "is" + self.V = V self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub, pmat_type, broken) for Vsub in V} self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], broken) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} - self.kernels = [] Pmats = {} # Dictionary with kernel to compute the Schur complement @@ -308,40 +308,7 @@ def mask_local_indices(V, lgmap, broken=False): # Preallocate and assemble the FDM auxiliary sparse operator on_diag = Vrow == Vcol - sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) - ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ - rlgmap = self.non_ghosted_lgmaps[Vrow] - clgmap = self.non_ghosted_lgmaps[Vcol] - - preallocator = PETSc.Mat().create(comm=self.comm) - preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) - preallocator.setSizes(sizes) - preallocator.setISAllowRepeated(broken) - preallocator.setLGMap(rlgmap, clgmap) - preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) - preallocator.setUp() - self.set_values(preallocator, Vrow, Vcol, addv, mat_type=ptype) - preallocator.assemble() - dnz, onz = get_preallocation(preallocator, sizes[0][0]) - if on_diag: - numpy.maximum(dnz, 1, out=dnz) - preallocator.destroy() - - P = PETSc.Mat().create(comm=self.comm) - P.setType(ptype) - P.setSizes(sizes) - P.setISAllowRepeated(broken) - P.setLGMap(rlgmap, clgmap) - P.setPreallocationNNZ((dnz, onz)) - - P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) - P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, ptype != "is") - P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) - P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) - if ptype.endswith("sbaij"): - P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) - P.setUp() - + P = self.setup_block(Vrow, Vcol, pmat_type, broken, addv) self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") # populate diagonal entries if on_diag: @@ -353,7 +320,7 @@ def mask_local_indices(V, lgmap, broken=False): # append callables to zero entries, insert element matrices, and apply BCs assembly_callables.append(P.zeroEntries) - assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv, mat_type=ptype)) + assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv, mat_type=P.getType())) if on_diag: own = Vrow.dof_dset.layout_vec.getLocalSize() bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] @@ -391,8 +358,7 @@ def set_nullspaces(P, A, iset=None): Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) for Vrow, iset in zip(V, Pmat.getNestISs()[0]): set_nullspaces(Pmats[Vrow, Vrow], Amat, iset=iset) - assembly_callables.append(Pmat.assemble) - + assembly_callables.append(Pmat.assemble) assembly_callables.extend(diagonal_terms) return Amat, Pmat, assembly_callables @@ -568,9 +534,7 @@ def assemble_coefficients(self, J, fcp, block_diagonal=False): V = args_J[0].function_space() fe = V.finat_element formdegree = fe.formdegree - degree = fe.degree - if type(degree) != int: - degree, = set(degree) + degree, = set(as_tuple(fe.degree)) qdeg = degree if formdegree == tdim: qfam = "DG" if tdim == 1 else "DQ" @@ -637,9 +601,7 @@ def assemble_reference_tensor(self, V, transpose=False, sort_interior=False): fe = V.finat_element tdim = fe.cell.get_spatial_dimension() formdegree = fe.formdegree - degree = fe.degree - if type(degree) != int: - degree, = set(degree) + degree, = set(as_tuple(fe.degree)) if formdegree == tdim: degree = degree + 1 is_interior, is_facet = is_restricted(fe) @@ -736,6 +698,67 @@ def _element_mass_matrix(self): data = numpy.tile(numpy.eye(shape[2], dtype=data.dtype), shape[:1] + (1,)*(len(shape)-1)) return PETSc.Mat().createAIJ((nrows, nrows), csr=(ai, aj, data), comm=PETSc.COMM_SELF) + @cached_property + def _element_kernels(self): + M = self._element_mass_matrix + element_kernels = {} + for Vrow, Vcol in product(self.V, self.V): + # Interpolation of basis and exterior derivative onto broken spaces + C1 = self.assemble_reference_tensor(Vcol) + R1 = self.assemble_reference_tensor(Vrow, transpose=True) + # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b + element_kernel = TripleProductKernel(R1, M, C1) + schur_kernel = self.schur_kernel.get(Vrow) if Vrow == Vcol else None + if schur_kernel is not None: + V0 = FunctionSpace(Vrow.mesh(), restrict_element(self.embedding_element, "interior")) + C0 = self.assemble_reference_tensor(V0, sort_interior=True) + R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True) + element_kernel = schur_kernel(element_kernel, + TripleProductKernel(R1, M, C0), + TripleProductKernel(R0, M, C1), + TripleProductKernel(R0, M, C0)) + element_kernels[Vrow, Vcol] = element_kernel + return element_kernels + + def setup_block(self, Vrow, Vcol, pmat_type, broken, addv): + # Preallocate the auxiliary sparse operator + sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) + rmap = self.non_ghosted_lgmaps[Vrow] + cmap = self.non_ghosted_lgmaps[Vcol] + on_diag = Vrow == Vcol + ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ + + preallocator = PETSc.Mat().create(comm=self.comm) + preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) + preallocator.setSizes(sizes) + preallocator.setISAllowRepeated(broken) + preallocator.setLGMap(rmap, cmap) + preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) + preallocator.setUp() + self.set_values(preallocator, Vrow, Vcol, addv, mat_type=ptype) + preallocator.assemble() + dnz, onz = get_preallocation(preallocator, sizes[0][0]) + preallocator.destroy() + + if on_diag: + numpy.maximum(dnz, 1, out=dnz) + P = PETSc.Mat().create(comm=self.comm) + P.setType(ptype) + P.setSizes(sizes) + P.setISAllowRepeated(broken) + P.setLGMap(rmap, cmap) + P.setPreallocationNNZ((dnz, onz)) + + P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, ptype != "is") + P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) + P.setOption(PETSc.Mat.Option.FORCE_DIAGONAL_ENTRIES, True) + P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) + if ptype.endswith("sbaij"): + P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) + P.setUp() + return P + @PETSc.Log.EventDecorator("FDMSetValues") def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): """Assemble the auxiliary operator in the FDM basis using sparse @@ -760,27 +783,12 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): try: assembler = self.assemblers[key] except KeyError: - M = self._element_mass_matrix - # Interpolation of basis and exterior derivative onto broken spaces - C1 = self.assemble_reference_tensor(Vcol) - R1 = self.assemble_reference_tensor(Vrow, transpose=True) - - # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b - element_kernel = TripleProductKernel(R1, M, C1) - schur_kernel = self.schur_kernel.get(Vrow) if on_diag else None - if schur_kernel is not None: - V0 = FunctionSpace(Vrow.mesh(), restrict_element(self.embedding_element, "interior")) - C0 = self.assemble_reference_tensor(V0, sort_interior=True) - R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True) - element_kernel = schur_kernel(element_kernel, - TripleProductKernel(R1, M, C0), - TripleProductKernel(R0, M, C1), - TripleProductKernel(R0, M, C0)) - self.kernels.append(element_kernel) spaces = (Vrow,) if on_diag else (Vrow, Vcol) indices_acc = tuple(self.indices_acc[V] for V in spaces) coefficients = self.coefficients["cell"] coefficients_acc = coefficients.dat(op2.READ, coefficients.cell_node_map()) + + element_kernel = self._element_kernels[Vrow, Vcol] kernel = element_kernel.kernel(on_diag=on_diag, addv=addv) assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, *element_kernel.make_args(A), @@ -2138,7 +2146,7 @@ def assemble_coefficients(self, J, fcp): tdim = mesh.topological_dimension() Finv = ufl.JacobianInverse(mesh) - degree = max(as_tuple(V.ufl_element().degree())) + degree, = set(as_tuple(V.ufl_element().degree())) quad_deg = fcp.get("degree", 2*degree+1) dx = ufl.dx(degree=quad_deg, domain=mesh) family = "Discontinuous Lagrange" if tdim == 1 else "DQ" diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 0d808eed6c..49cdb42aad 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -452,9 +452,11 @@ def configure_pmg(self, pc, pdm): # the p-MG's coarse problem. So we need to set an option # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. - opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") - opts["mg_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 - + max_levels = odm.getRefineLevel() + 1 + if max_levels > 1: + opts = PETSc.Options(pc.getOptionsPrefix() + "pmg_") + if opts.getString("mg_coarse_pc_type") == "mg": + opts["mg_coarse_pc_mg_levels"] = min(opts.getInt("mg_coarse_pc_mg_levels", max_levels), max_levels) return ppc def apply(self, pc, x, y): @@ -495,10 +497,13 @@ def configure_pmg(self, snes, pdm): # the p-MG's coarse problem. So we need to set an option # for the user, if they haven't already; I don't know any # other way to get PETSc to know this at the right time. - opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") - opts["fas_coarse_pc_mg_levels"] = odm.getRefineLevel() + 1 - opts["fas_coarse_snes_fas_levels"] = odm.getRefineLevel() + 1 - + max_levels = odm.getRefineLevel() + 1 + if max_levels > 1: + opts = PETSc.Options(snes.getOptionsPrefix() + "pfas_") + if opts.getString("fas_coarse_pc_type") == "mg": + opts["fas_coarse_pc_mg_levels"] = min(opts.getInt("fas_coarse_pc_mg_levels", max_levels), max_levels) + if opts.getString("fas_coarse_snes_type") == "fas": + opts["fas_coarse_snes_fas_levels"] = min(opts.getInt("fas_coarse_snes_fas_levels", max_levels), max_levels) return psnes def step(self, snes, x, f, y): @@ -607,22 +612,18 @@ def evaluate_dual(source, target, derivative=None): A read-only :class:`numpy.ndarray` with the evaluation of the target dual basis on the (derivative of the) source primal basis. """ - primal = source.get_nodal_basis() + if derivative is None: + primal = source.get_nodal_basis() + else: + from FIAT.demkowicz import project_derivative + primal = project_derivative(source, derivative) + + coeffs = primal.get_coeffs() dual = target.get_dual_set() - A = dual.to_riesz(primal) - B = numpy.transpose(primal.get_coeffs()) - if derivative == "grad": - dmats = primal.get_dmats() - assert len(dmats) == 1 - alpha = (1,) - for i in range(len(alpha)): - for j in range(alpha[i]): - B = numpy.dot(dmats[i], B) - elif derivative in ("curl", "div"): - raise NotImplementedError(f"Dual evaluation of {derivative} is not yet implemented.") - elif derivative is not None: - raise ValueError(f"Invalid derivaitve type {derivative}.") - return get_readonly_view(numpy.dot(A, B)) + dual_mat = dual.to_riesz(primal) + A = dual_mat.reshape((dual_mat.shape[0], -1)) + B = coeffs.reshape((-1, A.shape[1])) + return get_readonly_view(numpy.dot(A, B.T)) @cached({}, key=generate_key_evaluate_dual) @@ -1168,16 +1169,16 @@ class StandaloneInterpolationMatrix(object): """ Interpolation matrix for a single standalone space. """ - _cache_work = {} - def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs): + def __init__(self, Vc, Vf, Vc_bcs, Vf_bcs, derivative=False): self.uc = self.work_function(Vc) self.uf = self.work_function(Vf) self.Vc = self.uc.function_space() self.Vf = self.uf.function_space() self.Vc_bcs = Vc_bcs self.Vf_bcs = Vf_bcs + self.derivative = derivative def work_function(self, V): if isinstance(V, firedrake.Function): @@ -1195,7 +1196,6 @@ def _weight(self): kernel_code = f""" void weight(PetscScalar *restrict w){{ for(PetscInt i=0; i<{size}; i++) w[i] += 1.0; - return; }} """ kernel = op2.Kernel(kernel_code, "weight", requires_zeroed_output_arguments=True) @@ -1205,7 +1205,8 @@ def _weight(self): return weight @cached_property - def _kernels(self): + def _parloops(self): + self._prolong_write = True try: # We generate custom prolongation and restriction kernels mainly because: # 1. Code generation for the transpose of prolongation is not readily available @@ -1217,6 +1218,7 @@ def _kernels(self): self.uf.dat(op2.INC, uf_map), self.uc.dat(op2.READ, uc_map), self._weight.dat(op2.READ, uf_map)] + self._prolong_write = False except ValueError: # The elements do not have the expected tensor product structure # Fall back to aij kernels @@ -1237,14 +1239,10 @@ def _kernels(self): return prolong, restrict def _prolong(self): - with self.uf.dat.vec_wo as uf: - uf.set(0.0E0) - self._kernels[0]() + self._parloops[0]() def _restrict(self): - with self.uc.dat.vec_wo as uc: - uc.set(0.0E0) - self._kernels[1]() + self._parloops[1]() def view(self, mat, viewer=None): if viewer is None: @@ -1272,8 +1270,7 @@ def getInfo(self, mat, info=None): else: raise ValueError("Unknown info type %s" % info) - @staticmethod - def make_blas_kernels(Vf, Vc): + def make_blas_kernels(self, Vf, Vc): """ Interpolation and restriction kernels between CG / DG tensor product spaces on quads and hexes. @@ -1283,6 +1280,9 @@ def make_blas_kernels(Vf, Vc): and using the fact that the 2D / 3D tabulation is the tensor product J = kron(Jhat, kron(Jhat, Jhat)) """ + if self.derivative: + # TODO hook up tabulate_exterior_derivative from fdm.py + raise ValueError felem = Vf.ufl_element() celem = Vc.ufl_element() fmapping = felem.mapping().lower() @@ -1412,14 +1412,47 @@ def make_blas_kernels(Vf, Vc): ldargs=BLASLAPACK_LIB.split(), requires_zeroed_output_arguments=True) return prolong_kernel, restrict_kernel, coefficients + def mat_mult_kernel(self, A, value_size, transpose=False): + if transpose: + A = A.T + name = "matmult" + code = f""" + void {name}(PetscScalar *restrict y, const PetscScalar *restrict x + {", const PetscScalar *restrict w" if transpose else ""} + ){{ + const PetscScalar A[] = {{ {", ".join(map(float.hex, A.flat))} }}; + {IntType_c} i0, i1; + for ({IntType_c} k = 0; k < {value_size}; k++) {{ + for ({IntType_c} i = 0; i < {A.shape[0]}; i++) {{ + i0 = i * {value_size} + k; + {"" if transpose else "y[i0] = 0;"} + for ({IntType_c} j = 0; j < {A.shape[1]}; j++) {{ + i1 = j * {value_size} + k; + y[i0] += A[i * {A.shape[1]} + j] * {"(x[i1] * w[i1])" if transpose else "x[i1]"}; + }} + }} + }} + }}""" + return op2.Kernel(code, name, requires_zeroed_output_arguments=transpose) + def make_kernels(self, Vf, Vc): """ Interpolation and restriction kernels between arbitrary elements. This is temporary while we wait for dual evaluation in FInAT. """ - prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, self.uc) - matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, firedrake.TestFunction(Vc)) + if Vf.finat_element.formdegree == Vc.finat_element.formdegree + self.derivative and Vf.shape == Vc.shape: + derivative = None + if self.derivative: + derivative = {ufl.HCurl: "grad", ufl.HDiv: "curl", ufl.L2: "div"}[Vf.ufl_element().sobolev_space] + A = evaluate_dual(Vc.finat_element.fiat_equivalent, Vf.finat_element.fiat_equivalent, derivative=derivative) + return self.mat_mult_kernel(A, Vf.value_size), self.mat_mult_kernel(A, Vf.value_size, transpose=True), [] + + expr = self.uc + if self.derivative: + expr = ufl.exterior_derivative(expr) + prolong_kernel, _ = prolongation_transfer_kernel_action(Vf, expr) + matrix_kernel, coefficients = prolongation_transfer_kernel_action(Vf, ufl.derivative(expr, self.uc)) # The way we transpose the prolongation kernel is suboptimal. # A local matrix is generated each time the kernel is executed. @@ -1449,48 +1482,45 @@ def make_kernels(self, Vf, Vc): ) return prolong_kernel, restrict_kernel, coefficients - def multTranspose(self, mat, rf, rc): - """ - Implement restriction: restrict residual on fine grid rf to coarse grid rc. - """ - with self.uf.dat.vec_wo as uf: - rf.copy(uf) - for bc in self.Vf_bcs: - bc.zero(self.uf) - - self._restrict() + def _copy(self, x, y): + if x.handle != y.handle: + x.copy(y) - for bc in self.Vc_bcs: - bc.zero(self.uc) - with self.uc.dat.vec_ro as uc: - uc.copy(rc) + def _op(self, action, x_bcs, y_bcs, x, y, X, Y, W=None, inc=False): + out = Y if W is None else W + if inc: + self._copy(Y, out) + with y.dat.vec_wo as v: + if W is None or inc: + v.zeroEntries() + else: + self._copy(Y, v) + with x.dat.vec_wo as v: + self._copy(X, v) + for bc in x_bcs: + bc.zero(x) + action() + for bc in y_bcs: + bc.zero(y) + with y.dat.vec_ro as v: + if inc: + out.axpy(1.0, v) + else: + self._copy(v, out) - def mult(self, mat, xc, xf, inc=False): - """ - Implement prolongation: prolong correction on coarse grid xc to fine grid xf. - """ - with self.uc.dat.vec_wo as uc: - xc.copy(uc) - for bc in self.Vc_bcs: - bc.zero(self.uc) + def mult(self, mat, X, Y): + """Prolong correction on coarse grid X to fine grid Y.""" + self._op(self._prolong, self.Vc_bcs, self.Vf_bcs, self.uc, self.uf, X, Y) - self._prolong() + def multAdd(self, mat, X, Y, W): + self._op(self._prolong, self.Vc_bcs, self.Vf_bcs, self.uc, self.uf, X, Y, W=W, inc=self._prolong_write) - for bc in self.Vf_bcs: - bc.zero(self.uf) - if inc: - with self.uf.dat.vec_ro as uf: - xf.axpy(1.0, uf) - else: - with self.uf.dat.vec_ro as uf: - uf.copy(xf) + def multTranspose(self, mat, X, Y): + """Restrict residual on fine grid X to coarse grid Y.""" + self._op(self._restrict, self.Vf_bcs, self.Vc_bcs, self.uf, self.uc, X, Y) - def multAdd(self, mat, x, y, w): - if y.handle == w.handle: - self.mult(mat, x, w, inc=True) - else: - self.mult(mat, x, w) - w.axpy(1.0, y) + def multTransposeAdd(self, mat, X, Y, W): + self._op(self._restrict, self.Vf_bcs, self.Vc_bcs, self.uf, self.uc, X, Y, W=W) class MixedInterpolationMatrix(StandaloneInterpolationMatrix): @@ -1512,7 +1542,12 @@ def _standalones(self): return standalones @cached_property - def _kernels(self): + def _parloops(self): + # FIXME we cannot be lazy, as we do not know a priori if prolongation has write access + # Therefore we must initiliaze all standalone parloops in order to know the prolongation access + for s in self._standalones: + s._parloops + self._prolong_write = any(s._prolong_write for s in self._standalones) prolong = lambda: [s._prolong() for s in self._standalones] restrict = lambda: [s._restrict() for s in self._standalones] return prolong, restrict @@ -1535,10 +1570,8 @@ def prolongation_matrix_aij(P1, Pk, P1_bcs=[], Pk_bcs=[]): Pk = Pk.function_space() sp = op2.Sparsity((Pk.dof_dset, P1.dof_dset), - {(i, j): [(rmap, cmap, None)] - for i, rmap in enumerate(Pk.cell_node_map()) - for j, cmap in enumerate(P1.cell_node_map()) - if i == j}) + (Pk.cell_node_map(), + P1.cell_node_map())) mat = op2.Mat(sp, PETSc.ScalarType) mesh = Pk.mesh() From 24aa93189013a09cfe4aac9bbadff2fe2ff72384 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Fri, 8 Mar 2024 19:58:59 +0000 Subject: [PATCH 33/52] cannot preallocate upper triangular part of the matrix when using setValuesLocal, so just disable strict sbaij preallocation --- firedrake/preconditioners/bddc.py | 4 +-- firedrake/preconditioners/fdm.py | 50 ++++++++++++++++++------------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 7393786190..4995cbbfaf 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -30,9 +30,9 @@ def initialize(self, pc): bddcpc.setOperators(*pc.getOperators()) bddcpc.setType(PETSc.PC.Type.BDDC) - # Disable computation of disconected components of subdomain interfaces opts = PETSc.Options(bddcpc.getOptionsPrefix()) - if "pc_bddc_use_local_mat_graph" not in opts: + if V.ufl_element().variant() == "fdm": + # Disable computation of disconected components of subdomain interfaces opts["pc_bddc_use_local_mat_graph"] = False ctx = get_appctx(dm) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 3167e8d429..6f023f5a62 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -309,25 +309,31 @@ def mask_local_indices(V, lgmap, broken=False): # Preallocate and assemble the FDM auxiliary sparse operator on_diag = Vrow == Vcol P = self.setup_block(Vrow, Vcol, pmat_type, broken, addv) - self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") - # populate diagonal entries - if on_diag: - n = len(self.non_ghosted_lgmaps[Vrow].indices) - i = numpy.arange(n, dtype=PETSc.IntType).reshape(-1, 1) - v = numpy.ones(i.shape, dtype=PETSc.ScalarType) - P.setValuesLocalRCV(i, i, v, addv=addv) - P.assemble() + if pmat_type == "is": + self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") + # populate diagonal entries + if on_diag: + n = len(self.non_ghosted_lgmaps[Vrow].indices) + i = numpy.arange(n, dtype=PETSc.IntType)[:, None] + v = numpy.ones(i.shape, dtype=PETSc.ScalarType) + P.setValuesLocalRCV(i, i, v, addv=addv) + P.assemble() # append callables to zero entries, insert element matrices, and apply BCs assembly_callables.append(P.zeroEntries) - assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv, mat_type=P.getType())) + assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv)) if on_diag: own = Vrow.dof_dset.layout_vec.getLocalSize() bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] - Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) - assembly_callables.append(P.assemble) - assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) - # assembly_callables.append(P.view) + if pmat_type == "is": + Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) + assembly_callables.append(P.assemble) + assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) + # assembly_callables.append(P.view) + else: + v = numpy.ones(bdofs.shape, PETSc.ScalarType) + assembly_callables.append(partial(P.setValuesLocalRCV, bdofs, bdofs, v, addv=addv)) + assembly_callables.append(P.assemble) gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): @@ -358,7 +364,7 @@ def set_nullspaces(P, A, iset=None): Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) for Vrow, iset in zip(V, Pmat.getNestISs()[0]): set_nullspaces(Pmats[Vrow, Vrow], Amat, iset=iset) - assembly_callables.append(Pmat.assemble) + assembly_callables.append(Pmat.assemble) assembly_callables.extend(diagonal_terms) return Amat, Pmat, assembly_callables @@ -734,14 +740,15 @@ def setup_block(self, Vrow, Vcol, pmat_type, broken, addv): preallocator.setISAllowRepeated(broken) preallocator.setLGMap(rmap, cmap) preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) + if ptype.endswith("sbaij"): + preallocator.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) preallocator.setUp() self.set_values(preallocator, Vrow, Vcol, addv, mat_type=ptype) preallocator.assemble() dnz, onz = get_preallocation(preallocator, sizes[0][0]) - preallocator.destroy() - if on_diag: numpy.maximum(dnz, 1, out=dnz) + preallocator.destroy() P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) @@ -749,7 +756,8 @@ def setup_block(self, Vrow, Vcol, pmat_type, broken, addv): P.setLGMap(rmap, cmap) P.setPreallocationNNZ((dnz, onz)) - P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, ptype != "is") + if not (ptype.endswith("sbaij") or ptype == "is"): + P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) P.setOption(PETSc.Mat.Option.FORCE_DIAGONAL_ENTRIES, True) @@ -760,7 +768,7 @@ def setup_block(self, Vrow, Vcol, pmat_type, broken, addv): return P @PETSc.Log.EventDecorator("FDMSetValues") - def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): + def set_values(self, A, Vrow, Vcol, addv, mat_type=None): """Assemble the auxiliary operator in the FDM basis using sparse reference tensors and diagonal mass matrices. @@ -783,6 +791,7 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): try: assembler = self.assemblers[key] except KeyError: + mat_type = mat_type or A.getType() spaces = (Vrow,) if on_diag else (Vrow, Vcol) indices_acc = tuple(self.indices_acc[V] for V in spaces) coefficients = self.coefficients["cell"] @@ -850,9 +859,8 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): return PETSC_SUCCESS; }""") if mat_type != "matfree": - select_cols = """ - for (PetscInt j = ai[i]; j < ai[i + 1]; j++) - indices[j] -= (indices[j] < rindices[i]) * (indices[j] + 1);""" + select_cols = """for (PetscInt j = ai[i]; j < ai[i + 1]; j++) indices[j] -= (indices[j] < rindices[i]) * (indices[j] + 1);""" + select_cols = "" code += dedent(""" static inline PetscErrorCode MatSetValuesLocalSparse(const Mat A, const Mat B, const PetscInt *restrict rindices, From 4a002a2c5c7d6bc9443a1e559f39ba4df925e6ec Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 11 Mar 2024 11:04:22 +0000 Subject: [PATCH 34/52] refactor --- firedrake/preconditioners/facet_split.py | 7 +- firedrake/preconditioners/fdm.py | 96 +++++++++++++----------- 2 files changed, 53 insertions(+), 50 deletions(-) diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py index b35c5b8379..3c35d7423c 100644 --- a/firedrake/preconditioners/facet_split.py +++ b/firedrake/preconditioners/facet_split.py @@ -1,6 +1,7 @@ from functools import partial from mpi4py import MPI from pyop2 import op2, PermutedMap +from pyop2.utils import as_tuple from firedrake.petsc import PETSc from firedrake.preconditioners.base import PCBase import firedrake.dmhooks as dmhooks @@ -208,11 +209,7 @@ def split_dofs(elem): edofs = [[], []] for key in sorted(entity_dofs.keys()): vals = entity_dofs[key] - edim = key - try: - edim = sum(edim) - except TypeError: - pass + edim = sum(as_tuple(key)) for k in sorted(vals.keys()): edofs[edim < ndim].extend(sorted(vals[k])) return tuple(numpy.array(e, dtype=PETSc.IntType) for e in edofs) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 6f023f5a62..28924ee5d4 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -61,6 +61,33 @@ __all__ = ("FDMPC", "PoissonFDMPC") +def get_broken_indices(V, indices): + W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element())) + w = Function(W, dtype=indices.dtype) + v = Function(V, val=indices) + domain = "{[i]: 0 <= i < v.dofs}" + instructions = """ + for i + w[i] = v[i] + end + """ + par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)}) + return w + + +def mask_local_indices(V, lgmap, broken=False): + mask = lgmap.indices + if broken: + w = get_broken_indices(V, mask) + V = w.function_space() + mask = w.dat.data_ro_with_halos + indices = numpy.arange(len(mask), dtype=PETSc.IntType) + indices[mask == -1] = -1 + indices_dat = V.make_dat(val=indices) + indices_acc = indices_dat(op2.READ, V.cell_node_map()) + return indices_acc + + class FDMPC(PCBase): """ A preconditioner for tensor-product elements that changes the shape @@ -172,6 +199,8 @@ def initialize(self, pc): self.pc = fdmpc # Assemble the FDM preconditioner with sparse local matrices + self.V = V_fdm + self.mat_type = pmat_type Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation, use_amat) self._assemble_P() @@ -226,48 +255,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati self.fises = PETSc.IS().createBlock(Vbig.value_size, fdofs, comm=PETSc.COMM_SELF) # Create data structures needed for assembly - def get_broken_indices(V, indices): - W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element())) - w = Function(W, dtype=indices.dtype) - v = Function(V, val=indices) - domain = "{[i]: 0 <= i < v.dofs}" - instructions = """ - for i - w[i] = v[i] - end - """ - par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)}) - return w - - def get_lgmap(V, mat_type, broken=False): - lgmap = V.dof_dset.lgmap - if pmat_type != "is": - return lgmap - if broken: - indices = get_broken_indices(V, lgmap.indices).dat.data_ro - else: - indices = lgmap.indices.copy() - local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) - cell_node_map = get_broken_indices(V, local_indices) - ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map.dat.data_ro), assume_unique=True) - indices[ghost] = -1 - return PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) - - def mask_local_indices(V, lgmap, broken=False): - mask = lgmap.indices - if broken: - w = get_broken_indices(V, mask) - V = w.function_space() - mask = w.dat.data_ro_with_halos - indices = numpy.arange(len(mask), dtype=PETSc.IntType) - indices[mask == -1] = -1 - indices_dat = V.make_dat(val=indices) - indices_acc = indices_dat(op2.READ, V.cell_node_map()) - return indices_acc - broken = pmat_type == "is" - self.V = V - self.non_ghosted_lgmaps = {Vsub: get_lgmap(Vsub, pmat_type, broken) for Vsub in V} self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], broken) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) @@ -313,8 +301,7 @@ def mask_local_indices(V, lgmap, broken=False): self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") # populate diagonal entries if on_diag: - n = len(self.non_ghosted_lgmaps[Vrow].indices) - i = numpy.arange(n, dtype=PETSc.IntType)[:, None] + i = numpy.arange(P.getLGMap()[0].getSize(), dtype=PETSc.IntType)[:, None] v = numpy.ones(i.shape, dtype=PETSc.ScalarType) P.setValuesLocalRCV(i, i, v, addv=addv) P.assemble() @@ -726,11 +713,30 @@ def _element_kernels(self): element_kernels[Vrow, Vcol] = element_kernel return element_kernels + @cached_property + def assembly_lgmaps(self): + if self.mat_type != "is": + return {Vsub: Vsub.dof_dset.lgmap for Vsub in self.V} + broken = True + lgmaps = {} + for Vsub in self.V: + lgmap = Vsub.dof_dset.lgmap + if broken: + indices = get_broken_indices(Vsub, lgmap.indices).dat.data_ro + else: + indices = lgmap.indices.copy() + local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) + cell_node_map = get_broken_indices(Vsub, local_indices) + ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map.dat.data_ro), assume_unique=True) + indices[ghost] = -1 + lgmaps[Vsub] = PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) + return lgmaps + def setup_block(self, Vrow, Vcol, pmat_type, broken, addv): # Preallocate the auxiliary sparse operator sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) - rmap = self.non_ghosted_lgmaps[Vrow] - cmap = self.non_ghosted_lgmaps[Vcol] + rmap = self.assembly_lgmaps[Vrow] + cmap = self.assembly_lgmaps[Vcol] on_diag = Vrow == Vcol ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ From b6a6589433a61ed7727e6fbab71ad237770cbe2c Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 11 Mar 2024 17:32:33 +0000 Subject: [PATCH 35/52] refactor --- firedrake/preconditioners/fdm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 28924ee5d4..43c06b6c15 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -296,7 +296,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # Preallocate and assemble the FDM auxiliary sparse operator on_diag = Vrow == Vcol - P = self.setup_block(Vrow, Vcol, pmat_type, broken, addv) + P = self.setup_block(Vrow, Vcol, addv) if pmat_type == "is": self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") # populate diagonal entries @@ -732,18 +732,18 @@ def assembly_lgmaps(self): lgmaps[Vsub] = PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) return lgmaps - def setup_block(self, Vrow, Vcol, pmat_type, broken, addv): + def setup_block(self, Vrow, Vcol, addv): # Preallocate the auxiliary sparse operator sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) rmap = self.assembly_lgmaps[Vrow] cmap = self.assembly_lgmaps[Vcol] on_diag = Vrow == Vcol - ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ + ptype = self.mat_type if on_diag else PETSc.Mat.Type.AIJ preallocator = PETSc.Mat().create(comm=self.comm) preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) preallocator.setSizes(sizes) - preallocator.setISAllowRepeated(broken) + preallocator.setISAllowRepeated(True) preallocator.setLGMap(rmap, cmap) preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) if ptype.endswith("sbaij"): @@ -758,7 +758,7 @@ def setup_block(self, Vrow, Vcol, pmat_type, broken, addv): P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) - P.setISAllowRepeated(broken) + P.setISAllowRepeated(True) P.setLGMap(rmap, cmap) P.setPreallocationNNZ((dnz, onz)) From 64cf55bd114c8cd6072dc0859516b7d7b2bedda7 Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Tue, 12 Mar 2024 18:13:37 +0300 Subject: [PATCH 36/52] Call copy on subvector otherwise normalize will fail --- firedrake/preconditioners/fdm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 43c06b6c15..a72638aeda 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -331,7 +331,7 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati def sub_nullspace(nsp, iset): if not nsp.handle or iset is None: return nsp - vectors = [vec.getSubVector(iset) for vec in nsp.getVecs()] + vectors = [vec.getSubVector(iset).copy() for vec in nsp.getVecs()] for v in vectors: v.normalize() return PETSc.NullSpace().create(constant=nsp.hasConstant(), From 909aa746b00003b8496b5566ca078ab18913d683 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 12 Mar 2024 15:18:43 +0000 Subject: [PATCH 37/52] refactor --- firedrake/preconditioners/bddc.py | 2 +- firedrake/preconditioners/fdm.py | 89 ++++++++++++++++++------------- 2 files changed, 54 insertions(+), 37 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 4995cbbfaf..ae59aba322 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -31,7 +31,7 @@ def initialize(self, pc): bddcpc.setType(PETSc.PC.Type.BDDC) opts = PETSc.Options(bddcpc.getOptionsPrefix()) - if V.ufl_element().variant() == "fdm": + if V.ufl_element().variant() == "fdm" and "pc_bddc_use_local_mat_graph" not in opts: # Disable computation of disconected components of subdomain interfaces opts["pc_bddc_use_local_mat_graph"] = False diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 43c06b6c15..58ad2e880f 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -61,10 +61,14 @@ __all__ = ("FDMPC", "PoissonFDMPC") -def get_broken_indices(V, indices): +def get_insert_mode(Vrow, Vcol): + return PETSc.InsertMode.ADD_VALUES + + +def local_to_cell(V, val): W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element())) - w = Function(W, dtype=indices.dtype) - v = Function(V, val=indices) + w = Function(W, dtype=val.dtype) + v = Function(V, val=val) domain = "{[i]: 0 <= i < v.dofs}" instructions = """ for i @@ -75,10 +79,10 @@ def get_broken_indices(V, indices): return w -def mask_local_indices(V, lgmap, broken=False): +def mask_local_indices(V, lgmap, repeated=False): mask = lgmap.indices - if broken: - w = get_broken_indices(V, mask) + if repeated: + w = local_to_cell(V, mask) V = w.function_space() mask = w.dat.data_ro_with_halos indices = numpy.arange(len(mask), dtype=PETSc.IntType) @@ -201,6 +205,7 @@ def initialize(self, pc): # Assemble the FDM preconditioner with sparse local matrices self.V = V_fdm self.mat_type = pmat_type + self.allow_repeated = pmat_type == "is" Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation, use_amat) self._assemble_P() @@ -255,9 +260,8 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati self.fises = PETSc.IS().createBlock(Vbig.value_size, fdofs, comm=PETSc.COMM_SELF) # Create data structures needed for assembly - broken = pmat_type == "is" self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} - self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], broken) for Vsub in V} + self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], self.allow_repeated) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} Pmats = {} @@ -284,7 +288,6 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati Amat, Pmats = self.condense(Amat, J, bcs, fcp, pc_type=interior_pc_type) diagonal_terms = [] - addv = PETSc.InsertMode.ADD_VALUES # Loop over all pairs of subspaces for Vrow, Vcol in product(V, V): if (Vrow, Vcol) in Pmats: @@ -296,11 +299,14 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # Preallocate and assemble the FDM auxiliary sparse operator on_diag = Vrow == Vcol - P = self.setup_block(Vrow, Vcol, addv) - if pmat_type == "is": - self.set_values(P, Vrow, Vcol, addv, mat_type="preallocator") - # populate diagonal entries + P = self.setup_block(Vrow, Vcol) + addv = self.insert_mode[Vrow, Vcol] + + assemble_sparsity = P.getType() == "is" + if assemble_sparsity: + self.set_values(P, Vrow, Vcol, mat_type="preallocator") if on_diag: + # populate diagonal entries i = numpy.arange(P.getLGMap()[0].getSize(), dtype=PETSc.IntType)[:, None] v = numpy.ones(i.shape, dtype=PETSc.ScalarType) P.setValuesLocalRCV(i, i, v, addv=addv) @@ -308,11 +314,11 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # append callables to zero entries, insert element matrices, and apply BCs assembly_callables.append(P.zeroEntries) - assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv)) + assembly_callables.append(partial(self.set_values, P, Vrow, Vcol)) if on_diag: own = Vrow.dof_dset.layout_vec.getLocalSize() bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] - if pmat_type == "is": + if assemble_sparsity: Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) assembly_callables.append(P.assemble) assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) @@ -713,26 +719,40 @@ def _element_kernels(self): element_kernels[Vrow, Vcol] = element_kernel return element_kernels + @cached_property + def insert_mode(self): + is_dg = {} + for Vsub in self.V: + element = Vsub.finat_element + is_dg[Vsub] = element.entity_dofs() == element.entity_closure_dofs() + + insert_mode = {} + for Vrow, Vcol in product(self.V, self.V): + addv = PETSc.InsertMode.ADD_VALUES + if is_dg[Vrow] or is_dg[Vcol]: + addv = PETSc.InsertMode.INSERT + insert_mode[Vrow, Vcol] = addv + return insert_mode + @cached_property def assembly_lgmaps(self): if self.mat_type != "is": return {Vsub: Vsub.dof_dset.lgmap for Vsub in self.V} - broken = True lgmaps = {} for Vsub in self.V: lgmap = Vsub.dof_dset.lgmap - if broken: - indices = get_broken_indices(Vsub, lgmap.indices).dat.data_ro + if self.allow_repeated: + indices = local_to_cell(Vsub, lgmap.indices).dat.data_ro else: indices = lgmap.indices.copy() local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) - cell_node_map = get_broken_indices(Vsub, local_indices) + cell_node_map = local_to_cell(Vsub, local_indices) ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map.dat.data_ro), assume_unique=True) indices[ghost] = -1 lgmaps[Vsub] = PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) return lgmaps - def setup_block(self, Vrow, Vcol, addv): + def setup_block(self, Vrow, Vcol): # Preallocate the auxiliary sparse operator sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) rmap = self.assembly_lgmaps[Vrow] @@ -743,13 +763,13 @@ def setup_block(self, Vrow, Vcol, addv): preallocator = PETSc.Mat().create(comm=self.comm) preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) preallocator.setSizes(sizes) - preallocator.setISAllowRepeated(True) + preallocator.setISAllowRepeated(self.allow_repeated) preallocator.setLGMap(rmap, cmap) preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) if ptype.endswith("sbaij"): preallocator.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) preallocator.setUp() - self.set_values(preallocator, Vrow, Vcol, addv, mat_type=ptype) + self.set_values(preallocator, Vrow, Vcol) preallocator.assemble() dnz, onz = get_preallocation(preallocator, sizes[0][0]) if on_diag: @@ -758,7 +778,7 @@ def setup_block(self, Vrow, Vcol, addv): P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) - P.setISAllowRepeated(True) + P.setISAllowRepeated(self.allow_repeated) P.setLGMap(rmap, cmap) P.setPreallocationNNZ((dnz, onz)) @@ -774,7 +794,7 @@ def setup_block(self, Vrow, Vcol, addv): return P @PETSc.Log.EventDecorator("FDMSetValues") - def set_values(self, A, Vrow, Vcol, addv, mat_type=None): + def set_values(self, A, Vrow, Vcol, mat_type=None): """Assemble the auxiliary operator in the FDM basis using sparse reference tensors and diagonal mass matrices. @@ -786,18 +806,15 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type=None): The test space. Vcol : FunctionSpace The trial space. - addv : PETSc.Mat.InsertMode - Flag indicating if we want to insert or add matrix values. - mat_type : PETSc.Mat.Type - The matrix type of auxiliary operator. This only used when ``A`` is a preallocator - to determine the nonzeros on the upper triangual part of an ``'sbaij'`` matrix. """ key = (Vrow.ufl_element(), Vcol.ufl_element()) on_diag = Vrow == Vcol + if mat_type is None: + mat_type = A.getType() try: assembler = self.assemblers[key] except KeyError: - mat_type = mat_type or A.getType() + addv = self.insert_mode[Vrow, Vcol] spaces = (Vrow,) if on_diag else (Vrow, Vcol) indices_acc = tuple(self.indices_acc[V] for V in spaces) coefficients = self.coefficients["cell"] @@ -810,7 +827,7 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type=None): coefficients_acc, *indices_acc) self.assemblers.setdefault(key, assembler) - if A.getType() == "preallocator" or mat_type == "preallocator": + if mat_type == "preallocator": key = key + ("preallocator",) try: assembler = self.assemblers[key] @@ -883,14 +900,13 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): for (PetscInt i = 0; i < m; i++) { istart = ai[i]; ncols = ai[i + 1] - istart; - %(select_cols)s PetscCall(MatSetValuesLocal(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv)); } PetscCall(MatSeqAIJRestoreArrayRead(B, &vals)); PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); PetscCall(PetscFree(indices)); return PETSC_SUCCESS; - }""" % {"select_cols": select_cols if mat_type.endswith("sbaij") else ""}) + }""") code += self.code % dict(self.rules, name=self.name, indices=", ".join("const PetscInt *restrict %s" % s for s in indices), rows=indices[0], cols=indices[-1], addv=addv) @@ -1912,7 +1928,7 @@ def assemble_reference_tensor(self, V): return Afdm, Dfdm, bdof, axes_shifts @PETSc.Log.EventDecorator("FDMSetValues") - def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): + def set_values(self, A, Vrow, Vcol, mat_type=None): """Assemble the stiffness matrix in the FDM basis using Kronecker products of interval matrices. @@ -1924,8 +1940,6 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): The test space. Vcol : FunctionSpace The trial space. - addv : PETSc.Mat.InsertMode - Flag indicating if we want to insert or add matrix values. mat_type : PETSc.Mat.Type The matrix type of auxiliary operator. This only used when ``A`` is a preallocator to determine the nonzeros on the upper triangual part of an ``'sbaij'`` matrix. @@ -1934,6 +1948,9 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): set_submat = SparseAssembler.setSubMatCSR(PETSc.COMM_SELF, triu=triu) update_A = lambda A, Ae, rindices: set_submat(A, Ae, rindices, rindices, addv) condense_element_mat = lambda x: x + addv = PETSc.InsertMode.ADD_VALUES + if mat_type is None: + mat_type = A.getType() def cell_to_global(lgmap, cell_to_local, cell_index, result=None): # Be careful not to create new arrays From 36fb5fa837a438456ee1d9f6c7fd786866b8fe1a Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 12 Mar 2024 17:52:19 +0000 Subject: [PATCH 38/52] setBDDCNeumannBoundaries --- firedrake/preconditioners/bddc.py | 22 +++++++++++++++++----- firedrake/preconditioners/fdm.py | 10 +++++----- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index ae59aba322..c690ca42c1 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -37,11 +37,23 @@ def initialize(self, pc): ctx = get_appctx(dm) bcs = tuple(ctx._problem.bcs) - if len(bcs) > 0: - bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) - V.dof_dset.lgmap.apply(bc_nodes, result=bc_nodes) - bndr = PETSc.IS().createGeneral(bc_nodes, comm=pc.comm) - bddcpc.setBDDCDirichletBoundaries(bndr) + if V.extruded: + boundary_nodes = numpy.unique(numpy.concatenate(list(map(V.boundary_nodes, ("on_boundary", "top", "bottom"))))) + else: + boundary_nodes = V.boundary_nodes("on_boundary") + if len(bcs) == 0: + dir_nodes = numpy.empty(0, dtype=boundary_nodes.dtype) + else: + dir_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) + neu_nodes = numpy.setdiff1d(boundary_nodes, dir_nodes) + + V.dof_dset.lgmap.apply(dir_nodes, result=dir_nodes) + dir_bndr = PETSc.IS().createGeneral(dir_nodes, comm=pc.comm) + bddcpc.setBDDCDirichletBoundaries(dir_bndr) + + V.dof_dset.lgmap.apply(neu_nodes, result=neu_nodes) + neu_bndr = PETSc.IS().createGeneral(neu_nodes, comm=pc.comm) + bddcpc.setBDDCNeumannBoundaries(neu_bndr) appctx = self.get_appctx(pc) sobolev_space = V.ufl_element().sobolev_space diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index e8c1d9deac..f13e437a5b 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -61,7 +61,7 @@ __all__ = ("FDMPC", "PoissonFDMPC") -def local_to_cell(V, val): +def broken_function(V, val): W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element())) w = Function(W, dtype=val.dtype) v = Function(V, val=val) @@ -78,7 +78,7 @@ def local_to_cell(V, val): def mask_local_indices(V, lgmap, repeated=False): mask = lgmap.indices if repeated: - w = local_to_cell(V, mask) + w = broken_function(V, mask) V = w.function_space() mask = w.dat.data_ro_with_halos indices = numpy.arange(len(mask), dtype=PETSc.IntType) @@ -738,12 +738,12 @@ def assembly_lgmaps(self): for Vsub in self.V: lgmap = Vsub.dof_dset.lgmap if self.allow_repeated: - indices = local_to_cell(Vsub, lgmap.indices).dat.data_ro + indices = broken_function(Vsub, lgmap.indices).dat.data_ro else: indices = lgmap.indices.copy() local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) - cell_node_map = local_to_cell(Vsub, local_indices) - ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map.dat.data_ro), assume_unique=True) + cell_node_map = broken_function(Vsub, local_indices).dat.data_ro + ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map), assume_unique=True) indices[ghost] = -1 lgmaps[Vsub] = PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) return lgmaps From 67ed33bd6c9f8646aa76fc5a360988b151628888 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 20 Mar 2024 16:13:04 -0500 Subject: [PATCH 39/52] BDDC: fix auxiliary space construction --- firedrake/preconditioners/bddc.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index c690ca42c1..ec3df710e9 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -3,6 +3,7 @@ from firedrake.petsc import PETSc from firedrake.dmhooks import get_function_space, get_appctx from firedrake.ufl_expr import TestFunction, TrialFunction +from firedrake.functionspace import FunctionSpace, VectorFunctionSpace, TensorFunctionSpace from ufl import inner, div, dx, HCurl, HDiv from pyop2.utils import as_tuple import numpy @@ -57,11 +58,19 @@ def initialize(self, pc): appctx = self.get_appctx(pc) sobolev_space = V.ufl_element().sobolev_space + if V.shape == (): + make_function_space = FunctionSpace + elif len(V.shape) == 1: + make_function_space = VectorFunctionSpace + else: + make_function_space = TensorFunctionSpace + if sobolev_space == HCurl: gradient = appctx.get("discrete_gradient", None) if gradient is None: from firedrake.preconditioners.fdm import tabulate_exterior_derivative - Q = V.reconstruct(family="Lagrange") + from firedrake.preconditioners.hiptmair import curl_to_grad + Q = make_function_space(V.mesh(), curl_to_grad(V.ufl_element())) gradient = tabulate_exterior_derivative(Q, V) bddcpc.setBDDCDiscreteGradient(gradient) @@ -70,8 +79,8 @@ def initialize(self, pc): if B is None: from firedrake.assemble import assemble degree = max(as_tuple(V.ufl_element().degree())) - Q = V.reconstruct(family="DG", degree=degree-1, variant=None) - b = inner(div(TrialFunction(V)), TestFunction(Q)) * dx + Q = make_function_space(V.mesh(), "DG", degree-1) + b = inner(div(TrialFunction(V)), TestFunction(Q)) * dx(degree=2*(degree-1)) B = assemble(b, mat_type="matfree") bddcpc.setBDDCDivergenceMat(B.petscmat) From f5d55775835d5d42b1df66fda6f79572ea8f78ce Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 21 Mar 2024 08:21:57 -0500 Subject: [PATCH 40/52] FDMPC: add option -fdm_mat_is_allow_repeated --- firedrake/preconditioners/fdm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index f13e437a5b..80734a0178 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -125,6 +125,12 @@ def initialize(self, pc): use_amat = options.getBool("pc_use_amat", True) use_static_condensation = options.getBool("static_condensation", False) pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ) + self.mat_type = pmat_type + + allow_repeated = False + if pmat_type == "is": + allow_repeated = options.getBool("mat_is_allow_repeated", True) + self.allow_repeated = allow_repeated appctx = self.get_appctx(pc) fcp = appctx.get("form_compiler_parameters") or {} @@ -200,8 +206,6 @@ def initialize(self, pc): # Assemble the FDM preconditioner with sparse local matrices self.V = V_fdm - self.mat_type = pmat_type - self.allow_repeated = pmat_type == "is" Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation, use_amat) self._assemble_P() From ce4c24721d46366117ed85b8ad5e09178668335d Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 21 Mar 2024 08:28:04 -0500 Subject: [PATCH 41/52] BDDC: pass the curl mat for 2D H(curl) --- firedrake/preconditioners/bddc.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index ec3df710e9..aa782abf47 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -4,7 +4,7 @@ from firedrake.dmhooks import get_function_space, get_appctx from firedrake.ufl_expr import TestFunction, TrialFunction from firedrake.functionspace import FunctionSpace, VectorFunctionSpace, TensorFunctionSpace -from ufl import inner, div, dx, HCurl, HDiv +from ufl import curl, div, HCurl, HDiv, inner, dx from pyop2.utils import as_tuple import numpy @@ -65,24 +65,25 @@ def initialize(self, pc): else: make_function_space = TensorFunctionSpace - if sobolev_space == HCurl: - gradient = appctx.get("discrete_gradient", None) - if gradient is None: - from firedrake.preconditioners.fdm import tabulate_exterior_derivative - from firedrake.preconditioners.hiptmair import curl_to_grad - Q = make_function_space(V.mesh(), curl_to_grad(V.ufl_element())) - gradient = tabulate_exterior_derivative(Q, V) - bddcpc.setBDDCDiscreteGradient(gradient) - - elif sobolev_space == HDiv: + tdim = V.mesh().topological_dimension() + if tdim >= 2 and V.finat_element.formdegree == tdim-1: B = appctx.get("divergence_mat", None) if B is None: from firedrake.assemble import assemble degree = max(as_tuple(V.ufl_element().degree())) + d = {HCurl: curl, HDiv: div}[sobolev_space] Q = make_function_space(V.mesh(), "DG", degree-1) - b = inner(div(TrialFunction(V)), TestFunction(Q)) * dx(degree=2*(degree-1)) + b = inner(d(TrialFunction(V)), TestFunction(Q)) * dx(degree=2*(degree-1)) B = assemble(b, mat_type="matfree") bddcpc.setBDDCDivergenceMat(B.petscmat) + elif sobolev_space == HCurl: + gradient = appctx.get("discrete_gradient", None) + if gradient is None: + from firedrake.preconditioners.fdm import tabulate_exterior_derivative + from firedrake.preconditioners.hiptmair import curl_to_grad + Q = make_function_space(V.mesh(), curl_to_grad(V.ufl_element())) + gradient = tabulate_exterior_derivative(Q, V) + bddcpc.setBDDCDiscreteGradient(gradient) bddcpc.setFromOptions() self.pc = bddcpc From 180f5b86f34aa13cc38d2ce6b7f5cae8a63d7adc Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 21 Mar 2024 09:27:21 -0500 Subject: [PATCH 42/52] BDDC: hack to get a denser gradient --- firedrake/preconditioners/bddc.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index aa782abf47..c352b584e3 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -78,12 +78,15 @@ def initialize(self, pc): bddcpc.setBDDCDivergenceMat(B.petscmat) elif sobolev_space == HCurl: gradient = appctx.get("discrete_gradient", None) + order = None if gradient is None: from firedrake.preconditioners.fdm import tabulate_exterior_derivative from firedrake.preconditioners.hiptmair import curl_to_grad Q = make_function_space(V.mesh(), curl_to_grad(V.ufl_element())) - gradient = tabulate_exterior_derivative(Q, V) - bddcpc.setBDDCDiscreteGradient(gradient) + order = max(as_tuple(V.ufl_element().degree())) + # gradient = tabulate_exterior_derivative(Q, V) + gradient = tabulate_exterior_derivative(Q.reconstruct(variant=None), V.reconstruct(variant=None)) + bddcpc.setBDDCDiscreteGradient(gradient, order=order) bddcpc.setFromOptions() self.pc = bddcpc From 485a6a7f3f9ecddf867038573bd3e8b07f810b5b Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Thu, 21 Mar 2024 17:18:46 -0500 Subject: [PATCH 43/52] BDDC remove gradient sparsity hack --- firedrake/preconditioners/bddc.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index c352b584e3..4a86542c87 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -66,11 +66,11 @@ def initialize(self, pc): make_function_space = TensorFunctionSpace tdim = V.mesh().topological_dimension() + degree = max(as_tuple(V.ufl_element().degree())) if tdim >= 2 and V.finat_element.formdegree == tdim-1: B = appctx.get("divergence_mat", None) if B is None: from firedrake.assemble import assemble - degree = max(as_tuple(V.ufl_element().degree())) d = {HCurl: curl, HDiv: div}[sobolev_space] Q = make_function_space(V.mesh(), "DG", degree-1) b = inner(d(TrialFunction(V)), TestFunction(Q)) * dx(degree=2*(degree-1)) @@ -78,15 +78,12 @@ def initialize(self, pc): bddcpc.setBDDCDivergenceMat(B.petscmat) elif sobolev_space == HCurl: gradient = appctx.get("discrete_gradient", None) - order = None if gradient is None: from firedrake.preconditioners.fdm import tabulate_exterior_derivative from firedrake.preconditioners.hiptmair import curl_to_grad Q = make_function_space(V.mesh(), curl_to_grad(V.ufl_element())) - order = max(as_tuple(V.ufl_element().degree())) - # gradient = tabulate_exterior_derivative(Q, V) - gradient = tabulate_exterior_derivative(Q.reconstruct(variant=None), V.reconstruct(variant=None)) - bddcpc.setBDDCDiscreteGradient(gradient, order=order) + gradient = tabulate_exterior_derivative(Q, V) + bddcpc.setBDDCDiscreteGradient(gradient, order=degree) bddcpc.setFromOptions() self.pc = bddcpc From 984902a9ff839b41704323e5aa11cd748cf4db65 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 25 Mar 2024 13:47:43 -0500 Subject: [PATCH 44/52] BDDC: get vertex DOFs --- firedrake/preconditioners/bddc.py | 44 +++++++++++++++++++----- firedrake/preconditioners/facet_split.py | 34 +++++++++--------- firedrake/preconditioners/fdm.py | 5 +++ 3 files changed, 59 insertions(+), 24 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 4a86542c87..4bcd1d8b0a 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -1,10 +1,12 @@ from firedrake.preconditioners.base import PCBase from firedrake.preconditioners.patch import bcdofs +from firedrake.preconditioners.facet_split import restrict, restricted_dofs from firedrake.petsc import PETSc from firedrake.dmhooks import get_function_space, get_appctx from firedrake.ufl_expr import TestFunction, TrialFunction from firedrake.functionspace import FunctionSpace, VectorFunctionSpace, TensorFunctionSpace from ufl import curl, div, HCurl, HDiv, inner, dx +from pyop2 import op2, PermutedMap from pyop2.utils import as_tuple import numpy @@ -58,12 +60,6 @@ def initialize(self, pc): appctx = self.get_appctx(pc) sobolev_space = V.ufl_element().sobolev_space - if V.shape == (): - make_function_space = FunctionSpace - elif len(V.shape) == 1: - make_function_space = VectorFunctionSpace - else: - make_function_space = TensorFunctionSpace tdim = V.mesh().topological_dimension() degree = max(as_tuple(V.ufl_element().degree())) @@ -72,6 +68,12 @@ def initialize(self, pc): if B is None: from firedrake.assemble import assemble d = {HCurl: curl, HDiv: div}[sobolev_space] + if V.shape == (): + make_function_space = FunctionSpace + elif len(V.shape) == 1: + make_function_space = VectorFunctionSpace + else: + make_function_space = TensorFunctionSpace Q = make_function_space(V.mesh(), "DG", degree-1) b = inner(d(TrialFunction(V)), TestFunction(Q)) * dx(degree=2*(degree-1)) B = assemble(b, mat_type="matfree") @@ -81,9 +83,12 @@ def initialize(self, pc): if gradient is None: from firedrake.preconditioners.fdm import tabulate_exterior_derivative from firedrake.preconditioners.hiptmair import curl_to_grad - Q = make_function_space(V.mesh(), curl_to_grad(V.ufl_element())) + Q = FunctionSpace(V.mesh(), curl_to_grad(V.ufl_element())) gradient = tabulate_exterior_derivative(Q, V) - bddcpc.setBDDCDiscreteGradient(gradient, order=degree) + vertices = get_vertex_dofs(Q) + vertices.view() + + bddcpc.setBDDCDiscreteGradient(gradient) bddcpc.setFromOptions() self.pc = bddcpc @@ -99,3 +104,26 @@ def apply(self, pc, x, y): def applyTranspose(self, pc, x, y): self.pc.applyTranspose(x, y) + + +def get_vertex_dofs(V): + W = FunctionSpace(V.mesh(), restrict(V.ufl_element(), "vertex")) + val = numpy.arange(0, V.dof_count, dtype=PETSc.IntType) + vdat = V.make_dat(val=val) + wdat = W.make_dat(val=numpy.full((W.dof_count,), -1, dtype=PETSc.IntType)) + + p1_dofs = restricted_dofs(W.finat_element, V.finat_element, dim=1) + Vsize = V.finat_element.space_dimension() * V.value_size + Wsize = W.finat_element.space_dimension() * W.value_size + eperm = numpy.concatenate([p1_dofs, numpy.setdiff1d(numpy.arange(0, Vsize, dtype=PETSc.IntType), p1_dofs)]) + pmap = PermutedMap(V.cell_node_map(), eperm) + kernel_code = f""" + void vertex_dofs(PetscInt *restrict w, const PetscInt *restrict v) {{ + for (PetscInt i=0; i<{Wsize}; i++) w[i] = v[i]; + }}""" + kernel = op2.Kernel(kernel_code, "vertex_dofs", requires_zeroed_output_arguments=False) + op2.par_loop(kernel, V.mesh().cell_set, wdat(op2.WRITE, W.cell_node_map()), vdat(op2.READ, pmap)) + indices = wdat.data_ro + V.dof_dset.lgmap.apply(indices, result=indices) + vertex_dofs = PETSc.IS().createGeneral(indices, comm=V.comm) + return vertex_dofs diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py index 3c35d7423c..6b889ab7eb 100644 --- a/firedrake/preconditioners/facet_split.py +++ b/firedrake/preconditioners/facet_split.py @@ -2,6 +2,7 @@ from mpi4py import MPI from pyop2 import op2, PermutedMap from pyop2.utils import as_tuple +from finat.ufl import RestrictedElement, MixedElement, TensorElement, VectorElement from firedrake.petsc import PETSc from firedrake.preconditioners.base import PCBase import firedrake.dmhooks as dmhooks @@ -39,9 +40,8 @@ def get_permutation(self, V, W): return self._permutation_cache[key] def initialize(self, pc): - from finat.ufl import RestrictedElement, MixedElement, TensorElement, VectorElement - from firedrake import FunctionSpace, TestFunctions, TrialFunctions from firedrake.assemble import get_assembler + from firedrake import FunctionSpace, TestFunctions, TrialFunctions _, P = pc.getOperators() appctx = self.get_appctx(pc) @@ -66,14 +66,6 @@ def initialize(self, pc): V = a.arguments()[-1].function_space() assert len(V) == 1, "Interior-facet decomposition of mixed elements is not supported" - def restrict(ele, restriction_domain): - if isinstance(ele, VectorElement): - return type(ele)(restrict(ele._sub_element, restriction_domain), dim=ele.num_sub_elements) - elif isinstance(ele, TensorElement): - return type(ele)(restrict(ele._sub_element, restriction_domain), shape=ele._shape, symmetry=ele._symmety) - else: - return RestrictedElement(ele, restriction_domain) - # W = V[interior] * V[facet] W = FunctionSpace(V.mesh(), MixedElement([restrict(V.ufl_element(), d) for d in ("interior", "facet")])) assert W.dim() == V.dim(), "Dimensions of the original and decomposed spaces do not match" @@ -201,28 +193,38 @@ def destroy(self, pc): self.perm.destroy() -def split_dofs(elem): +def restrict(ele, restriction_domain): + if isinstance(ele, VectorElement): + return type(ele)(restrict(ele._sub_element, restriction_domain), dim=ele.num_sub_elements) + elif isinstance(ele, TensorElement): + return type(ele)(restrict(ele._sub_element, restriction_domain), shape=ele._shape, symmetry=ele._symmety) + else: + return RestrictedElement(ele, restriction_domain) + + +def split_dofs(elem, dim=None): """ Split DOFs into interior and facet DOF, where facets are sorted by entity. """ + if dim is None: + dim = elem.cell.get_spatial_dimension() entity_dofs = elem.entity_dofs() - ndim = elem.cell.get_spatial_dimension() edofs = [[], []] for key in sorted(entity_dofs.keys()): vals = entity_dofs[key] edim = sum(as_tuple(key)) for k in sorted(vals.keys()): - edofs[edim < ndim].extend(sorted(vals[k])) + edofs[edim < dim].extend(sorted(vals[k])) return tuple(numpy.array(e, dtype=PETSc.IntType) for e in edofs) -def restricted_dofs(celem, felem): +def restricted_dofs(celem, felem, dim=None): """ Find which DOFs from felem are on celem :arg celem: the restricted :class:`finat.FiniteElement` :arg felem: the unrestricted :class:`finat.FiniteElement` :returns: :class:`numpy.array` with indices of felem that correspond to celem """ - csplit = split_dofs(celem) - fsplit = split_dofs(felem) + csplit = split_dofs(celem, dim=dim) + fsplit = split_dofs(felem, dim=dim) if len(csplit[0]) and len(csplit[1]): csplit = [numpy.concatenate(csplit)] fsplit = [numpy.concatenate(fsplit)] diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 80734a0178..5a1d6818fc 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -780,6 +780,11 @@ def setup_block(self, Vrow, Vcol): P.setSizes(sizes) P.setISAllowRepeated(self.allow_repeated) P.setLGMap(rmap, cmap) + if on_diag and ptype == "is" and self.allow_repeated: + bsize = Vrow.finat_element.space_dimension() * Vrow.value_size + local_mat = P.getISLocalMat() + nblocks = local_mat.getSize()[0] // bsize + local_mat.setVariableBlockSizes([bsize] * nblocks) P.setPreallocationNNZ((dnz, onz)) if not (ptype.endswith("sbaij") or ptype == "is"): From b7de8705c15cd591de3998010e77319d4b6d0f6e Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Tue, 26 Mar 2024 15:08:38 +0300 Subject: [PATCH 45/52] add view from options for pmat --- firedrake/preconditioners/fdm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 5a1d6818fc..f1d62d2b1d 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -208,6 +208,7 @@ def initialize(self, pc): self.V = V_fdm Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation, use_amat) + self.assembly_callables.append(partial(Pmat.viewFromOptions, "-pmat_view", fdmpc)) self._assemble_P() fdmpc.setOperators(A=Amat, P=Pmat) From d5fc173be5ba448634ee692a9f80cef30b52178c Mon Sep 17 00:00:00 2001 From: Stefano Zampini Date: Tue, 26 Mar 2024 15:21:50 +0300 Subject: [PATCH 46/52] Finish discrete gradient --- firedrake/preconditioners/bddc.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 4bcd1d8b0a..2b4678a119 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -85,10 +85,18 @@ def initialize(self, pc): from firedrake.preconditioners.hiptmair import curl_to_grad Q = FunctionSpace(V.mesh(), curl_to_grad(V.ufl_element())) gradient = tabulate_exterior_derivative(Q, V) - vertices = get_vertex_dofs(Q) - vertices.view() - - bddcpc.setBDDCDiscreteGradient(gradient) + corners = get_vertex_dofs(Q) + gradient.compose('_elements_corners', corners) + grad_args = (gradient,) + grad_kwargs = {'order' : degree} + else: + try: + grad_args, grad_kwargs = gradient + except: + grad_args = (gradient,) + grad_kwargs = dict() + + bddcpc.setBDDCDiscreteGradient(*grad_args, **grad_kwargs) bddcpc.setFromOptions() self.pc = bddcpc From f8d7bc6cf436a2ba19d9cc4ed00b4a71303321dd Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 26 Mar 2024 09:13:10 -0500 Subject: [PATCH 47/52] move restricted_local_dofs to facet_split --- firedrake/preconditioners/bddc.py | 21 +---------- firedrake/preconditioners/facet_split.py | 48 ++++++++++++++++++------ 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 4bcd1d8b0a..57f96bddc6 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -1,12 +1,11 @@ from firedrake.preconditioners.base import PCBase from firedrake.preconditioners.patch import bcdofs -from firedrake.preconditioners.facet_split import restrict, restricted_dofs +from firedrake.preconditioners.facet_split import restrict, restricted_local_dofs from firedrake.petsc import PETSc from firedrake.dmhooks import get_function_space, get_appctx from firedrake.ufl_expr import TestFunction, TrialFunction from firedrake.functionspace import FunctionSpace, VectorFunctionSpace, TensorFunctionSpace from ufl import curl, div, HCurl, HDiv, inner, dx -from pyop2 import op2, PermutedMap from pyop2.utils import as_tuple import numpy @@ -86,7 +85,6 @@ def initialize(self, pc): Q = FunctionSpace(V.mesh(), curl_to_grad(V.ufl_element())) gradient = tabulate_exterior_derivative(Q, V) vertices = get_vertex_dofs(Q) - vertices.view() bddcpc.setBDDCDiscreteGradient(gradient) @@ -108,22 +106,7 @@ def applyTranspose(self, pc, x, y): def get_vertex_dofs(V): W = FunctionSpace(V.mesh(), restrict(V.ufl_element(), "vertex")) - val = numpy.arange(0, V.dof_count, dtype=PETSc.IntType) - vdat = V.make_dat(val=val) - wdat = W.make_dat(val=numpy.full((W.dof_count,), -1, dtype=PETSc.IntType)) - - p1_dofs = restricted_dofs(W.finat_element, V.finat_element, dim=1) - Vsize = V.finat_element.space_dimension() * V.value_size - Wsize = W.finat_element.space_dimension() * W.value_size - eperm = numpy.concatenate([p1_dofs, numpy.setdiff1d(numpy.arange(0, Vsize, dtype=PETSc.IntType), p1_dofs)]) - pmap = PermutedMap(V.cell_node_map(), eperm) - kernel_code = f""" - void vertex_dofs(PetscInt *restrict w, const PetscInt *restrict v) {{ - for (PetscInt i=0; i<{Wsize}; i++) w[i] = v[i]; - }}""" - kernel = op2.Kernel(kernel_code, "vertex_dofs", requires_zeroed_output_arguments=False) - op2.par_loop(kernel, V.mesh().cell_set, wdat(op2.WRITE, W.cell_node_map()), vdat(op2.READ, pmap)) - indices = wdat.data_ro + indices = restricted_local_dofs(V, W) V.dof_dset.lgmap.apply(indices, result=indices) vertex_dofs = PETSc.IS().createGeneral(indices, comm=V.comm) return vertex_dofs diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py index 6b889ab7eb..a700b39778 100644 --- a/firedrake/preconditioners/facet_split.py +++ b/firedrake/preconditioners/facet_split.py @@ -32,7 +32,7 @@ class FacetSplitPC(PCBase): def get_permutation(self, V, W): key = (V, W) if key not in self._permutation_cache: - indices = get_permutation_map(V, W) + indices = restricted_local_dofs(V, W) if V._comm.allreduce(numpy.all(indices[:-1] <= indices[1:]), MPI.PROD): self._permutation_cache[key] = None else: @@ -237,10 +237,19 @@ def restricted_dofs(celem, felem, dim=None): return fsplit[k][perm] -def get_permutation_map(V, W): - perm = numpy.full((V.dof_count,), -1, dtype=PETSc.IntType) - vdat = V.make_dat(val=perm) +def copy_kernel(size): + kernel_code = f""" + void copy(PetscInt *restrict y, const PetscInt *restrict x) {{ + for (PetscInt i=0; i<{size}; i++) y[i] = x[i]; + }}""" + return op2.Kernel(kernel_code, "copy", requires_zeroed_output_arguments=False) + +def reference_space_dimension(V): + return sum(Vsub.finat_element.space_dimension() * Vsub.value_size for Vsub in V) + + +def get_permutation_map(V, W): offset = 0 wdats = [] for Wsub in W: @@ -248,14 +257,29 @@ def get_permutation_map(V, W): wdats.append(Wsub.make_dat(val=val)) offset += Wsub.dof_dset.layout_vec.sizes[0] wdat = op2.MixedDat(wdats) - size = sum(Wsub.finat_element.space_dimension() * Wsub.value_size for Wsub in W) + vdat = V.make_dat(val=numpy.full((V.dof_count,), -1, dtype=PETSc.IntType)) + eperm = numpy.concatenate([restricted_dofs(Wsub.finat_element, V.finat_element) for Wsub in W]) pmap = PermutedMap(V.cell_node_map(), eperm) - - kernel_code = f""" - void permutation(PetscInt *restrict v, const PetscInt *restrict w) {{ - for (PetscInt i=0; i<{size}; i++) v[i] = w[i]; - }}""" - kernel = op2.Kernel(kernel_code, "permutation", requires_zeroed_output_arguments=False) - op2.par_loop(kernel, V.mesh().cell_set, vdat(op2.WRITE, pmap), wdat(op2.READ, W.cell_node_map())) + size = reference_space_dimension(V) + op2.par_loop(copy_kernel(size), V.mesh().cell_set, vdat(op2.WRITE, pmap), wdat(op2.READ, W.cell_node_map())) return vdat.data_ro + + +def restricted_local_dofs(V, W): + vdat = V.make_dat(val=numpy.arange(0, V.dof_count, dtype=PETSc.IntType)) + wdat = W.make_dat(val=numpy.full((W.dof_count,), -1, dtype=PETSc.IntType)) + + Vsize = reference_space_dimension(V) + Wsize = reference_space_dimension(W) + edofs = W[len(W)-1].finat_element.entity_dofs() + wdim = 0 + for dim in sorted(edofs): + if any(len(edofs[dim][entity]) > 0 for entity in edofs[dim]): + wdim = sum(as_tuple(dim)) + 1 + + eperm = numpy.concatenate([restricted_dofs(Wsub.finat_element, V.finat_element, dim=wdim) for Wsub in W]) + eperm = numpy.concatenate([eperm, numpy.setdiff1d(numpy.arange(0, Vsize, dtype=PETSc.IntType), eperm)]) + pmap = PermutedMap(V.cell_node_map(), eperm) + op2.par_loop(copy_kernel(Wsize), V.mesh().cell_set, wdat(op2.WRITE, W.cell_node_map()), vdat(op2.READ, pmap)) + return wdat.data_ro From b2f9ec9e5f555f977ba5c736ab3f679b034a1cd4 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Tue, 26 Mar 2024 09:16:17 -0500 Subject: [PATCH 48/52] lint --- firedrake/preconditioners/bddc.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 79718f2149..45828f104e 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -87,13 +87,13 @@ def initialize(self, pc): corners = get_vertex_dofs(Q) gradient.compose('_elements_corners', corners) grad_args = (gradient,) - grad_kwargs = {'order' : degree} + grad_kwargs = {'order': degree} else: try: - grad_args, grad_kwargs = gradient - except: - grad_args = (gradient,) - grad_kwargs = dict() + grad_args, grad_kwargs = gradient + except ValueError: + grad_args = (gradient,) + grad_kwargs = dict() bddcpc.setBDDCDiscreteGradient(*grad_args, **grad_kwargs) From 389b007782ec0a92f4b4531eb20bf56ecfd67484 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 15 Apr 2024 23:08:06 +0100 Subject: [PATCH 49/52] Fix FacetSplit --- firedrake/preconditioners/facet_split.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py index a700b39778..c0185e7f37 100644 --- a/firedrake/preconditioners/facet_split.py +++ b/firedrake/preconditioners/facet_split.py @@ -32,7 +32,7 @@ class FacetSplitPC(PCBase): def get_permutation(self, V, W): key = (V, W) if key not in self._permutation_cache: - indices = restricted_local_dofs(V, W) + indices = get_permutation_map(V, W) if V._comm.allreduce(numpy.all(indices[:-1] <= indices[1:]), MPI.PROD): self._permutation_cache[key] = None else: From 99026c3e9eaa6e130072994113796f74f6b05153 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 15 Apr 2024 17:07:40 -0600 Subject: [PATCH 50/52] Fix FacetSplit --- firedrake/preconditioners/facet_split.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/firedrake/preconditioners/facet_split.py b/firedrake/preconditioners/facet_split.py index a700b39778..8de02cdacb 100644 --- a/firedrake/preconditioners/facet_split.py +++ b/firedrake/preconditioners/facet_split.py @@ -32,7 +32,7 @@ class FacetSplitPC(PCBase): def get_permutation(self, V, W): key = (V, W) if key not in self._permutation_cache: - indices = restricted_local_dofs(V, W) + indices = get_permutation_map(V, W) if V._comm.allreduce(numpy.all(indices[:-1] <= indices[1:]), MPI.PROD): self._permutation_cache[key] = None else: @@ -267,8 +267,8 @@ def get_permutation_map(V, W): def restricted_local_dofs(V, W): - vdat = V.make_dat(val=numpy.arange(0, V.dof_count, dtype=PETSc.IntType)) - wdat = W.make_dat(val=numpy.full((W.dof_count,), -1, dtype=PETSc.IntType)) + vdat = V.make_dat(val=numpy.arange(0, sum(as_tuple(V.dof_count)), dtype=PETSc.IntType)) + wdat = W.make_dat(val=numpy.full((sum(as_tuple(W.dof_count)),), -1, dtype=PETSc.IntType)) Vsize = reference_space_dimension(V) Wsize = reference_space_dimension(W) From b24589fc6f34f5d8aa73b63fb203e0de2aaaad58 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Mon, 29 Apr 2024 08:47:07 +0100 Subject: [PATCH 51/52] do not view --- firedrake/preconditioners/fdm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index f1d62d2b1d..92ab8f7e12 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -323,7 +323,6 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) assembly_callables.append(P.assemble) assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) - # assembly_callables.append(P.view) else: v = numpy.ones(bdofs.shape, PETSc.ScalarType) assembly_callables.append(partial(P.setValuesLocalRCV, bdofs, bdofs, v, addv=addv)) From dec61804e0d393c18b27d03cfb20e0ad62269804 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 4 Dec 2024 13:46:50 +0000 Subject: [PATCH 52/52] docs --- firedrake/preconditioners/bddc.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py index 0c000c16c2..e7c47985a2 100644 --- a/firedrake/preconditioners/bddc.py +++ b/firedrake/preconditioners/bddc.py @@ -13,7 +13,10 @@ class BDDCPC(PCBase): - """PC for PETSc PCBDDC""" + """PC for PETSc PCBDDC (Balancing Domain Decomposition with Constraints). + + A Mat of type MATIS is required. Currently, this type is only supported by FDMPC. + """ _prefix = "bddc_"