diff --git a/AUTHORS.rst b/AUTHORS.rst index a4a5f4ddc5..80c7f1bde3 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -90,6 +90,8 @@ Christopher Hawkes Miklós Homolya +Joshua Hope-Collins + Christian T. Jacobs Darko Janeković diff --git a/CITATION.rst b/CITATION.rst index 2965de8d02..48513922ed 100644 --- a/CITATION.rst +++ b/CITATION.rst @@ -12,7 +12,7 @@ If you publish results using Firedrake, we would be grateful if you would cite t @manual{FiredrakeUserManual, title = {Firedrake User Manual}, - author = {David A. Ham and Paul H. J. Kelly and Lawrence Mitchell and Colin J. Cotter and Robert C. Kirby and Koki Sagiyama and Nacime Bouziani and Thomas J. Gregory and Jack Betteridge and Daniel R. Shapero and Reuben W. Nixon-Hill and Connor J. Ward and Patrick E. Farrell and Pablo D. Brubeck and India Marsden and Daiane I. Dolci and Sophia Vorderwuelbecke and Thomas H. Gibson and Miklós Homolya and Tianjiao Sun and Andrew T. T. McRae and Fabio Luporini and Alastair Gregory and Michael Lange and Simon W. Funke and Florian Rathgeber and Gheorghe-Teodor Bercea and Graham R. Markall}, + author = {David A. Ham and Paul H. J. Kelly and Lawrence Mitchell and Colin J. Cotter and Robert C. Kirby and Koki Sagiyama and Nacime Bouziani and Thomas J. Gregory and Jack Betteridge and Daniel R. Shapero and Reuben W. Nixon-Hill and Connor J. Ward and Patrick E. Farrell and Pablo D. Brubeck and India Marsden and Daiane I. Dolci and Joshua Hope-Collins and Sophia Vorderwuelbecke and Thomas H. Gibson and Miklós Homolya and Tianjiao Sun and Andrew T. T. McRae and Fabio Luporini and Alastair Gregory and Michael Lange and Simon W. Funke and Florian Rathgeber and Gheorghe-Teodor Bercea and Graham R. Markall}, organization = {Imperial College London and University of Oxford and Baylor University and University of Washington}, edition = {First edition}, year = {2023}, diff --git a/firedrake/preconditioners/__init__.py b/firedrake/preconditioners/__init__.py index cd75ae7380..491a73657b 100644 --- a/firedrake/preconditioners/__init__.py +++ b/firedrake/preconditioners/__init__.py @@ -12,3 +12,4 @@ from firedrake.preconditioners.fdm import * # noqa: F401 from firedrake.preconditioners.hiptmair import * # noqa: F401 from firedrake.preconditioners.facet_split import * # noqa: F401 +from firedrake.preconditioners.bddc import * # noqa: F401 diff --git a/firedrake/preconditioners/bddc.py b/firedrake/preconditioners/bddc.py new file mode 100644 index 0000000000..5fb1fca51a --- /dev/null +++ b/firedrake/preconditioners/bddc.py @@ -0,0 +1,142 @@ +from firedrake.preconditioners.base import PCBase +from firedrake.preconditioners.patch import bcdofs +from firedrake.preconditioners.facet_split import restrict, get_restriction_indices +from firedrake.petsc import PETSc +from firedrake.dmhooks import get_function_space, get_appctx +from firedrake.ufl_expr import TestFunction, TrialFunction +from firedrake.functionspace import FunctionSpace, VectorFunctionSpace, TensorFunctionSpace +from ufl import curl, div, HCurl, HDiv, inner, dx +from pyop2.utils import as_tuple +import numpy + +__all__ = ("BDDCPC",) + + +class BDDCPC(PCBase): + """PC for PETSc PCBDDC (Balancing Domain Decomposition by Constraints). + This is a domain decomposition method using subdomains defined by the + blocks in a Mat of type IS.
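+ PCBDDC builds its local subdomain solvers and its coarse problem from + these unassembled local blocks.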
+ + Internally, this PC creates a PETSc PCBDDC object that can be controlled by + the options: + - ``'bddc_pc_bddc_neumann'`` to set sub-KSPs on subdomains excluding corners, + - ``'bddc_pc_bddc_dirichlet'`` to set sub-KSPs on subdomain interiors, + - ``'bddc_pc_bddc_coarse'`` to set the coarse solver KSP. + + This PC also inspects optional arguments supplied in the application context: + - ``'discrete_gradient'`` for problems in H(curl), this sets the arguments + (a Mat tabulating the gradient of the auxiliary H1 space) and + keyword arguments supplied to ``PETSc.PC.setBDDCDiscreteGradient``. + - ``'divergence_mat'`` for 3D problems in H(div), this sets the Mat with the + assembled bilinear form testing the divergence against an L2 space. + + Notes + ----- + Currently the Mat type IS is only supported by FDMPC. + + """ + + _prefix = "bddc_" + + def initialize(self, pc): + # Get context from pc + _, P = pc.getOperators() + dm = pc.getDM() + self.prefix = pc.getOptionsPrefix() + self._prefix + + V = get_function_space(dm) + + # Create new PC object as BDDC type + bddcpc = PETSc.PC().create(comm=pc.comm) + bddcpc.incrementTabLevel(1, parent=pc) + bddcpc.setOptionsPrefix(self.prefix) + bddcpc.setOperators(*pc.getOperators()) + bddcpc.setType(PETSc.PC.Type.BDDC) + + opts = PETSc.Options(bddcpc.getOptionsPrefix()) + if V.ufl_element().variant() == "fdm" and "pc_bddc_use_local_mat_graph" not in opts: + # Disable computation of disconnected components of subdomain interfaces + opts["pc_bddc_use_local_mat_graph"] = False + + ctx = get_appctx(dm) + bcs = tuple(ctx._problem.bcs) + if V.extruded: + boundary_nodes = numpy.unique(numpy.concatenate(list(map(V.boundary_nodes, ("on_boundary", "top", "bottom"))))) + else: + boundary_nodes = V.boundary_nodes("on_boundary") + if len(bcs) == 0: + dir_nodes = numpy.empty(0, dtype=boundary_nodes.dtype) + else: + dir_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) + neu_nodes = numpy.setdiff1d(boundary_nodes, dir_nodes) + + V.dof_dset.lgmap.apply(dir_nodes, result=dir_nodes) + dir_bndr = PETSc.IS().createGeneral(dir_nodes, comm=pc.comm) + bddcpc.setBDDCDirichletBoundaries(dir_bndr) + + V.dof_dset.lgmap.apply(neu_nodes, result=neu_nodes) + neu_bndr = PETSc.IS().createGeneral(neu_nodes, comm=pc.comm) + bddcpc.setBDDCNeumannBoundaries(neu_bndr) + + appctx = self.get_appctx(pc) + sobolev_space = V.ufl_element().sobolev_space + + tdim = V.mesh().topological_dimension() + degree = max(as_tuple(V.ufl_element().degree())) + if tdim >= 2 and V.finat_element.formdegree == tdim-1: + B = appctx.get("divergence_mat", None) + if B is None: + from firedrake.assemble import assemble + d = {HCurl: curl, HDiv: div}[sobolev_space] + if V.shape == (): + make_function_space = FunctionSpace + elif len(V.shape) == 1: + make_function_space = VectorFunctionSpace + else: + make_function_space = TensorFunctionSpace + Q = make_function_space(V.mesh(), "DG", degree-1) + b = inner(d(TrialFunction(V)), TestFunction(Q)) * dx(degree=2*(degree-1)) + B = assemble(b, mat_type="matfree") + bddcpc.setBDDCDivergenceMat(B.petscmat) + elif sobolev_space == HCurl: + gradient = appctx.get("discrete_gradient", None) + if gradient is None: + from firedrake.preconditioners.fdm import tabulate_exterior_derivative + from firedrake.preconditioners.hiptmair import curl_to_grad + Q = FunctionSpace(V.mesh(), curl_to_grad(V.ufl_element())) + gradient = tabulate_exterior_derivative(Q, V) + corners = get_vertex_dofs(Q) + gradient.compose('_elements_corners', corners) + grad_args =
(gradient,) + grad_kwargs = {'order': degree} + else: + try: + grad_args, grad_kwargs = gradient + except ValueError: + grad_args = (gradient,) + grad_kwargs = dict() + + bddcpc.setBDDCDiscreteGradient(*grad_args, **grad_kwargs) + + bddcpc.setFromOptions() + self.pc = bddcpc + + def view(self, pc, viewer=None): + self.pc.view(viewer=viewer) + + def update(self, pc): + pass + + def apply(self, pc, x, y): + self.pc.apply(x, y) + + def applyTranspose(self, pc, x, y): + self.pc.applyTranspose(x, y) + + +def get_vertex_dofs(V): + W = FunctionSpace(V.mesh(), restrict(V.ufl_element(), "vertex")) + indices = get_restriction_indices(V, W) + V.dof_dset.lgmap.apply(indices, result=indices) + vertex_dofs = PETSc.IS().createGeneral(indices, comm=V.comm) + return vertex_dofs diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index 82e56057e7..1696801cb4 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -13,6 +13,7 @@ from firedrake.functionspace import FunctionSpace, MixedFunctionSpace from firedrake.function import Function from firedrake.cofunction import Cofunction +from firedrake.parloops import par_loop from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions from firedrake.utils import cached_property from firedrake_citations import Citations @@ -63,6 +64,33 @@ __all__ = ("FDMPC", "PoissonFDMPC") +def broken_function(V, val): + W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element())) + w = Function(W, dtype=val.dtype) + v = Function(V, val=val) + domain = "{[i]: 0 <= i < v.dofs}" + instructions = """ + for i + w[i] = v[i] + end + """ + par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)}) + return w + + +def mask_local_indices(V, lgmap, repeated=False): + mask = lgmap.indices + if repeated: + w = broken_function(V, mask) + V = w.function_space() + mask = w.dat.data_ro_with_halos + indices = numpy.arange(len(mask), dtype=PETSc.IntType) + indices[mask == -1] = -1 + indices_dat = V.make_dat(val=indices) + indices_acc = indices_dat(op2.READ, V.cell_node_map()) + return indices_acc + + class FDMPC(PCBase): """ A preconditioner for tensor-product elements that changes the shape @@ -100,6 +128,12 @@ def initialize(self, pc): use_amat = options.getBool("pc_use_amat", True) use_static_condensation = options.getBool("static_condensation", False) pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ) + self.mat_type = pmat_type + + allow_repeated = False + if pmat_type == "is": + allow_repeated = options.getBool("mat_is_allow_repeated", True) + self.allow_repeated = allow_repeated appctx = self.get_appctx(pc) fcp = appctx.get("form_compiler_parameters") or {} @@ -124,14 +158,11 @@ def initialize(self, pc): # Transform the problem into the space with FDM shape functions V = J.arguments()[-1].function_space() - element = V.ufl_element() - e_fdm = element.reconstruct(variant=self._variant) - - if element == e_fdm: - V_fdm, J_fdm, bcs_fdm = (V, J, bcs) + V_fdm = V.reconstruct(variant=self._variant) + if V == V_fdm: + J_fdm, bcs_fdm = (J, bcs) else: # Reconstruct Jacobian and bcs with variant element - V_fdm = FunctionSpace(V.mesh(), e_fdm) J_fdm = J(*(t.reconstruct(function_space=V_fdm) for t in J.arguments())) bcs_fdm = [] for bc in bcs: @@ -174,11 +205,10 @@ def initialize(self, pc): self.pc = fdmpc # Assemble the FDM preconditioner with sparse local matrices + self.V = V_fdm Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp, 
pmat_type, use_static_condensation, use_amat) - Pmat.setNullSpace(Amat.getNullSpace()) - Pmat.setTransposeNullSpace(Amat.getTransposeNullSpace()) - Pmat.setNearNullSpace(Amat.getNearNullSpace()) + self.assembly_callables.append(partial(Pmat.viewFromOptions, "-pmat_view", fdmpc)) self._assemble_P() fdmpc.setOperators(A=Amat, P=Pmat) @@ -232,10 +262,9 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # Create data structures needed for assembly self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} - self.indices = {Vsub: op2.Dat(Vsub.dof_dset, self.lgmaps[Vsub].indices) for Vsub in V} + self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], self.allow_repeated) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} - self.kernels = [] Pmats = {} # Dictionary with kernel to compute the Schur complement @@ -260,7 +289,6 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati Amat, Pmats = self.condense(Amat, J, bcs, fcp, pc_type=interior_pc_type) diagonal_terms = [] - addv = PETSc.InsertMode.ADD_VALUES # Loop over all pairs of subspaces for Vrow, Vcol in product(V, V): if (Vrow, Vcol) in Pmats: @@ -272,43 +300,33 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati # Preallocate and assemble the FDM auxiliary sparse operator on_diag = Vrow == Vcol - sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) - ptype = pmat_type if on_diag else PETSc.Mat.Type.AIJ - - preallocator = PETSc.Mat().create(comm=self.comm) - preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) - preallocator.setSizes(sizes) - preallocator.setUp() - preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) - self.set_values(preallocator, Vrow, Vcol, addv, mat_type=ptype) - preallocator.assemble() - dnz, onz = get_preallocation(preallocator, sizes[0][0]) - if on_diag: - numpy.maximum(dnz, 1, out=dnz) - preallocator.destroy() - - P = PETSc.Mat().create(comm=self.comm) - P.setType(ptype) - P.setSizes(sizes) - P.setPreallocationNNZ((dnz, onz)) - P.setOption(PETSc.Mat.Option.IGNORE_OFF_PROC_ENTRIES, False) - P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) - P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) - P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) - if ptype.endswith("sbaij"): - P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) - P.setUp() + P = self.setup_block(Vrow, Vcol) + addv = self.insert_mode[Vrow, Vcol] + + assemble_sparsity = P.getType() == "is" + if assemble_sparsity: + self.set_values(P, Vrow, Vcol, mat_type="preallocator") + if on_diag: + # populate diagonal entries + i = numpy.arange(P.getLGMap()[0].getSize(), dtype=PETSc.IntType)[:, None] + v = numpy.ones(i.shape, dtype=PETSc.ScalarType) + P.setValuesLocalRCV(i, i, v, addv=addv) + P.assemble() # append callables to zero entries, insert element matrices, and apply BCs assembly_callables.append(P.zeroEntries) - assembly_callables.append(partial(self.set_values, P, Vrow, Vcol, addv, mat_type=ptype)) + assembly_callables.append(partial(self.set_values, P, Vrow, Vcol)) if on_diag: own = Vrow.dof_dset.layout_vec.getLocalSize() bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] - Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) - if len(bdofs) > 0: - vals = numpy.ones(bdofs.shape, dtype=PETSc.RealType) - 
assembly_callables.append(partial(P.setValuesRCV, bdofs, bdofs, vals, addv)) + if assemble_sparsity: + Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) + assembly_callables.append(P.assemble) + assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) + else: + v = numpy.ones(bdofs.shape, PETSc.ScalarType) + assembly_callables.append(partial(P.setValuesLocalRCV, bdofs, bdofs, v, addv=addv)) + assembly_callables.append(P.assemble) gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): @@ -316,11 +334,29 @@ def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensati diagonal_terms.append(partial(P.setDiagonal, diag, addv=addv)) Pmats[Vrow, Vcol] = P + def sub_nullspace(nsp, iset): + if not nsp.handle or iset is None: + return nsp + vectors = [vec.getSubVector(iset).copy() for vec in nsp.getVecs()] + for v in vectors: + v.normalize() + return PETSc.NullSpace().create(constant=nsp.hasConstant(), + vectors=vectors, + comm=nsp.getComm()) + + def set_nullspaces(P, A, iset=None): + P.setNullSpace(sub_nullspace(A.getNullSpace(), iset)) + P.setTransposeNullSpace(sub_nullspace(A.getTransposeNullSpace(), iset)) + P.setNearNullSpace(sub_nullspace(A.getNearNullSpace(), iset)) + if len(V) == 1: Pmat = Pmats[V, V] + set_nullspaces(Pmat, Amat) else: Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) - assembly_callables.append(Pmat.assemble) + for Vrow, iset in zip(V, Pmat.getNestISs()[0]): + set_nullspaces(Pmats[Vrow, Vrow], Amat, iset=iset) + assembly_callables.append(Pmat.assemble) assembly_callables.extend(diagonal_terms) return Amat, Pmat, assembly_callables @@ -418,10 +454,10 @@ def condense(self, A, J, bcs, fcp, pc_type="icc"): Pmats = dict(Smats) C0 = self.assemble_reference_tensor(V0) R0 = self.assemble_reference_tensor(V0, transpose=True) + A0 = TripleProductKernel(R0, self._element_mass_matrix, C0) K0 = InteriorSolveKernel(A0, J00, fcp=fcp, pc_type=pc_type) K1 = ImplicitSchurComplementKernel(K0) - self.kernels.extend((A0, K0, K1)) kernels = {V0: K0, V1: K1} comm = self.comm args = [self.coefficients["cell"], V0.mesh().coordinates, *J00.coefficients(), *extract_firedrake_constants(J00)] @@ -495,9 +531,7 @@ def assemble_coefficients(self, J, fcp, block_diagonal=False): V = args_J[0].function_space() fe = V.finat_element formdegree = fe.formdegree - degree = fe.degree - if type(degree) != int: - degree, = set(degree) + degree, = set(as_tuple(fe.degree)) qdeg = degree if formdegree == tdim: qfam = "DG" if tdim == 1 else "DQ" @@ -545,6 +579,7 @@ def assemble_coefficients(self, J, fcp, block_diagonal=False): facet_integrals = [i for i in J.integrals() if "facet" in i.integral_type()] J_facet = expand_indices(expand_derivatives(ufl.Form(facet_integrals))) if len(J_facet.integrals()) > 0: + from firedrake.assemble import get_assembler gamma = coefficients.setdefault("facet", Function(V.dual())) assembly_callables.append(partial(get_assembler(J_facet, form_compiler_parameters=fcp, tensor=gamma, diagonal=True).assemble, tensor=gamma)) return coefficients, assembly_callables @@ -564,9 +599,7 @@ def assemble_reference_tensor(self, V, transpose=False, sort_interior=False): fe = V.finat_element tdim = fe.cell.get_spatial_dimension() formdegree = fe.formdegree - degree = fe.degree - if type(degree) != int: - degree, = set(degree) + degree, = set(as_tuple(fe.degree)) if formdegree == tdim: degree = degree + 1 is_interior, is_facet = is_restricted(fe) @@ -663,8 +696,109 @@ def 
_element_mass_matrix(self): data = numpy.tile(numpy.eye(shape[2], dtype=data.dtype), shape[:1] + (1,)*(len(shape)-1)) return PETSc.Mat().createAIJ((nrows, nrows), csr=(ai, aj, data), comm=COMM_SELF) + @cached_property + def _element_kernels(self): + M = self._element_mass_matrix + element_kernels = {} + for Vrow, Vcol in product(self.V, self.V): + # Interpolation of basis and exterior derivative onto broken spaces + C1 = self.assemble_reference_tensor(Vcol) + R1 = self.assemble_reference_tensor(Vrow, transpose=True) + # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b + element_kernel = TripleProductKernel(R1, M, C1) + schur_kernel = self.schur_kernel.get(Vrow) if Vrow == Vcol else None + if schur_kernel is not None: + V0 = FunctionSpace(Vrow.mesh(), restrict_element(self.embedding_element, "interior")) + C0 = self.assemble_reference_tensor(V0, sort_interior=True) + R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True) + element_kernel = schur_kernel(element_kernel, + TripleProductKernel(R1, M, C0), + TripleProductKernel(R0, M, C1), + TripleProductKernel(R0, M, C0)) + element_kernels[Vrow, Vcol] = element_kernel + return element_kernels + + @cached_property + def insert_mode(self): + is_dg = {} + for Vsub in self.V: + element = Vsub.finat_element + is_dg[Vsub] = element.entity_dofs() == element.entity_closure_dofs() + + insert_mode = {} + for Vrow, Vcol in product(self.V, self.V): + addv = PETSc.InsertMode.ADD_VALUES + if is_dg[Vrow] or is_dg[Vcol]: + addv = PETSc.InsertMode.INSERT + insert_mode[Vrow, Vcol] = addv + return insert_mode + + @cached_property + def assembly_lgmaps(self): + if self.mat_type != "is": + return {Vsub: Vsub.dof_dset.lgmap for Vsub in self.V} + lgmaps = {} + for Vsub in self.V: + lgmap = Vsub.dof_dset.lgmap + if self.allow_repeated: + indices = broken_function(Vsub, lgmap.indices).dat.data_ro + else: + indices = lgmap.indices.copy() + local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) + cell_node_map = broken_function(Vsub, local_indices).dat.data_ro + ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map), assume_unique=True) + indices[ghost] = -1 + lgmaps[Vsub] = PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) + return lgmaps + + def setup_block(self, Vrow, Vcol): + # Preallocate the auxiliary sparse operator + sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) + rmap = self.assembly_lgmaps[Vrow] + cmap = self.assembly_lgmaps[Vcol] + on_diag = Vrow == Vcol + ptype = self.mat_type if on_diag else PETSc.Mat.Type.AIJ + + preallocator = PETSc.Mat().create(comm=self.comm) + preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) + preallocator.setSizes(sizes) + preallocator.setISAllowRepeated(self.allow_repeated) + preallocator.setLGMap(rmap, cmap) + preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) + if ptype.endswith("sbaij"): + preallocator.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) + preallocator.setUp() + self.set_values(preallocator, Vrow, Vcol) + preallocator.assemble() + dnz, onz = get_preallocation(preallocator, sizes[0][0]) + if on_diag: + numpy.maximum(dnz, 1, out=dnz) + preallocator.destroy() + P = PETSc.Mat().create(comm=self.comm) + P.setType(ptype) + P.setSizes(sizes) + P.setISAllowRepeated(self.allow_repeated) + P.setLGMap(rmap, cmap) + if on_diag and ptype == "is" and self.allow_repeated: + bsize = Vrow.finat_element.space_dimension() * Vrow.value_size + local_mat = P.getISLocalMat() + nblocks = 
local_mat.getSize()[0] // bsize + local_mat.setVariableBlockSizes([bsize] * nblocks) + P.setPreallocationNNZ((dnz, onz)) + + if not (ptype.endswith("sbaij") or ptype == "is"): + P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) + P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) + P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) + P.setOption(PETSc.Mat.Option.FORCE_DIAGONAL_ENTRIES, True) + P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) + if ptype.endswith("sbaij"): + P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) + P.setUp() + return P + @PETSc.Log.EventDecorator("FDMSetValues") - def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): + def set_values(self, A, Vrow, Vcol, mat_type=None): """Assemble the auxiliary operator in the FDM basis using sparse reference tensors and diagonal mass matrices. @@ -676,17 +810,17 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): The test space. Vcol : FunctionSpace The trial space. - addv : PETSc.Mat.InsertMode - Flag indicating if we want to insert or add matrix values. - mat_type : PETSc.Mat.Type - The matrix type of auxiliary operator. This only used when ``A`` is a preallocator - to determine the nonzeros on the upper triangual part of an ``'sbaij'`` matrix. + mat_type : PETSc.Mat.Type + The matrix type of the auxiliary operator, read from ``A`` when not given. + The type ``'preallocator'`` is used to insert a constant element matrix + and record the global sparsity pattern. """ key = (Vrow.ufl_element(), Vcol.ufl_element()) on_diag = Vrow == Vcol + if mat_type is None: + mat_type = A.getType() try: assembler = self.assemblers[key] except KeyError: + addv = self.insert_mode[Vrow, Vcol] + spaces = (Vrow,) if on_diag else (Vrow, Vcol) + indices_acc = tuple(self.indices_acc[V] for V in spaces) M = self._element_mass_matrix # Interpolation of basis and exterior derivative onto broken spaces C1 = self.assemble_reference_tensor(Vcol) @@ -702,24 +836,29 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): TripleProductKernel(R1, M, C0), TripleProductKernel(R0, M, C1), TripleProductKernel(R0, M, C0)) - self.kernels.append(element_kernel) - spaces = (Vrow, Vcol)[on_diag:] - indices_acc = tuple(self.indices[V](op2.READ, V.cell_node_map()) for V in spaces) coefficients = self.coefficients["cell"] coefficients_acc = coefficients.dat(op2.READ, coefficients.cell_node_map()) + + element_kernel = self._element_kernels[Vrow, Vcol] kernel = element_kernel.kernel(on_diag=on_diag, addv=addv) assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, *element_kernel.make_args(A), coefficients_acc, *indices_acc) self.assemblers.setdefault(key, assembler) - if A.getType() == "preallocator": - # Determine the global sparsity pattern by inserting a constant sparse element matrix - args = assembler.arguments[:2] - kernel = ElementKernel(PETSc.Mat(), name="preallocate").kernel(mat_type=mat_type, on_diag=on_diag) - assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, - *(op2.PassthroughArg(op2.OpaqueType("Mat"), arg.data) for arg in args), - *indices_acc) + if mat_type == "preallocator": + key = key + ("preallocator",) + try: + assembler = self.assemblers[key] + except KeyError: + # Determine the global sparsity pattern by inserting a constant sparse element matrix + args = assembler.arguments[:2] + kernel = ElementKernel(PETSc.Mat(), name="preallocate").kernel(mat_type=mat_type, on_diag=on_diag, addv=addv) + assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, + *(op2.PassthroughArg(op2.OpaqueType("Mat"), arg.data) for arg in args), + *indices_acc) + self.assemblers.setdefault(key, assembler) + assembler.arguments[0].data = A.handle assembler() @@ -729,9 +868,8 @@ class ElementKernel: By default, it inserts
the same matrix on each cell.""" code = dedent(""" PetscErrorCode %(name)s(const Mat A, const Mat B, %(indices)s) { - PetscFunctionBeginUser; - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - PetscFunctionReturn(PETSC_SUCCESS); + PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + return PETSC_SUCCESS; }""") def __init__(self, A, name=None): @@ -755,28 +893,23 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): PetscInt m; const PetscInt *ai; PetscScalar *vals; - PetscFunctionBeginUser; PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); PetscCall(MatSeqAIJGetArrayWrite(A, &vals)); PetscCall(PetscMemcpy(vals, values, ai[m] * sizeof(*vals))); PetscCall(MatSeqAIJRestoreArrayWrite(A, &vals)); PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") if mat_type != "matfree": - select_cols = """ - for (PetscInt j = ai[i]; j < ai[i + 1]; j++) - indices[j] -= (indices[j] < rindices[i]) * (indices[j] + 1);""" code += dedent(""" - static inline PetscErrorCode MatSetValuesSparse(const Mat A, const Mat B, - const PetscInt *restrict rindices, - const PetscInt *restrict cindices, - InsertMode addv) { + static inline PetscErrorCode MatSetValuesLocalSparse(const Mat A, const Mat B, + const PetscInt *restrict rindices, + const PetscInt *restrict cindices, + InsertMode addv) { PetscBool done; PetscInt m, ncols, istart, *indices; const PetscInt *ai, *aj; const PetscScalar *vals; - PetscFunctionBeginUser; PetscCall(MatGetRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); PetscCall(PetscMalloc1(ai[m], &indices)); for (PetscInt j = 0; j < ai[m]; j++) indices[j] = cindices[aj[j]]; @@ -784,14 +917,13 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): for (PetscInt i = 0; i < m; i++) { istart = ai[i]; ncols = ai[i + 1] - istart; - %(select_cols)s - PetscCall(MatSetValues(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv)); + PetscCall(MatSetValuesLocal(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv)); } PetscCall(MatSeqAIJRestoreArrayRead(B, &vals)); PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); PetscCall(PetscFree(indices)); - PetscFunctionReturn(PETSC_SUCCESS); - }""" % {"select_cols": select_cols if mat_type.endswith("sbaij") else ""}) + return PETSC_SUCCESS; + }""") code += self.code % dict(self.rules, name=self.name, indices=", ".join("const PetscInt *restrict %s" % s for s in indices), rows=indices[0], cols=indices[-1], addv=addv) @@ -806,12 +938,11 @@ class TripleProductKernel(ElementKernel): const PetscScalar *restrict coefficients, %(indices)s) { Mat C; - PetscFunctionBeginUser; PetscCall(MatProductGetMats(B, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); PetscCall(MatProductNumeric(B)); - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - PetscFunctionReturn(PETSC_SUCCESS); + PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + return PETSC_SUCCESS; }""") def __init__(self, L, C, R, name=None): @@ -828,12 +959,12 @@ class SchurComplementKernel(ElementKernel): const Mat A11, const Mat A10, const Mat A01, const Mat A00, const PetscScalar *restrict coefficients, %(indices)s) { Mat C; - PetscFunctionBeginUser; PetscCall(MatProductGetMats(A11, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); %(condense)s - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - 
PetscFunctionReturn(PETSC_SUCCESS); + PetscCall(MatSetValuesLocalSparse(A, A11, %(rows)s, %(cols)s, %(addv)d)); + PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + return PETSC_SUCCESS; }""") def __init__(self, *kernels, name=None): @@ -852,7 +983,9 @@ def __init__(self, *kernels, name=None): istart += k * kdofs self.blocks = sorted(degree for degree in self.slices if degree > 1) - super().__init__(self.condense(), name=name) + result = self.condense() + result.axpy(1.0, self.submats[0]) + super().__init__(result, name=name) self.mats.extend(self.submats) self.rules["condense"] = self.condense_code @@ -864,16 +997,15 @@ class SchurComplementPattern(SchurComplementKernel): """Kernel builder to pad with zeros the Schur complement sparsity pattern.""" condense_code = dedent(""" PetscCall(MatProductNumeric(A11)); - PetscCall(MatAYPX(B, 0.0, A11, SUBSET_NONZERO_PATTERN)); + PetscCall(MatZeroEntries(B)); """) def condense(self, result=None): """Pad with zeros the statically condensed pattern""" - structure = PETSc.Mat.Structure.SUBSET if result else None if result is None: _, A10, A01, A00 = self.submats result = A10.matMatMult(A00, A01, result=result) - result.aypx(0.0, self.submats[0], structure=structure) + result.zeroEntries() return result @@ -898,18 +1030,15 @@ class SchurComplementDiagonal(SchurComplementKernel): PetscCall(MatSeqAIJRestoreArray(A00, &vals)); PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); """) def condense(self, result=None): - structure = PETSc.Mat.Structure.SUBSET if result else None A11, A10, A01, A00 = self.submats self.work[0] = A00.getDiagonal(result=self.work[0]) self.work[0].reciprocal() self.work[0].scale(-1) A01.diagonalScale(L=self.work[0]) result = A10.matMult(A01, result=result) - result.axpy(1.0, A11, structure=structure) return result @@ -923,7 +1052,6 @@ class SchurComplementBlockCholesky(SchurComplementKernel): const PetscInt *ai; PetscScalar *vals, *U; Mat X; - PetscFunctionBeginUser; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A01)); PetscCall(MatProductNumeric(A00)); @@ -951,11 +1079,10 @@ class SchurComplementBlockCholesky(SchurComplementKernel): PetscCall(MatProductGetMats(B, &X, NULL, NULL)); PetscCall(MatProductNumeric(X)); PetscCall(MatProductNumeric(B)); - PetscCall(MatAYPX(B, -1.0, A11, SUBSET_NONZERO_PATTERN)); + PetscCall(MatScale(B, -1.0)); """) def condense(self, result=None): - structure = PETSc.Mat.Structure.SUBSET if result else None # asssume that A10 = A01^T A11, _, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() @@ -976,7 +1103,7 @@ def condense(self, result=None): A00.assemble() self.work[0] = A00.matMult(A01, result=self.work[0]) result = self.work[0].transposeMatMult(self.work[0], result=result) - result.aypx(-1.0, A11, structure=structure) + result.scale(-1.0) return result @@ -990,7 +1117,6 @@ class SchurComplementBlockLU(SchurComplementKernel): const PetscInt *ai; PetscScalar *vals, *work, *L, *U; Mat X; - PetscFunctionBeginUser; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A10)); PetscCall(MatProductNumeric(A01)); @@ -1049,13 +1175,11 @@ class SchurComplementBlockLU(SchurComplementKernel): PetscCall(MatSeqAIJRestoreArray(A00, &vals)); PetscCall(PetscFree3(ipiv, perm, work)); - // B = A11 - A10 * inv(L^T) * X + // B = - A10 * inv(L^T) * X PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); """) def condense(self, result=None): - structure = PETSc.Mat.Structure.SUBSET if 
result else None A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() Q = numpy.ones(R.shape, dtype=R.dtype) @@ -1080,7 +1204,6 @@ def condense(self, result=None): A00.assemble() A00.scale(-1.0) result = A10.matMatMult(A00, self.work[0], result=result) - result.axpy(1.0, A11, structure=structure) return result @@ -1093,7 +1216,6 @@ class SchurComplementBlockInverse(SchurComplementKernel): PetscInt m, irow, bsize, *ipiv; const PetscInt *ai; PetscScalar *vals, *work, *ainv, swork; - PetscFunctionBeginUser; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A10)); PetscCall(MatProductNumeric(A01)); @@ -1129,11 +1251,9 @@ class SchurComplementBlockInverse(SchurComplementKernel): PetscCall(MatScale(A00, -1.0)); PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); """) def condense(self, result=None): - structure = PETSc.Mat.Structure.SUBSET if result else None A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() @@ -1152,7 +1272,6 @@ def condense(self, result=None): A00.assemble() A00.scale(-1.0) result = A10.matMatMult(A00, A01, result=result) - result.axpy(1.0, A11, structure=structure) return result @@ -1216,7 +1335,6 @@ def matmult_kernel_code(a, prefix="form", fcp=None, matshell=False): static PetscErrorCode %(prefix)s(Mat A, Vec X, Vec Y) { PetscScalar **appctx, *y; const PetscScalar *x; - PetscFunctionBeginUser; PetscCall(MatShellGetContext(A, &appctx)); PetscCall(VecZeroEntries(Y)); PetscCall(VecGetArray(Y, &y)); @@ -1224,7 +1342,7 @@ def matmult_kernel_code(a, prefix="form", fcp=None, matshell=False): %(matmult_call)s PetscCall(VecRestoreArrayRead(X, &x)); PetscCall(VecRestoreArray(Y, &y)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""" % {"prefix": prefix, "matmult_call": matmult_call("x", "y")}) return matmult_struct, matmult_call, ctx_struct, ctx_pack @@ -1243,7 +1361,6 @@ class InteriorSolveKernel(ElementKernel): PetscInt m; Mat A, B, C; Vec X, Y; - PetscFunctionBeginUser; PetscCall(KSPGetOperators(ksp, &A, &B)); PetscCall(MatShellSetContext(A, &appctx)); PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); @@ -1256,7 +1373,7 @@ class InteriorSolveKernel(ElementKernel): PetscCall(KSPSolve(ksp, Y, X)); PetscCall(VecDestroy(&X)); PetscCall(VecDestroy(&Y)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") def __init__(self, kernel, form, name=None, prefix="interior_", fcp=None, pc_type="icc"): @@ -1310,7 +1427,6 @@ class ImplicitSchurComplementKernel(ElementKernel): PetscInt i; Mat A, B, C; Vec X, Y; - PetscFunctionBeginUser; PetscCall(KSPGetOperators(ksp, &A, &B)); PetscCall(MatShellSetContext(A, &appctx)); PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); @@ -1337,7 +1453,7 @@ class ImplicitSchurComplementKernel(ElementKernel): for (i = 0; i < %(size)d; i++) y[i] = 0.0; %(A_call)s for (i = 0; i < %(fsize)d; i++) yf[i] += y[fdofs[i]]; - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }""") def __init__(self, kernel, name=None): @@ -1747,10 +1863,8 @@ def load_setSubMatCSR(comm, triu=False): InsertMode addv) {{ PetscInt m, ncols, irow, icol; - PetscInt *indices; - const PetscInt *cols; - const PetscScalar *vals; - PetscFunctionBeginUser; + PetscInt *indices, *cols; + PetscScalar *vals; PetscCall(MatGetSize(B, &m, NULL)); PetscCall(MatSeqAIJGetMaxRowNonzeros(B, &ncols)); PetscCall(PetscMalloc1(ncols, &indices)); @@ -1766,7 +1880,7 @@ def load_setSubMatCSR(comm, triu=False): 
PetscCall(MatRestoreRow(B, i, &ncols, &cols, &vals)); }} PetscCall(PetscFree(indices)); - PetscFunctionReturn(PETSC_SUCCESS); + return PETSC_SUCCESS; }} """) argtypes = [ctypes.c_voidp, ctypes.c_voidp, @@ -1836,7 +1950,7 @@ def assemble_reference_tensor(self, V): return Afdm, Dfdm, bdof, axes_shifts @PETSc.Log.EventDecorator("FDMSetValues") - def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): + def set_values(self, A, Vrow, Vcol, mat_type=None): """Assemble the stiffness matrix in the FDM basis using Kronecker products of interval matrices. @@ -1848,16 +1962,17 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): The test space. Vcol : FunctionSpace The trial space. - addv : PETSc.Mat.InsertMode - Flag indicating if we want to insert or add matrix values. mat_type : PETSc.Mat.Type The matrix type of auxiliary operator. This only used when ``A`` is a preallocator to determine the nonzeros on the upper triangual part of an ``'sbaij'`` matrix. """ + if mat_type is None: + mat_type = A.getType() triu = A.getType() == "preallocator" and mat_type.endswith("sbaij") set_submat = SparseAssembler.setSubMatCSR(COMM_SELF, triu=triu) update_A = lambda A, Ae, rindices: set_submat(A, Ae, rindices, rindices, addv) condense_element_mat = lambda x: x + addv = PETSc.InsertMode.ADD_VALUES def cell_to_global(lgmap, cell_to_local, cell_index, result=None): # Be careful not to create new arrays @@ -2083,7 +2198,7 @@ def assemble_coefficients(self, J, fcp): tdim = mesh.topological_dimension() Finv = ufl.JacobianInverse(mesh) - degree = max(as_tuple(V.ufl_element().degree())) + degree, = set(as_tuple(V.ufl_element().degree())) quad_deg = fcp.get("degree", 2*degree+1) dx = ufl.dx(degree=quad_deg, domain=mesh) family = "Discontinuous Lagrange" if tdim == 1 else "DQ" diff --git a/tests/firedrake/regression/test_bddc.py b/tests/firedrake/regression/test_bddc.py new file mode 100644 index 0000000000..f1f1630e2d --- /dev/null +++ b/tests/firedrake/regression/test_bddc.py @@ -0,0 +1,130 @@ +import pytest +from firedrake import * +from firedrake.petsc import DEFAULT_DIRECT_SOLVER + + +def bddc_params(static_condensation): + chol = { + "pc_type": "cholesky", + "pc_factor_mat_solver_type": "petsc", + "pc_factor_mat_ordering_type": "natural", + } + sp = { + "pc_type": "python", + "pc_python_type": "firedrake.BDDCPC", + "bddc_pc_bddc_neumann": chol, + "bddc_pc_bddc_dirichlet": chol, + "bddc_pc_bddc_coarse": DEFAULT_DIRECT_SOLVER, + } + return sp + + +def solver_parameters(static_condensation=True): + rtol = 1E-8 + atol = 1E-12 + sp_bddc = bddc_params(static_condensation) + repeated = True + if static_condensation: + sp = { + "pc_type": "python", + "pc_python_type": "firedrake.FacetSplitPC", + "facet_pc_type": "python", + "facet_pc_python_type": "firedrake.FDMPC", + "facet_fdm_static_condensation": True, + "facet_fdm_pc_use_amat": False, + "facet_fdm_mat_type": "is", + "facet_fdm_mat_is_allow_repeated": repeated, + "facet_fdm_pc_type": "fieldsplit", + "facet_fdm_pc_fieldsplit_type": "symmetric_multiplicative", + "facet_fdm_pc_fieldsplit_diag_use_amat": False, + "facet_fdm_pc_fieldsplit_off_diag_use_amat": False, + "facet_fdm_fieldsplit_ksp_type": "preonly", + "facet_fdm_fieldsplit_0_pc_type": "jacobi", + "facet_fdm_fieldsplit_1": sp_bddc, + } + else: + sp = { + "pc_type": "python", + "pc_python_type": "firedrake.FDMPC", + "fdm_pc_use_amat": False, + "fdm_mat_type": "is", + "fdm_mat_is_allow_repeated": repeated, + "fdm": sp_bddc, + } + sp.update({ + "mat_type": "matfree", + "ksp_type": "cg", + "ksp_norm_type": 
"natural", + "ksp_monitor": None, + "ksp_rtol": rtol, + "ksp_atol": atol, + }) + return sp + + +def solve_riesz_map(mesh, family, degree, bcs, condense): + dirichlet_ids = [] + if bcs: + dirichlet_ids = ["on_boundary"] + if hasattr(mesh, "extruded") and mesh.extruded: + dirichlet_ids.extend(["bottom", "top"]) + + tdim = mesh.topological_dimension() + if family.endswith("E"): + family = "RTCE" if tdim == 2 else "NCE" + if family.endswith("F"): + family = "RTCF" if tdim == 2 else "NCF" + V = FunctionSpace(mesh, family, degree, variant="fdm") + v = TestFunction(V) + u = TrialFunction(V) + d = { + H1: grad, + HCurl: curl, + HDiv: div, + }[V.ufl_element().sobolev_space] + + formdegree = V.finat_element.formdegree + if formdegree == 0: + a = inner(d(u), d(v)) * dx(degree=2*degree) + else: + a = (inner(u, v) + inner(d(u), d(v))) * dx(degree=2*degree) + + rg = RandomGenerator(PCG64(seed=123456789)) + u_exact = rg.uniform(V, -1, 1) + L = ufl.replace(a, {u: u_exact}) + bcs = [DirichletBC(V, u_exact, sub) for sub in dirichlet_ids] + nsp = None + if formdegree == 0: + nsp = VectorSpaceBasis([Function(V).interpolate(Constant(1))]) + nsp.orthonormalize() + + uh = Function(V, name="solution") + problem = LinearVariationalProblem(a, L, uh, bcs=bcs) + + sp = solver_parameters(condense) + solver = LinearVariationalSolver(problem, near_nullspace=nsp, + solver_parameters=sp, + options_prefix="") + solver.solve() + return solver.snes.getLinearSolveIterations() + + +@pytest.fixture(params=(2, 3), ids=("square", "cube")) +def mesh(request): + nx = 4 + dim = request.param + msh = UnitSquareMesh(nx, nx, quadrilateral=True) + if dim == 3: + msh = ExtrudedMesh(msh, nx) + return msh + + +@pytest.mark.parallel +@pytest.mark.parametrize("family", "Q") +@pytest.mark.parametrize("degree", (4,)) +@pytest.mark.parametrize("condense", (False, True)) +def test_bddc_fdm(mesh, family, degree, condense): + bcs = True + tdim = mesh.topological_dimension() + expected = 6 if tdim == 2 else 11 + assert solve_riesz_map(mesh, family, degree, bcs, condense) <= expected