From 07ba27e16c9a6ca6dbc61f5dd25102c8a8d4e22d Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Wed, 14 Feb 2024 16:01:06 +0000 Subject: [PATCH 1/9] work in progress --- firedrake/linear_solver.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/firedrake/linear_solver.py b/firedrake/linear_solver.py index c1dfbcc07e..8502ea1de8 100644 --- a/firedrake/linear_solver.py +++ b/firedrake/linear_solver.py @@ -163,8 +163,20 @@ def solve(self, x, b): else: acc = x.dat.vec_wo - with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): - self.ksp.solve(rhs, solution) + if "cuda" in self.A.petscmat.type: + with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): + b_cu = PETSc.Vec() + b_cu.createCUDAWithArrays(rhs) + u = PETSc.Vec() + u.createCUDAWithArrays(solution) + self.ksp.solve(b_cu, u) + u.getArray() + + else: + + with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): + self.ksp.solve(rhs, solution) + # print(solution.view()) r = self.ksp.getConvergedReason() if r < 0: From 00d5d75fdcd872fae1d2b94cfa6abec7924426fd Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Wed, 21 Feb 2024 12:31:36 +0000 Subject: [PATCH 2/9] offload (not yet really) first try --- firedrake/linear_solver.py | 3 +- firedrake/preconditioners/offload.py | 147 +++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 2 deletions(-) create mode 100644 firedrake/preconditioners/offload.py diff --git a/firedrake/linear_solver.py b/firedrake/linear_solver.py index 8502ea1de8..b52f41084e 100644 --- a/firedrake/linear_solver.py +++ b/firedrake/linear_solver.py @@ -170,13 +170,12 @@ def solve(self, x, b): u = PETSc.Vec() u.createCUDAWithArrays(solution) self.ksp.solve(b_cu, u) - u.getArray() + u.getArray() #how do this in one with solve before? else: with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): self.ksp.solve(rhs, solution) - # print(solution.view()) r = self.ksp.getConvergedReason() if r < 0: diff --git a/firedrake/preconditioners/offload.py b/firedrake/preconditioners/offload.py new file mode 100644 index 0000000000..892645d141 --- /dev/null +++ b/firedrake/preconditioners/offload.py @@ -0,0 +1,147 @@ +#copied from assembled + +import abc + +from firedrake.preconditioners.base import PCBase +from firedrake.functionspace import FunctionSpace, MixedFunctionSpace +from firedrake.petsc import PETSc +from firedrake.ufl_expr import TestFunction, TrialFunction +import firedrake.dmhooks as dmhooks +from firedrake.dmhooks import get_function_space + +import petsc4py.PETSc # in firedrake.petsc? + +#outside: ksp.setOperators(A) + +__all__ = ("OffloadPC") + + +class OffloadPC(PCBase): #still PETSc PC object? + """Offload to GPU as PC to solve. + + Internally this makes a PETSc PC object that can be controlled by + options using the extra options prefix ``offload_``. + """ + + _prefix = "offload_" + + def initialize(self, pc): + A, P = pc.getOperators() #both needed?- forgot + + if pc.getType() != "python": + raise ValueError("Expecting PC type python") + opc = pc #opc? + appctx = self.get_appctx(pc) # + fcp = appctx.get("form_compiler_parameters") # + + #rest needed here? + V = get_function_space(pc.getDM()) + if len(V) == 1: + V = FunctionSpace(V.mesh(), V.ufl_element()) + else: + V = MixedFunctionSpace([V_ for V_ in V]) + test = TestFunction(V) + trial = TrialFunction(V) + + if P.type == "python": + context = P.getPythonContext() + # It only makes sense to preconditioner/invert a diagonal + # block in general. That's all we're going to allow. + if not context.on_diag: # + raise ValueError("Only makes sense to invert diagonal block") + + prefix = pc.getOptionsPrefix() + options_prefix = prefix + self._prefix + + mat_type = PETSc.Options().getString(options_prefix + "mat_type", "aij") #cuda + + (a, bcs) = self.form(pc, test, trial) + + self.P = allocate_matrix(a, bcs=bcs, + form_compiler_parameters=fcp, + mat_type=mat_type, + options_prefix=options_prefix) + self._assemble_P = TwoFormAssembler(a, tensor=self.P, bcs=bcs, + form_compiler_parameters=fcp).assemble + self._assemble_P() # + + # Transfer nullspace over + Pmat = self.P.petscmat + Pmat.setNullSpace(P.getNullSpace()) + tnullsp = P.getTransposeNullSpace() + if tnullsp.handle != 0: + Pmat.setTransposeNullSpace(tnullsp) + Pmat.setNearNullSpace(P.getNearNullSpace()) + + # Internally, we just set up a PC object that the user can configure + # however from the PETSc command line. Since PC allows the user to specify + # a KSP, we can do iterative by -assembled_pc_type ksp. + #? + +#same - matrix here + pc = PETSc.PC().create(comm=opc.comm) #comm? + pc.incrementTabLevel(1, parent=opc) # + + # We set a DM and an appropriate SNESContext on the constructed PC so one + # can do e.g. multigrid or patch solves. + dm = opc.getDM() + self._ctx_ref = self.new_snes_ctx(opc, a, bcs, mat_type, + fcp=fcp, options_prefix=options_prefix) + + pc.setDM(dm) #DM? + pc.setOptionsPrefix(options_prefix) + + #matrix to cuda + A_cu = petsc4py.PETSc.Mat() + A_cu.createDenseCUDA(A.petscmat.size) + A.petscmat.copy(A_cu) + A.petscmat = A_cu + + P_cu = petsc4py.PETSc.Mat() + P_cu.createDenseCUDA(A.petscmat.size) + Pmat.petscmat.copy(P_cu) + Pmat.petscmat = P_cu + + pc.setOperators(A_cu, P_cu) #right? + self.pc = pc + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref, save=False): + pc.setFromOptions() + + def update(self, pc): + self._assemble_P() + + def form(self, pc, test, trial): + _, P = pc.getOperators() + if P.getType() == "python": + context = P.getPythonContext() + return (context.a, context.row_bcs) + else: + context = dmhooks.get_appctx(pc.getDM()) + return (context.Jp or context.J, context._problem.bcs) + +#vectors here + def apply(self, pc, x, y): #y=b? + dm = pc.getDM() + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref), : + b_cu = PETSc.Vec() #nonsense? + b_cu.createCUDAWithArrays(y) + u = PETSc.Vec() + u.createCUDAWithArrays(x) + self.pc.apply(x, y) + + def applyTranspose(self, pc, x, y): + dm = pc.getDM() + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): + b_cu = PETSc.Vec() + b_cu.createCUDAWithArrays(y) + u = PETSc.Vec() + u.createCUDAWithArrays(x) + self.pc.applyTranspose(x, y) + + def view(self, pc, viewer=None): + super(AssembledPC, self).view(pc, viewer) + if hasattr(self, "pc"): + viewer.printfASCII("PC to apply inverse\n") + self.pc.view(viewer) + +#after pc where in lin solv? From 2aae94880718f84298f8c55b197e8f41060c1f53 Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Sun, 25 Feb 2024 21:19:13 +0000 Subject: [PATCH 3/9] linear solver update cusparse --- firedrake/linear_solver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firedrake/linear_solver.py b/firedrake/linear_solver.py index b52f41084e..d9720f974d 100644 --- a/firedrake/linear_solver.py +++ b/firedrake/linear_solver.py @@ -163,14 +163,14 @@ def solve(self, x, b): else: acc = x.dat.vec_wo - if "cuda" in self.A.petscmat.type: + if "cu" in self.A.petscmat.type: #todo: cuda or cu? with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): b_cu = PETSc.Vec() b_cu.createCUDAWithArrays(rhs) u = PETSc.Vec() u.createCUDAWithArrays(solution) self.ksp.solve(b_cu, u) - u.getArray() #how do this in one with solve before? + u.getArray() else: From 6ed6495f93f08f912b705e2c6eb9de69956fa304 Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Wed, 6 Mar 2024 14:34:02 +0000 Subject: [PATCH 4/9] some more changes --- firedrake/preconditioners/offload.py | 97 ++++++++++++---------------- 1 file changed, 42 insertions(+), 55 deletions(-) diff --git a/firedrake/preconditioners/offload.py b/firedrake/preconditioners/offload.py index 892645d141..f0e225381f 100644 --- a/firedrake/preconditioners/offload.py +++ b/firedrake/preconditioners/offload.py @@ -1,5 +1,3 @@ -#copied from assembled - import abc from firedrake.preconditioners.base import PCBase @@ -26,44 +24,32 @@ class OffloadPC(PCBase): #still PETSc PC object? _prefix = "offload_" def initialize(self, pc): - A, P = pc.getOperators() #both needed?- forgot + A, P = pc.getOperators() #P preconditioner - if pc.getType() != "python": - raise ValueError("Expecting PC type python") + if pc.getType() != "assembled": + raise ValueError("Expecting PC type assembled") #correct type? opc = pc #opc? - appctx = self.get_appctx(pc) # - fcp = appctx.get("form_compiler_parameters") # - - #rest needed here? - V = get_function_space(pc.getDM()) - if len(V) == 1: - V = FunctionSpace(V.mesh(), V.ufl_element()) - else: - V = MixedFunctionSpace([V_ for V_ in V]) - test = TestFunction(V) - trial = TrialFunction(V) - - if P.type == "python": + appctx = self.get_appctx(pc) + fcp = appctx.get("form_compiler_parameters") + + if P.type == "assembled": #not python value error - only assembled (preconditioner) context = P.getPythonContext() # It only makes sense to preconditioner/invert a diagonal # block in general. That's all we're going to allow. - if not context.on_diag: # + if not context.on_diag: #still? diagonal block? raise ValueError("Only makes sense to invert diagonal block") prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix - mat_type = PETSc.Options().getString(options_prefix + "mat_type", "aij") #cuda + mat_type = PETSc.Options().getString(options_prefix + "mat_type", "aijcusparse") (a, bcs) = self.form(pc, test, trial) - self.P = allocate_matrix(a, bcs=bcs, + self.P = allocate_matrix(a, bcs=bcs, #eventually change allocate matrix form_compiler_parameters=fcp, mat_type=mat_type, options_prefix=options_prefix) - self._assemble_P = TwoFormAssembler(a, tensor=self.P, bcs=bcs, - form_compiler_parameters=fcp).assemble - self._assemble_P() # # Transfer nullspace over Pmat = self.P.petscmat @@ -79,7 +65,7 @@ def initialize(self, pc): #? #same - matrix here - pc = PETSc.PC().create(comm=opc.comm) #comm? + pc = PETSc.PC().create(comm=opc.comm) pc.incrementTabLevel(1, parent=opc) # # We set a DM and an appropriate SNESContext on the constructed PC so one @@ -88,7 +74,7 @@ def initialize(self, pc): self._ctx_ref = self.new_snes_ctx(opc, a, bcs, mat_type, fcp=fcp, options_prefix=options_prefix) - pc.setDM(dm) #DM? + pc.setDM(dm) pc.setOptionsPrefix(options_prefix) #matrix to cuda @@ -96,52 +82,53 @@ def initialize(self, pc): A_cu.createDenseCUDA(A.petscmat.size) A.petscmat.copy(A_cu) A.petscmat = A_cu + self._offload_A = A.petscmat #fishy P_cu = petsc4py.PETSc.Mat() P_cu.createDenseCUDA(A.petscmat.size) Pmat.petscmat.copy(P_cu) Pmat.petscmat = P_cu + self._offload_A = Pmat.petscmat #fishy - pc.setOperators(A_cu, P_cu) #right? + pc.setOperators(A_cu, P_cu) self.pc = pc with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref, save=False): pc.setFromOptions() def update(self, pc): - self._assemble_P() - - def form(self, pc, test, trial): - _, P = pc.getOperators() - if P.getType() == "python": - context = P.getPythonContext() - return (context.a, context.row_bcs) - else: - context = dmhooks.get_appctx(pc.getDM()) - return (context.Jp or context.J, context._problem.bcs) - -#vectors here + self._offload_A() + + # def form(self, pc, test, trial): + # _, P = pc.getOperators() + # if P.getType() == "python": + # context = P.getPythonContext() + # return (context.a, context.row_bcs) + # else: + # context = dmhooks.get_appctx(pc.getDM()) + # return (context.Jp or context.J, context._problem.bcs) + +#vectors and solve def apply(self, pc, x, y): #y=b? dm = pc.getDM() with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref), : - b_cu = PETSc.Vec() #nonsense? + b_cu = PETSc.Vec() b_cu.createCUDAWithArrays(y) u = PETSc.Vec() u.createCUDAWithArrays(x) - self.pc.apply(x, y) - - def applyTranspose(self, pc, x, y): - dm = pc.getDM() - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): - b_cu = PETSc.Vec() - b_cu.createCUDAWithArrays(y) - u = PETSc.Vec() - u.createCUDAWithArrays(x) - self.pc.applyTranspose(x, y) + self.pc.apply(x, y) #solve is here + u.getArray() #give vector back + + # def applyTranspose(self, pc, x, y): #same but other side + # dm = pc.getDM() + # with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): + # b_cu = PETSc.Vec() + # b_cu.createCUDAWithArrays(y) + # u = PETSc.Vec() + # u.createCUDAWithArrays(x) + # self.pc.applyTranspose(x, y) def view(self, pc, viewer=None): - super(AssembledPC, self).view(pc, viewer) + super().view(pc, viewer) if hasattr(self, "pc"): - viewer.printfASCII("PC to apply inverse\n") - self.pc.view(viewer) - -#after pc where in lin solv? + viewer.printfASCII("PC to solve on GPU\n") + self.pc.view(viewer) \ No newline at end of file From 5f75618d9f65862d6220a03a625422cfbade8dd7 Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Tue, 12 Mar 2024 13:03:03 +0000 Subject: [PATCH 5/9] cusparse convert - not done --- firedrake/linear_solver.py | 25 +++--- firedrake/preconditioners/offload.py | 122 +++++++++++---------------- 2 files changed, 62 insertions(+), 85 deletions(-) diff --git a/firedrake/linear_solver.py b/firedrake/linear_solver.py index d9720f974d..c8e58c0ec6 100644 --- a/firedrake/linear_solver.py +++ b/firedrake/linear_solver.py @@ -55,6 +55,8 @@ def __init__(self, A, *, P=None, solver_parameters=None, solver_parameters = solving_utils.set_defaults(solver_parameters, A.arguments(), ksp_defaults=self.DEFAULT_KSP_PARAMETERS) + # todo: add offload to solver parameters - how? prefix? + self.A = A self.comm = A.comm self._comm = internal_comm(self.comm, self) @@ -163,19 +165,20 @@ def solve(self, x, b): else: acc = x.dat.vec_wo - if "cu" in self.A.petscmat.type: #todo: cuda or cu? - with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): - b_cu = PETSc.Vec() - b_cu.createCUDAWithArrays(rhs) - u = PETSc.Vec() - u.createCUDAWithArrays(solution) - self.ksp.solve(b_cu, u) - u.getArray() + # if "cu" in self.A.petscmat.type: # todo: cuda or cu? + # with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): + # b_cu = PETSc.Vec() + # b_cu.createCUDAWithArrays(rhs) + # u = PETSc.Vec() + # u.createCUDAWithArrays(solution) + # self.ksp.solve(b_cu, u) + # u.getArray() - else: + # else: + # instead: preconditioner - with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): - self.ksp.solve(rhs, solution) + with self.inserted_options(), b.dat.vec_ro as rhs, acc as solution, dmhooks.add_hooks(self.ksp.dm, self): + self.ksp.solve(rhs, solution) r = self.ksp.getConvergedReason() if r < 0: diff --git a/firedrake/preconditioners/offload.py b/firedrake/preconditioners/offload.py index f0e225381f..437c81deca 100644 --- a/firedrake/preconditioners/offload.py +++ b/firedrake/preconditioners/offload.py @@ -9,12 +9,13 @@ import petsc4py.PETSc # in firedrake.petsc? -#outside: ksp.setOperators(A) +# outside: ksp.setOperators(A) +# todo: for densecuda later- now only cusparse __all__ = ("OffloadPC") -class OffloadPC(PCBase): #still PETSc PC object? +class OffloadPC(PCBase): """Offload to GPU as PC to solve. Internally this makes a PETSc PC object that can be controlled by @@ -24,111 +25,84 @@ class OffloadPC(PCBase): #still PETSc PC object? _prefix = "offload_" def initialize(self, pc): - A, P = pc.getOperators() #P preconditioner + A, P = pc.getOperators() # P preconditioner if pc.getType() != "assembled": - raise ValueError("Expecting PC type assembled") #correct type? - opc = pc #opc? - appctx = self.get_appctx(pc) - fcp = appctx.get("form_compiler_parameters") - - if P.type == "assembled": #not python value error - only assembled (preconditioner) + raise ValueError("Expecting PC type assembled") # correct type? + outer_pc = pc + appctx = self.get_appctx(pc) + fcp = appctx.get("form_compiler_parameters") + + V = get_function_space(pc.getDM()) + if len(V) == 1: + V = FunctionSpace(V.mesh(), V.ufl_element()) + else: + V = MixedFunctionSpace([V_ for V_ in V]) + test = TestFunction(V) + trial = TrialFunction(V) + + (a, bcs) = self.form(pc, test, trial) + + if P.type == "assembled": # not python value error - only assembled (preconditioner) context = P.getPythonContext() # It only makes sense to preconditioner/invert a diagonal # block in general. That's all we're going to allow. - if not context.on_diag: #still? diagonal block? + if not context.on_diag: # still? diagonal block? raise ValueError("Only makes sense to invert diagonal block") - + prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix - mat_type = PETSc.Options().getString(options_prefix + "mat_type", "aijcusparse") - - (a, bcs) = self.form(pc, test, trial) + mat_type = PETSc.Options().getString(options_prefix + "mat_type", "cusparse") # cuda? - self.P = allocate_matrix(a, bcs=bcs, #eventually change allocate matrix - form_compiler_parameters=fcp, - mat_type=mat_type, - options_prefix=options_prefix) + # eventually change allocate_matrix from assembled.py # Transfer nullspace over - Pmat = self.P.petscmat + Pmat = self.P.petscmat Pmat.setNullSpace(P.getNullSpace()) tnullsp = P.getTransposeNullSpace() if tnullsp.handle != 0: Pmat.setTransposeNullSpace(tnullsp) Pmat.setNearNullSpace(P.getNearNullSpace()) - # Internally, we just set up a PC object that the user can configure - # however from the PETSc command line. Since PC allows the user to specify - # a KSP, we can do iterative by -assembled_pc_type ksp. - #? + pc = PETSc.PC().create(comm=outer_pc.comm) + pc.incrementTabLevel(1, parent=outer_pc) -#same - matrix here - pc = PETSc.PC().create(comm=opc.comm) - pc.incrementTabLevel(1, parent=opc) # - - # We set a DM and an appropriate SNESContext on the constructed PC so one - # can do e.g. multigrid or patch solves. - dm = opc.getDM() - self._ctx_ref = self.new_snes_ctx(opc, a, bcs, mat_type, + # We set a DM and an appropriate SNESContext on the constructed PC + # so one can do e.g. multigrid or patch solves. + dm = outer_pc.getDM() + self._ctx_ref = self.new_snes_ctx(outer_pc, a, bcs, mat_type, fcp=fcp, options_prefix=options_prefix) - pc.setDM(dm) + pc.setDM(dm) pc.setOptionsPrefix(options_prefix) - #matrix to cuda - A_cu = petsc4py.PETSc.Mat() - A_cu.createDenseCUDA(A.petscmat.size) - A.petscmat.copy(A_cu) - A.petscmat = A_cu - self._offload_A = A.petscmat #fishy - - P_cu = petsc4py.PETSc.Mat() - P_cu.createDenseCUDA(A.petscmat.size) - Pmat.petscmat.copy(P_cu) - Pmat.petscmat = P_cu - self._offload_A = Pmat.petscmat #fishy + # matrix to cuda + P_cu = P.petscmat.convert(mat_type='aijcusparse') - pc.setOperators(A_cu, P_cu) + pc.setOperators(A, P_cu) self.pc = pc - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref, save=False): + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref, save=False): pc.setFromOptions() def update(self, pc): - self._offload_A() - - # def form(self, pc, test, trial): - # _, P = pc.getOperators() - # if P.getType() == "python": - # context = P.getPythonContext() - # return (context.a, context.row_bcs) - # else: - # context = dmhooks.get_appctx(pc.getDM()) - # return (context.Jp or context.J, context._problem.bcs) - -#vectors and solve - def apply(self, pc, x, y): #y=b? + _, P = pc.getOperators() + _, P_cu = self.pc.getOperators() + P.copy(P_cu) + +# vectors and solve + def apply(self, pc, x, y): # y=b? dm = pc.getDM() - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref), : - b_cu = PETSc.Vec() - b_cu.createCUDAWithArrays(y) + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): + b_cu = PETSc.Vec() + b_cu.createCUDAWithArrays(y) u = PETSc.Vec() u.createCUDAWithArrays(x) - self.pc.apply(x, y) #solve is here - u.getArray() #give vector back - - # def applyTranspose(self, pc, x, y): #same but other side - # dm = pc.getDM() - # with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): - # b_cu = PETSc.Vec() - # b_cu.createCUDAWithArrays(y) - # u = PETSc.Vec() - # u.createCUDAWithArrays(x) - # self.pc.applyTranspose(x, y) + self.pc.apply(x, y) # solve is here + u.getArray() # return vector def view(self, pc, viewer=None): super().view(pc, viewer) if hasattr(self, "pc"): viewer.printfASCII("PC to solve on GPU\n") - self.pc.view(viewer) \ No newline at end of file + self.pc.view(viewer) From 500b7d3ef52dd705f26caea52fd6f162489989c2 Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Thu, 14 Mar 2024 17:38:16 +0000 Subject: [PATCH 6/9] last changes to offload --- firedrake/preconditioners/__init__.py | 1 + firedrake/preconditioners/offload.py | 37 +++++++++++++++++---------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/firedrake/preconditioners/__init__.py b/firedrake/preconditioners/__init__.py index cd75ae7380..ca04bd9cbd 100644 --- a/firedrake/preconditioners/__init__.py +++ b/firedrake/preconditioners/__init__.py @@ -12,3 +12,4 @@ from firedrake.preconditioners.fdm import * # noqa: F401 from firedrake.preconditioners.hiptmair import * # noqa: F401 from firedrake.preconditioners.facet_split import * # noqa: F401 +from firedrake.preconditioners.offload import * # noqa: F401 diff --git a/firedrake/preconditioners/offload.py b/firedrake/preconditioners/offload.py index 437c81deca..9e5345b320 100644 --- a/firedrake/preconditioners/offload.py +++ b/firedrake/preconditioners/offload.py @@ -12,7 +12,7 @@ # outside: ksp.setOperators(A) # todo: for densecuda later- now only cusparse -__all__ = ("OffloadPC") +__all__ = ("OffloadPC",) class OffloadPC(PCBase): @@ -27,8 +27,6 @@ class OffloadPC(PCBase): def initialize(self, pc): A, P = pc.getOperators() # P preconditioner - if pc.getType() != "assembled": - raise ValueError("Expecting PC type assembled") # correct type? outer_pc = pc appctx = self.get_appctx(pc) fcp = appctx.get("form_compiler_parameters") @@ -55,15 +53,15 @@ def initialize(self, pc): mat_type = PETSc.Options().getString(options_prefix + "mat_type", "cusparse") # cuda? + # matrix to cuda + P_cu = P.convert(mat_type='aijcusparse') # eventually change allocate_matrix from assembled.py - # Transfer nullspace over - Pmat = self.P.petscmat - Pmat.setNullSpace(P.getNullSpace()) + P_cu.setNullSpace(P.getNullSpace()) tnullsp = P.getTransposeNullSpace() if tnullsp.handle != 0: - Pmat.setTransposeNullSpace(tnullsp) - Pmat.setNearNullSpace(P.getNearNullSpace()) + P_cu.setTransposeNullSpace(tnullsp) + P_cu.setNearNullSpace(P.getNearNullSpace()) pc = PETSc.PC().create(comm=outer_pc.comm) pc.incrementTabLevel(1, parent=outer_pc) @@ -71,15 +69,13 @@ def initialize(self, pc): # We set a DM and an appropriate SNESContext on the constructed PC # so one can do e.g. multigrid or patch solves. dm = outer_pc.getDM() - self._ctx_ref = self.new_snes_ctx(outer_pc, a, bcs, mat_type, - fcp=fcp, options_prefix=options_prefix) + self._ctx_ref = self.new_snes_ctx( + outer_pc, a, bcs, mat_type, + fcp=fcp, options_prefix=options_prefix + ) pc.setDM(dm) pc.setOptionsPrefix(options_prefix) - - # matrix to cuda - P_cu = P.petscmat.convert(mat_type='aijcusparse') - pc.setOperators(A, P_cu) self.pc = pc with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref, save=False): @@ -90,6 +86,15 @@ def update(self, pc): _, P_cu = self.pc.getOperators() P.copy(P_cu) + def form(self, pc, test, trial): + _, P = pc.getOperators() + if P.getType() == "python": + context = P.getPythonContext() + return (context.a, context.row_bcs) + else: + context = dmhooks.get_appctx(pc.getDM()) + return (context.Jp or context.J, context._problem.bcs) + # vectors and solve def apply(self, pc, x, y): # y=b? dm = pc.getDM() @@ -101,8 +106,12 @@ def apply(self, pc, x, y): # y=b? self.pc.apply(x, y) # solve is here u.getArray() # return vector + def applyTranspose(self, pc, X, Y): + raise NotImplementedError + def view(self, pc, viewer=None): super().view(pc, viewer) + print("viewing PC") if hasattr(self, "pc"): viewer.printfASCII("PC to solve on GPU\n") self.pc.view(viewer) From 2778be5a25e4a29d5cac5ed908c9daff4f27e151 Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Tue, 23 Apr 2024 19:37:52 +0100 Subject: [PATCH 7/9] Commentary --- firedrake/preconditioners/offload.py | 36 ++++++++++++---------------- firedrake/solving.py | 21 +++++++++------- 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/firedrake/preconditioners/offload.py b/firedrake/preconditioners/offload.py index 9e5345b320..c62515853d 100644 --- a/firedrake/preconditioners/offload.py +++ b/firedrake/preconditioners/offload.py @@ -1,5 +1,3 @@ -import abc - from firedrake.preconditioners.base import PCBase from firedrake.functionspace import FunctionSpace, MixedFunctionSpace from firedrake.petsc import PETSc @@ -7,16 +5,11 @@ import firedrake.dmhooks as dmhooks from firedrake.dmhooks import get_function_space -import petsc4py.PETSc # in firedrake.petsc? - -# outside: ksp.setOperators(A) -# todo: for densecuda later- now only cusparse - __all__ = ("OffloadPC",) class OffloadPC(PCBase): - """Offload to GPU as PC to solve. + """Offload PC from CPU to GPU and back. Internally this makes a PETSc PC object that can be controlled by options using the extra options prefix ``offload_``. @@ -41,28 +34,29 @@ def initialize(self, pc): (a, bcs) = self.form(pc, test, trial) - if P.type == "assembled": # not python value error - only assembled (preconditioner) + if P.type == "assembled": context = P.getPythonContext() # It only makes sense to preconditioner/invert a diagonal # block in general. That's all we're going to allow. - if not context.on_diag: # still? diagonal block? + if not context.on_diag: raise ValueError("Only makes sense to invert diagonal block") prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix - mat_type = PETSc.Options().getString(options_prefix + "mat_type", "cusparse") # cuda? + mat_type = PETSc.Options().getString(options_prefix + "mat_type", "cusparse") - # matrix to cuda + # Convert matrix to ajicusparse P_cu = P.convert(mat_type='aijcusparse') - # eventually change allocate_matrix from assembled.py + # Transfer nullspace P_cu.setNullSpace(P.getNullSpace()) tnullsp = P.getTransposeNullSpace() if tnullsp.handle != 0: P_cu.setTransposeNullSpace(tnullsp) P_cu.setNearNullSpace(P.getNearNullSpace()) + # PC object set-up pc = PETSc.PC().create(comm=outer_pc.comm) pc.incrementTabLevel(1, parent=outer_pc) @@ -95,16 +89,16 @@ def form(self, pc, test, trial): context = dmhooks.get_appctx(pc.getDM()) return (context.Jp or context.J, context._problem.bcs) -# vectors and solve - def apply(self, pc, x, y): # y=b? + # Convert vectors to CUDA, solve and get solution on CPU back + def apply(self, pc, x, y): dm = pc.getDM() with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): - b_cu = PETSc.Vec() - b_cu.createCUDAWithArrays(y) - u = PETSc.Vec() - u.createCUDAWithArrays(x) - self.pc.apply(x, y) # solve is here - u.getArray() # return vector + y_cu = PETSc.Vec() + y_cu.createCUDAWithArrays(y) + x_cu = PETSc.Vec() + x_cu.createCUDAWithArrays(x) + self.pc.apply(x_cu, y_cu) + y.copy(y_cu) def applyTranspose(self, pc, X, Y): raise NotImplementedError diff --git a/firedrake/solving.py b/firedrake/solving.py index de55e52048..46754a2a17 100644 --- a/firedrake/solving.py +++ b/firedrake/solving.py @@ -252,15 +252,18 @@ def _la_solve(A, x, b, **kwargs): options_prefix=options_prefix) if isinstance(x, firedrake.Vector): x = x.function - # linear MG doesn't need RHS, supply zero. - lvp = vs.LinearVariationalProblem(a=A.a, L=0, u=x, bcs=A.bcs) - mat_type = A.mat_type - appctx = solver_parameters.get("appctx", {}) - ctx = solving_utils._SNESContext(lvp, - mat_type=mat_type, - pmat_type=mat_type, - appctx=appctx, - options_prefix=options_prefix) + if not isinstance(A, firedrake.matrix.AssembledMatrix): + # linear MG doesn't need RHS, supply zero. + lvp = vs.LinearVariationalProblem(a=A.a, L=0, u=x, bcs=A.bcs) + mat_type = A.mat_type + appctx = solver_parameters.get("appctx", {}) + ctx = solving_utils._SNESContext(lvp, + mat_type=mat_type, + pmat_type=mat_type, + appctx=appctx, + options_prefix=options_prefix) + else: + ctx = None dm = solver.ksp.dm with dmhooks.add_hooks(dm, solver, appctx=ctx): From 7f8d3f30dfe4491b8291961b159489101df468de Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Thu, 9 May 2024 20:21:55 +0100 Subject: [PATCH 8/9] after meeting --- demos/helmholtz/helmholtz.txt | 57 +++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 demos/helmholtz/helmholtz.txt diff --git a/demos/helmholtz/helmholtz.txt b/demos/helmholtz/helmholtz.txt new file mode 100644 index 0000000000..95334ccb6a --- /dev/null +++ b/demos/helmholtz/helmholtz.txt @@ -0,0 +1,57 @@ +Main Stage 366614 +Main Stage;firedrake 44369 +Main Stage;firedrake;firedrake.solving.solve 86 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve 196 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve 140 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval 736 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute 212 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute;Parloop_Cells_wrap_form0_cell_integral 112 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;ParLoopExecute;Parloop_Cells_wrap_form0_cell_integral;pyop2.global_kernel.GlobalKernel.compile 415552 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESFunctionEval;firedrake.tsfc_interface.compile_form 42597 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval 866 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute 149 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute;Parloop_Cells_wrap_form00_cell_integral 136 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.solve;SNESSolve;SNESJacobianEval;ParLoopExecute;Parloop_Cells_wrap_form00_cell_integral;pyop2.global_kernel.GlobalKernel.compile 407506 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__ 1771 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.tsfc_interface.compile_form 56423 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.tsfc_interface.compile_form;firedrake.formmanipulation.split_form 1907 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.NonlinearVariationalSolver.__init__;firedrake.solving_utils._SNESContext.__init__ 618 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__ 145 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.ufl_expr.action 4387 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.variational_solver.NonlinearVariationalProblem.__init__ 332 +Main Stage;firedrake;firedrake.solving.solve;firedrake.variational_solver.LinearVariationalProblem.__init__;firedrake.variational_solver.NonlinearVariationalProblem.__init__;firedrake.ufl_expr.adjoint 2798 +Main Stage;firedrake;firedrake.function.Function.interpolate 342 +Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble 5644 +Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate 29 +Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute 298 +Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel 204 +Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel;pyop2.global_kernel.GlobalKernel.compile 682292 +Main Stage;firedrake;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.make_interpolator 40658 +Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write 2473 +Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate 303 +Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble 1080 +Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate 23 +Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute 328 +Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel 165 +Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.SameMeshInterpolator._interpolate;ParLoopExecute;Parloop_Cells_wrap_expression_kernel;pyop2.global_kernel.GlobalKernel.compile 663410 +Main Stage;firedrake;firedrake.output.vtk_output.VTKFile.write;firedrake.function.Function.interpolate;firedrake.assemble.assemble;firedrake.interpolation.make_interpolator 55147 +Main Stage;firedrake;firedrake.__init__ 495196 +Main Stage;firedrake;firedrake.assemble.assemble 949 +Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute 310 +Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute;Parloop_Cells_wrap_form_cell_integral 95 +Main Stage;firedrake;firedrake.assemble.assemble;ParLoopExecute;Parloop_Cells_wrap_form_cell_integral;pyop2.global_kernel.GlobalKernel.compile 355507 +Main Stage;firedrake;firedrake.assemble.assemble;firedrake.tsfc_interface.compile_form 20219 +Main Stage;firedrake;CreateFunctionSpace 919 +Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace 79 +Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__ 165 +Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data 13 +Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__ 825 +Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__;FunctionSpaceData: CreateElement 1274 +Main Stage;firedrake;CreateFunctionSpace;CreateFunctionSpace;firedrake.functionspaceimpl.FunctionSpace.__init__;firedrake.functionspacedata.get_shared_data;firedrake.functionspacedata.FunctionSpaceData.__init__;firedrake.mesh.MeshTopology._facets 789 +Main Stage;firedrake;CreateFunctionSpace;CreateMesh 147 +Main Stage;firedrake;CreateFunctionSpace;CreateMesh;Mesh: numbering 376 +Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh 12 +Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh 11 +Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh 834 +Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh;CreateMesh 676 +Main Stage;firedrake;firedrake.utility_meshes.UnitSquareMesh;firedrake.utility_meshes.SquareMesh;firedrake.utility_meshes.RectangleMesh;DMPlexInterp 382 From 6046bab1e1035065a1e812ca2da5754dbd2c4937 Mon Sep 17 00:00:00 2001 From: Lilly Schweiger Date: Fri, 12 Jul 2024 15:46:59 +0100 Subject: [PATCH 9/9] Events --- firedrake/preconditioners/offload.py | 134 ++++++++++++++------------- 1 file changed, 70 insertions(+), 64 deletions(-) diff --git a/firedrake/preconditioners/offload.py b/firedrake/preconditioners/offload.py index c62515853d..7a306ae24e 100644 --- a/firedrake/preconditioners/offload.py +++ b/firedrake/preconditioners/offload.py @@ -18,62 +18,64 @@ class OffloadPC(PCBase): _prefix = "offload_" def initialize(self, pc): - A, P = pc.getOperators() # P preconditioner - - outer_pc = pc - appctx = self.get_appctx(pc) - fcp = appctx.get("form_compiler_parameters") - - V = get_function_space(pc.getDM()) - if len(V) == 1: - V = FunctionSpace(V.mesh(), V.ufl_element()) - else: - V = MixedFunctionSpace([V_ for V_ in V]) - test = TestFunction(V) - trial = TrialFunction(V) - - (a, bcs) = self.form(pc, test, trial) - - if P.type == "assembled": - context = P.getPythonContext() - # It only makes sense to preconditioner/invert a diagonal - # block in general. That's all we're going to allow. - if not context.on_diag: - raise ValueError("Only makes sense to invert diagonal block") - - prefix = pc.getOptionsPrefix() - options_prefix = prefix + self._prefix - - mat_type = PETSc.Options().getString(options_prefix + "mat_type", "cusparse") - - # Convert matrix to ajicusparse - P_cu = P.convert(mat_type='aijcusparse') - - # Transfer nullspace - P_cu.setNullSpace(P.getNullSpace()) - tnullsp = P.getTransposeNullSpace() - if tnullsp.handle != 0: - P_cu.setTransposeNullSpace(tnullsp) - P_cu.setNearNullSpace(P.getNearNullSpace()) - - # PC object set-up - pc = PETSc.PC().create(comm=outer_pc.comm) - pc.incrementTabLevel(1, parent=outer_pc) - - # We set a DM and an appropriate SNESContext on the constructed PC - # so one can do e.g. multigrid or patch solves. - dm = outer_pc.getDM() - self._ctx_ref = self.new_snes_ctx( - outer_pc, a, bcs, mat_type, - fcp=fcp, options_prefix=options_prefix - ) - - pc.setDM(dm) - pc.setOptionsPrefix(options_prefix) - pc.setOperators(A, P_cu) - self.pc = pc - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref, save=False): - pc.setFromOptions() + with PETSc.Log.Event("Event: initialize offload"): # + A, P = pc.getOperators() + + outer_pc = pc + appctx = self.get_appctx(pc) + fcp = appctx.get("form_compiler_parameters") + + V = get_function_space(pc.getDM()) + if len(V) == 1: + V = FunctionSpace(V.mesh(), V.ufl_element()) + else: + V = MixedFunctionSpace([V_ for V_ in V]) + test = TestFunction(V) + trial = TrialFunction(V) + + (a, bcs) = self.form(pc, test, trial) + + if P.type == "assembled": + context = P.getPythonContext() + # It only makes sense to preconditioner/invert a diagonal + # block in general. That's all we're going to allow. + if not context.on_diag: + raise ValueError("Only makes sense to invert diagonal block") + + prefix = pc.getOptionsPrefix() + options_prefix = prefix + self._prefix + + mat_type = PETSc.Options().getString(options_prefix + "mat_type", "cusparse") + + # Convert matrix to ajicusparse + with PETSc.Log.Event("Event: matrix offload"): + P_cu = P.convert(mat_type='aijcusparse') # todo + + # Transfer nullspace + P_cu.setNullSpace(P.getNullSpace()) + tnullsp = P.getTransposeNullSpace() + if tnullsp.handle != 0: + P_cu.setTransposeNullSpace(tnullsp) + P_cu.setNearNullSpace(P.getNearNullSpace()) + + # PC object set-up + pc = PETSc.PC().create(comm=outer_pc.comm) + pc.incrementTabLevel(1, parent=outer_pc) + + # We set a DM and an appropriate SNESContext on the constructed PC + # so one can do e.g. multigrid or patch solves. + dm = outer_pc.getDM() + self._ctx_ref = self.new_snes_ctx( + outer_pc, a, bcs, mat_type, + fcp=fcp, options_prefix=options_prefix + ) + + pc.setDM(dm) + pc.setOptionsPrefix(options_prefix) + pc.setOperators(A, P_cu) + self.pc = pc + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref, save=False): + pc.setFromOptions() def update(self, pc): _, P = pc.getOperators() @@ -91,14 +93,18 @@ def form(self, pc, test, trial): # Convert vectors to CUDA, solve and get solution on CPU back def apply(self, pc, x, y): - dm = pc.getDM() - with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): - y_cu = PETSc.Vec() - y_cu.createCUDAWithArrays(y) - x_cu = PETSc.Vec() - x_cu.createCUDAWithArrays(x) - self.pc.apply(x_cu, y_cu) - y.copy(y_cu) + with PETSc.Log.Event("Event: apply offload"): # + dm = pc.getDM() + with dmhooks.add_hooks(dm, self, appctx=self._ctx_ref): + with PETSc.Log.Event("Event: vectors offload"): + y_cu = PETSc.Vec() # begin + y_cu.createCUDAWithArrays(y) + x_cu = PETSc.Vec() + x_cu.createCUDAWithArrays(x) # end + with PETSc.Log.Event("Event: solve"): + self.pc.apply(x_cu, y_cu) # + with PETSc.Log.Event("Event: vectors copy back"): + y.copy(y_cu) # def applyTranspose(self, pc, X, Y): raise NotImplementedError