
Merge pull request #488 from OP2/petsc-logging
* petsc-logging:
  Switch to using PETSc logging
  travis: Install pulp
wence- committed May 10, 2016
2 parents 9125d48 + 6871246 commit 2062a47
Showing 12 changed files with 47 additions and 336 deletions.
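This merge replaces PyOP2's home-grown profiling (the `Timer` class, `lineprof`, and the `@profile` decorator) with regions that hook into PETSc's logging. In the diffs below, `timed_region` is used as a context manager and `timed_function` as a decorator, both from PyOP2's `profiling` module. The following is a minimal, hypothetical sketch of that pattern on top of petsc4py; it assumes the helpers are backed by `PETSc.Log.Event`, which this commit does not show directly.

```python
# Hypothetical sketch of the timed_region / timed_function pattern used in the
# diffs below, built directly on petsc4py's log events. The real helpers live
# in pyop2/profiling.py; their exact implementation is not part of this diff.
from contextlib import contextmanager
from functools import wraps

from petsc4py import PETSc


@contextmanager
def timed_region(name):
    """Time the enclosed block as a PETSc log event called `name`."""
    event = PETSc.Log.Event(name)
    event.begin()
    try:
        yield
    finally:
        event.end()


def timed_function(name):
    """Decorator form: time every call to the wrapped function."""
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            with timed_region(name):
                return fn(*args, **kwargs)
        return wrapper
    return decorator
```

Because the regions are registered as PETSc log events, their counts and times show up in PETSc's own report (the `-log_view` option, or `-log_summary` on the PETSc releases current at the time of this merge) rather than in the `summary()` output this pull request removes from `pyop2/op2.py`.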
1 change: 1 addition & 0 deletions .travis.yml
@@ -37,6 +37,7 @@ before_install:
--allow-external petsc --allow-unverified petsc \
--allow-external petsc4py --allow-unverified petsc4py \
< requirements-git.txt"
- pip install pulp
install: "python setup.py develop"
# command to run tests
script:
47 changes: 22 additions & 25 deletions pyop2/base.py
@@ -167,8 +167,9 @@ def _depends_on(reads, writes, cont):
if configuration['loop_fusion']:
from fusion import fuse
to_run = fuse('from_trace', to_run, 0)
for comp in to_run:
comp._run()
with timed_region("Trace: eval"):
for comp in to_run:
comp._run()


_trace = ExecutionTrace()
@@ -3318,7 +3319,7 @@ def __init__(self, dsets, maps, name=None, nest=None):
self._d_nz = sum(s._d_nz for s in self)
self._o_nz = sum(s._o_nz for s in self)
else:
with timed_region("Build sparsity"):
with timed_region("CreateSparsity"):
build_sparsity(self, parallel=MPI.parallel, block=self._block_sparse)
self._blocks = [[self]]
self._nested = False
@@ -4110,22 +4111,22 @@ def _jitmodule(self):
@collective
def compute(self):
"""Executes the kernel over all members of the iteration space."""
self.halo_exchange_begin()
iterset = self.iterset
arglist = self.prepare_arglist(iterset, *self.args)
fun = self._jitmodule
self._compute(iterset.core_part, fun, *arglist)
self.halo_exchange_end()
self._compute(iterset.owned_part, fun, *arglist)
self.reduction_begin()
if self._only_local:
self.reverse_halo_exchange_begin()
self.reverse_halo_exchange_end()
if self.needs_exec_halo:
self._compute(iterset.exec_part, fun, *arglist)
self.reduction_end()
self.update_arg_data_state()
self.log_flops()
with timed_region("ParLoopExecute"):
self.halo_exchange_begin()
iterset = self.iterset
arglist = self.prepare_arglist(iterset, *self.args)
fun = self._jitmodule
self._compute(iterset.core_part, fun, *arglist)
self.halo_exchange_end()
self._compute(iterset.owned_part, fun, *arglist)
self.reduction_begin()
if self._only_local:
self.reverse_halo_exchange_begin()
self.reverse_halo_exchange_end()
if self.needs_exec_halo:
self._compute(iterset.exec_part, fun, *arglist)
self.reduction_end()
self.update_arg_data_state()

@collective
def _compute(self, part, fun, *arglist):
@@ -4139,7 +4140,6 @@ def _compute(self, part, fun, *arglist):
raise RuntimeError("Must select a backend")

@collective
@timed_function('ParLoop halo exchange begin')
def halo_exchange_begin(self):
"""Start halo exchanges."""
if self.is_direct:
@@ -4148,7 +4148,6 @@ def halo_exchange_begin(self):
arg.halo_exchange_begin(update_inc=self._only_local)

@collective
@timed_function('ParLoop halo exchange end')
def halo_exchange_end(self):
"""Finish halo exchanges (wait on irecvs)"""
if self.is_direct:
@@ -4157,7 +4156,6 @@ def halo_exchange_end(self):
arg.halo_exchange_end(update_inc=self._only_local)

@collective
@timed_function('ParLoop reverse halo exchange begin')
def reverse_halo_exchange_begin(self):
"""Start reverse halo exchanges (to gather remote data)"""
if self.is_direct:
@@ -4167,7 +4165,6 @@ def reverse_halo_exchange_begin(self):
arg.data.halo_exchange_begin(reverse=True)

@collective
@timed_function('ParLoop reverse halo exchange end')
def reverse_halo_exchange_end(self):
"""Finish reverse halo exchanges (to gather remote data)"""
if self.is_direct:
@@ -4177,14 +4174,14 @@ def reverse_halo_exchange_end(self):
arg.data.halo_exchange_end(reverse=True)

@collective
@timed_function('ParLoop reduction begin')
@timed_function("ParLoopReductionBegin")
def reduction_begin(self):
"""Start reductions"""
for arg in self.global_reduction_args:
arg.reduction_begin()

@collective
@timed_function('ParLoop reduction end')
@timed_function("ParLoopReductionEnd")
def reduction_end(self):
"""End reductions"""
for arg in self.global_reduction_args:
2 changes: 0 additions & 2 deletions pyop2/configuration.py
@@ -65,7 +65,6 @@ class Configuration(dict):
program exit?
:param print_summary: Should PyOP2 print a summary of timings at
program exit?
:param profiling: Profiling mode (CUDA kernels are launched synchronously)
:param matnest: Should matrices on mixed maps be built as nests? (Default yes)
"""
# name, env variable, type, default, write once
@@ -88,7 +87,6 @@ class Configuration(dict):
"no_fork_available": ("PYOP2_NO_FORK_AVAILABLE", bool, False),
"print_cache_size": ("PYOP2_PRINT_CACHE_SIZE", bool, False),
"print_summary": ("PYOP2_PRINT_SUMMARY", bool, False),
"profiling": ("PYOP2_PROFILING", bool, False),
"dump_gencode_path": ("PYOP2_DUMP_GENCODE_PATH", str,
os.path.join(gettempdir(), "pyop2-gencode")),
"matnest": ("PYOP2_MATNEST", bool, True),
9 changes: 1 addition & 8 deletions pyop2/cuda.py
@@ -43,7 +43,6 @@
import device as op2
from device import *
import plan
from profiling import lineprof, Timer
from utils import verify_reshape


@@ -783,13 +782,8 @@ def compile(self):
del self._config
return self._fun

@timed_function("ParLoop kernel")
def __call__(self, grid, block, stream, *args, **kwargs):
if configuration["profiling"]:
t_ = self.compile().prepared_timed_call(grid, block, *args, **kwargs)()
Timer("CUDA kernel").add(t_)
else:
self.compile().prepared_async_call(grid, block, stream, *args, **kwargs)
self.compile().prepared_async_call(grid, block, stream, *args, **kwargs)


class ParLoop(op2.ParLoop):
@@ -823,7 +817,6 @@ def launch_configuration(self, part):
'WARPSIZE': 32}

@collective
@lineprof
def _compute(self, part, fun, *arglist):
if part.size == 0:
# Return before plan call if no computation should occur
4 changes: 1 addition & 3 deletions pyop2/fusion.py
@@ -44,7 +44,7 @@
import host
from backends import _make_object
from caching import Cached
from profiling import lineprof, timed_region, profile
from profiling import timed_region
from logger import warning, info as log_info
from mpi import collective
from configuration import configuration
@@ -413,7 +413,6 @@ def __init__(self, kernel, it_space, *args, **kwargs):
self._executor = kwargs.get('executor')

@collective
@profile
def compute(self):
"""Execute the kernel over all members of the iteration space."""
arglist = self.prepare_arglist(None, *self.args)
@@ -446,7 +445,6 @@ def prepare_arglist(self, part, *args):
return arglist

@collective
@lineprof
def _compute(self, *arglist):
kwargs = {
'all_args': self._all_args,
4 changes: 0 additions & 4 deletions pyop2/op2.py
@@ -125,10 +125,6 @@ def exit():
print '**** PyOP2 cache sizes at exit ****'
report_cache(typ=ObjectCached)
report_cache(typ=Cached)
if configuration['print_summary'] and MPI.comm.rank == 0:
from profiling import summary
print '**** PyOP2 timings summary ****'
summary()
configuration.reset()

if backends.get_backend() != 'pyop2.void':
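With the `print_summary` hook gone from `exit()` above, per-region timings are expected to come from PETSc instead, either via the command line (`-log_view`, or `-log_summary` on older PETSc) or programmatically through petsc4py. A hedged sketch of reading back one of the events added in `pyop2/base.py` above:

```python
# Hypothetical query of the "ParLoopExecute" region added in pyop2/base.py;
# getPerfInfo() is petsc4py's accessor for a log event's accumulated counters.
from petsc4py import PETSc

info = PETSc.Log.Event("ParLoopExecute").getPerfInfo()
print("ParLoopExecute: %d calls, %.3f s total" % (info["count"], info["time"]))
```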
4 changes: 1 addition & 3 deletions pyop2/opencl.py
@@ -46,7 +46,6 @@
from logger import warning
import plan
import petsc_base
from profiling import lineprof
from utils import verify_reshape, uniquify, maybe_setflags


@@ -549,7 +548,7 @@ def __call__(self, thread_count, work_group_size, *args):
fun = self.compile()
for i, arg in enumerate(args):
fun.set_arg(i, arg)
with timed_region("ParLoop kernel"):
with timed_region("ParLoopCKernel"):
cl.enqueue_nd_range_kernel(_queue, fun, (thread_count,),
(work_group_size,), g_times_l=False).wait()

@@ -649,7 +648,6 @@ def launch_configuration(self):
return {'partition_size': self._i_partition_size()}

@collective
@lineprof
def _compute(self, part, fun, *arglist):
if part.size == 0:
# Return before plan call if no computation should occur
4 changes: 1 addition & 3 deletions pyop2/openmp.py
@@ -47,7 +47,6 @@
from logger import warning
import plan as _plan
from petsc_base import *
from profiling import lineprof
from utils import *

# hard coded value to max openmp threads
@@ -282,7 +281,6 @@ def _jitmodule(self):
direct=self.is_direct, iterate=self.iteration_region)

@collective
@lineprof
def _compute(self, part, fun, *arglist):
if part.size > 0:
# TODO: compute partition size
@@ -293,7 +291,7 @@ def _compute(self, part, fun, *arglist):
boffset = 0
for c in range(plan.ncolors):
nblocks = plan.ncolblk[c]
with timed_region("ParLoop kernel"):
with timed_region("ParLoopCKernel"):
fun(boffset, nblocks, blkmap, offset, nelems, *arglist)
boffset += nblocks

Expand Down
11 changes: 5 additions & 6 deletions pyop2/petsc_base.py
@@ -580,7 +580,7 @@ def _init_monolithic(self):
mat.setOption(mat.Option.IGNORE_OFF_PROC_ENTRIES, True)
mat.setOption(mat.Option.NEW_NONZERO_ALLOCATION_ERR, True)
# Put zeros in all the places we might eventually put a value.
with timed_region("Zero initial matrix"):
with timed_region("MatZeroInitial"):
for i in range(rows):
for j in range(cols):
sparsity.fill_with_zeros(self[i, j].handle,
@@ -648,7 +648,7 @@ def _init_block(self):
mat.setOption(mat.Option.UNUSED_NONZERO_LOCATION_ERR, True)

# Put zeros in all the places we might eventually put a value.
with timed_region("Zero initial matrix"):
with timed_region("MatZeroInitial"):
sparsity.fill_with_zeros(mat, self.sparsity.dims[0][0], self.sparsity.maps)

# Now we've filled up our matrix, so the sparsity is
@@ -847,10 +847,9 @@ def monitor(ksp, its, norm):
debug("%3d KSP Residual norm %14.12e" % (its, norm))
self.setMonitor(monitor)
# Not using super here since the MRO would call base.Solver.solve
with timed_region("PETSc Krylov solver"):
with b.vec_ro as bv:
with x.vec as xv:
PETSc.KSP.solve(self, bv, xv)
with b.vec_ro as bv:
with x.vec as xv:
PETSc.KSP.solve(self, bv, xv)
if self.parameters['plot_convergence']:
self.cancelMonitor()
try:
