Skip to content
This repository has been archived by the owner on Nov 27, 2024. It is now read-only.

Commit

Permalink
Fix iaca for lhs.
Browse files Browse the repository at this point in the history
  • Loading branch information
Gheorghe-Teodor Bercea committed Mar 31, 2015
1 parent 2912f4d commit e1b1f15
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 12 deletions.
35 changes: 25 additions & 10 deletions pyop2/host.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,13 @@ def _iaca_ast_to_c(self, ast, opts={}):
iaca_ast, last, nest, loop_count = coffee.utils.insert_iaca(ast, iakify)
if not last:
iakify += 1
ast_handler = ASTKernel(iaca_ast, self._include_dirs)
ast_handler.plan_cpu(opts)
self._applied_blas = ast_handler.blas
self._applied_ap = ast_handler.ap
iaca_kernels.append([ast_handler.gencode(), nest, loop_count])
# ast_handler = ASTKernel(iaca_ast, self._include_dirs)
# ast_handler.plan_cpu(opts)
# self._applied_blas = ast_handler.blas
# self._applied_ap = ast_handler.ap
# iaca_kernels.append([ast_handler.gencode(), nest, loop_count])
# from IPython import embed; embed()
iaca_kernels.append([iaca_ast.gencode(), nest, loop_count])
return iaca_kernels


Expand Down Expand Up @@ -777,18 +779,25 @@ def sum_nested_flop_values(self, iaca_kernels, val_pos):
for ind, ic in enumerate(reversed(iaca_kernels)):
if ic[1] == prev_nest:
i_flops += (ic[2] if ic[3] else 1) * ic[val_pos]
# print "same nest: ", i_flops
elif ic[1] < prev_nest:
iaca_block_flops = 0
for ic_prev in iaca_kernels[len(iaca_kernels) - ind:]:
if prev_nest == ic_prev[1]:
iaca_block_flops += ic_prev[val_pos]
else:
break
i_flops = (ic[2] if ic[3] else 1) * (i_flops + ic[val_pos] - iaca_block_flops)
# else:
# break
curr_flops = (ic[2] if ic[3] else 1) * (i_flops + ic[val_pos] - iaca_block_flops)
if ic[6] == 1:
i_flops = min(curr_flops, (ic[2] if ic[3] else 1) * ic[val_pos])
else:
i_flops = curr_flops
# print "prev nest greater: ", i_flops
else:
# Not tested yet. Might never apply to FFC kernels.
total_flops += i_flops
i_flops = (ic[2] if ic[3] else 1) * ic[val_pos]
# print "prev nest smaller: ", i_flops
prev_nest = ic[1]
total_flops += i_flops
return total_flops
Expand All @@ -807,32 +816,37 @@ def sum_nested_cycle_values(self, iaca_kernels, val_pos):
if ic[1] == prev_nest:
# Add the cycles or flops and multiply by loop counter if the loop is not unrolled
i_flops += (ic[2] if ic[3] else 1) * ic[val_pos]
# print "same nest: ", i_flops
# if the current loop contains the previous one (has a lower nest number).
elif ic[1] < prev_nest:
# if there is only one jump instruction and the loop is not unrolled (i.e. inner loops unrolled but current not unrolled)
if ic[6] == 1 and ic[3]:
# loop counter times the number of cycles
i_flops = ic[2] * ic[val_pos]
# print "greater nest 1: ", i_flops
else:
# sum up the values for the static cycle counts for the loops contained in the current loop
# regardless of their unrolled/not unrolled status
iaca_block_flops = 0
for ic_prev in iaca_kernels[len(iaca_kernels) - ind:]:
if prev_nest == ic_prev[1]:
iaca_block_flops += ic_prev[val_pos]
else:
break
# else:
# break
# if there are fewer cycles to be done in the current loop, then return that count
if iaca_block_flops > ic[val_pos]:
i_flops = (ic[2] if ic[3] else 1) * ic[val_pos]
# print "greater nest 2: ", i_flops
else:
# i_flops contains the flop count for the inner loops; add to that the flops generated by the rest of the code
# in the current loop
i_flops = (ic[2] if ic[3] else 1) * (i_flops + ic[val_pos] - iaca_block_flops)
# print "greater nest 3: ", i_flops
else:
# Not tested yet. Might never apply to FFC kernels.
total_flops += i_flops
i_flops = (ic[2] if ic[3] else 1) * ic[val_pos]
# print "smaller nest: ", i_flops
prev_nest = ic[1]
total_flops += i_flops
return total_flops
Expand Down Expand Up @@ -985,6 +999,7 @@ def compile(self, argtypes=None, restype=None):
wrapper_code['iaca_end'] = ""
for ind in range(1, len(iaca_kernels)):
iaca_kernels[ind][0] = self.get_c_code(iaca_kernels[ind][0], wrapper_code)
for ind in range(1, len(iaca_cycle_kernels)):
iaca_cycle_kernels[ind][0] = self.get_c_code(iaca_cycle_kernels[ind][0], wrapper_code)
iaca_path = path_to_iaca_file + region_name + "_" + self._kernel._md5 + ".txt"
self.build_loop_nest_reports(iaca_kernels, wrapper_code, iaca_path, compilation, extension, cppargs, ldargs, argtypes, restype, compiler)
Expand Down
10 changes: 8 additions & 2 deletions pyop2/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def __init__(self,
self.folds["iaca_flops"] = fold_stats
# IACA reported cycles for the loop over the columns (extruded mesh)
self._cycles = []
self.folds["cycles"] = fold_stats
self.folds["cycles"] = SUM if self._is_proc else AVG

def _ncalls(self):
"""Number of calls per process"""
Expand Down Expand Up @@ -218,7 +218,7 @@ def plot_list(self, frequency):
self.v_volume, self.m_volume, self.mv_volume,
self.vbw, self.mbw, self.mvbw, self.rvbw,
self.iaca_flops, self.papi_flops,
self.iaca_mflops, self.papi_mflops, self.cycles / frequency, self.c_runtime]
self.iaca_mflops, self.papi_mflops, self.cycles * 1.0 / frequency, self.c_runtime]

@property
def name(self):
Expand Down Expand Up @@ -278,6 +278,8 @@ def iaca_flops(self):

@property
def cycles(self):
    """Return the reduced "cycles" value recorded in ``self._cycles``.

    The result of ``self._reduce("cycles", self._cycles)`` is returned
    unchanged when ``self._is_proc`` is falsy, and divided by 1e9 when it
    is truthy.
    """
    # Read the flag first, then reduce once; only the scaling differs.
    scale_down = bool(self._is_proc)
    reduced = self._reduce("cycles", self._cycles)
    return reduced / 1e9 if scale_down else reduced

#################################################
Expand Down Expand Up @@ -358,10 +360,14 @@ def cycles(self, value):

@property
def runtime(self):
    """Return the time elapsed between ``start_time`` and ``end_time``.

    When ``self._is_proc`` is truthy the elapsed time is multiplied by
    ``self._ncalls()``; otherwise the plain difference is returned.
    """
    if self._is_proc:
        calls = self._ncalls()
        return calls * (self.end_time - self.start_time)
    return self.end_time - self.start_time

@property
def rv_runtime(self):
    """Return the time elapsed between ``rv_start_time`` and ``rv_end_time``.

    When ``self._is_proc`` is truthy the elapsed time is multiplied by
    ``self._ncalls()``; otherwise the plain difference is returned.
    """
    if self._is_proc:
        calls = self._ncalls()
        return calls * (self.rv_end_time - self.rv_start_time)
    return self.rv_end_time - self.rv_start_time

@property
Expand Down

0 comments on commit e1b1f15

Please sign in to comment.