Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Summarize changes to support prediction #1

Open
wants to merge 34 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
50dc7d1
Make CEESD changes to current inducer@main.
MTCam Jul 17, 2024
7d5a1fb
Add missing _with_new_tags method to instruction
MTCam Jul 18, 2024
f494955
TypeInferenceMapper: allow np.bool in map_type_case
matthiasdiener Jul 18, 2024
41dcf01
Merge branch 'main' into production-pilot
MTCam Jul 20, 2024
1137915
Merge branch 'main' into production-pilot
MTCam Jul 26, 2024
199aede
Merge branch 'main' into production-pilot-up2date
MTCam Aug 29, 2024
cc0bebe
Rename --> _get_partial_loop_nest_tree
MTCam Aug 29, 2024
541fc1e
rename _pull_out_loop_nest --> separate_loop_next
MTCam Aug 29, 2024
94c0cdd
Use production version of generate_loop_schedules_v2 in _generate_loo…
MTCam Aug 29, 2024
6e8685a
Rename get_loop_nest_tree --> get_loop_tree
MTCam Aug 29, 2024
7e6e37f
Apply https://github.com/inducer/loopy/pull/848, convert tree to fro…
MTCam Aug 29, 2024
c946af9
Merge branch 'main' into production-pilot
MTCam Sep 3, 2024
d64038b
Merge branch 'inducer:main' into production-pilot
matthiasdiener Sep 6, 2024
9092def
Merge branch 'main' into production-pilot
MTCam Sep 12, 2024
6062e96
Merge branch 'main' into production-pilot
MTCam Sep 23, 2024
956d591
Merge branch 'main' into production-pilot
MTCam Oct 13, 2024
3b04164
Refuse to fuse nestable kernels
MTCam Oct 19, 2024
fc274fa
Be more like @matthiasdiener soln
MTCam Oct 19, 2024
6056dae
Merge with main.
MTCam Oct 23, 2024
d22162d
Merge branch 'main' into production-pilot
MTCam Nov 1, 2024
a82ffa0
Merge branch 'main' into production-pilot
MTCam Nov 5, 2024
8baeabc
Merge branch 'main' into production-pilot
MTCam Nov 7, 2024
5d46b46
Merge branch 'main' into production-pilot
MTCam Nov 11, 2024
65ff983
Fix quick mirgecom@main CI
MTCam Nov 12, 2024
dfc8adc
Merge remote-tracking branch 'origin/production-pilot' into production
MTCam Nov 15, 2024
d37031d
Merge branch 'main' into production-pilot
MTCam Nov 15, 2024
e5dbc22
Merge branch 'production-pilot' into production
MTCam Nov 20, 2024
cd606ed
Merge branch 'main' into production-pilot
MTCam Nov 24, 2024
9a33871
Update from merge errors
MTCam Nov 24, 2024
a1ad083
Merge branch 'main' into production-pilot
MTCam Nov 28, 2024
e57e22c
Merge branch 'main' into production-pilot
MTCam Dec 2, 2024
da6d2fe
Merge branch 'main' into production-pilot
MTCam Dec 5, 2024
d9616e6
Merge branch 'main' into production-pilot
MTCam Dec 6, 2024
2ab9461
Merge branch 'main' into production-pilot
MTCam Dec 17, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions loopy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
from loopy.target.execution import ExecutorBase
from loopy.target.ispc import ISPCTarget
from loopy.target.opencl import OpenCLTarget
from loopy.target.pycuda import PyCudaTarget, PyCudaWithPackedArgsTarget
from loopy.target.pyopencl import PyOpenCLTarget
from loopy.tools import Optional, clear_in_mem_caches, memoize_on_disk, t_unit_to_python
from loopy.transform.add_barrier import add_barrier
Expand Down Expand Up @@ -144,6 +145,7 @@
tag_array_axes,
tag_data_axes,
)
from loopy.transform.domain import decouple_domain
from loopy.transform.fusion import fuse_kernels
from loopy.transform.iname import (
add_inames_for_unused_hw_axes,
Expand Down Expand Up @@ -180,6 +182,10 @@
simplify_indices,
tag_instructions,
)
from loopy.transform.loop_fusion import (
get_kennedy_unweighted_fusion_candidates,
rename_inames_in_batch
)
from loopy.transform.pack_and_unpack_args import pack_and_unpack_args_for_call
from loopy.transform.padding import (
add_padding,
Expand All @@ -195,6 +201,10 @@
unprivatize_temporaries_with_inames,
)
from loopy.transform.realize_reduction import realize_reduction
from loopy.transform.reduction import (
hoist_invariant_multiplicative_terms_in_sum_reduction,
extract_multiplicative_terms_in_sum_reduction_as_subst)
from loopy.transform.reindex import reindex_temporary_using_seghir_loechner_scheme
from loopy.transform.save import save_and_reload_temporaries
from loopy.transform.subst import (
assignment_to_subst,
Expand Down Expand Up @@ -262,6 +272,8 @@
"Options",
"OrderedAtomic",
"PreambleInfo",
"PyCudaTarget",
"PyCudaWithPackedArgsTarget",
"PyOpenCLTarget",
"Reduction",
"ScalarCallable",
Expand Down Expand Up @@ -302,8 +314,10 @@
"clear_in_mem_caches",
"collect_common_factors_on_increment",
"concatenate_arrays",
"decouple_domain",
"duplicate_inames",
"expand_subst",
"extract_multiplicative_terms_in_sum_reduction_as_subst",
"extract_subst",
"find_instructions",
"find_most_recent_global_barrier",
Expand All @@ -325,6 +339,7 @@
"get_dot_dependency_graph",
"get_global_barrier_order",
"get_iname_duplication_options",
"get_kennedy_unweighted_fusion_candidates",
"get_mem_access_map",
"get_one_linearized_kernel",
"get_one_scheduled_kernel",
Expand All @@ -333,6 +348,7 @@
"get_subkernels",
"get_synchronization_map",
"has_schedulable_iname_nesting",
"hoist_invariant_multiplicative_terms_in_sum_reduction",
"infer_arg_descr",
"infer_unknown_types",
"inline_callable_kernel",
Expand Down Expand Up @@ -362,6 +378,7 @@
"register_preamble_generators",
"register_reduction_parser",
"register_symbol_manglers",
"reindex_temporary_using_seghir_loechner_scheme",
"remove_inames_from_insn",
"remove_instructions",
"remove_predicates_from_insn",
Expand All @@ -371,6 +388,7 @@
"rename_callable",
"rename_iname",
"rename_inames",
"rename_inames_in_batch",
"replace_instruction_ids",
"save_and_reload_temporaries",
"set_argument_order",
Expand Down Expand Up @@ -399,6 +417,14 @@
"untag_inames",
]

try:
import loopy.relations as relations
except ImportError:
# catching ImportErrors to avoid making minikanren a hard-dep
pass
else:
__all__ += ["relations"]

# }}}


Expand Down
21 changes: 19 additions & 2 deletions loopy/codegen/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,12 @@
import islpy as isl
from islpy import dim_type
from pymbolic.mapper.stringifier import PREC_NONE
from typing import FrozenSet

from loopy.codegen.control import build_loop_nest
from loopy.codegen.result import merge_codegen_results
from loopy.diagnostic import LoopyError, warn
from loopy.kernel import LoopKernel
from loopy.symbolic import flatten


Expand Down Expand Up @@ -348,6 +350,16 @@ def set_up_hw_parallel_loops(codegen_state, schedule_index, next_func,

# {{{ sequential loop

def _get_intersecting_inames(kernel: LoopKernel, iname: str) -> FrozenSet[str]:
    """
    Return every iname that shows up on an instruction nested inside *iname*.

    For each instruction listed under *iname* in ``kernel.iname_to_insns()``,
    the instruction's ``within_inames``, ``reduction_inames()`` and
    ``sub_array_ref_inames()`` are merged into the result. An empty
    :class:`frozenset` is returned if no instruction is listed.
    """
    collected: FrozenSet[str] = frozenset()

    for insn_id in kernel.iname_to_insns()[iname]:
        insn = kernel.id_to_insn[insn_id]
        collected = collected.union(
            insn.within_inames
            | insn.reduction_inames()
            | insn.sub_array_ref_inames())

    return collected


def generate_sequential_loop_dim_code(codegen_state, sched_index, hints):
kernel = codegen_state.kernel

Expand All @@ -359,8 +371,13 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index, hints):
from loopy.codegen.bounds import get_usable_inames_for_conditional

# Note: this does not include loop_iname itself!
usable_inames = get_usable_inames_for_conditional(kernel, sched_index,
codegen_state.codegen_cachemanager)
usable_inames = get_usable_inames_for_conditional(
kernel, sched_index, codegen_state.codegen_cachemanager)

# get rid of disjoint loop nests, see
# <www.github.com/inducer/loopy/issues/724>
usable_inames = usable_inames & _get_intersecting_inames(kernel,
loop_iname)

domain = kernel.get_inames_domain(loop_iname)

Expand Down
59 changes: 59 additions & 0 deletions loopy/kernel/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -2135,6 +2135,65 @@ def get_outer_params(domains):

# }}}

# {{{ get access map from an instruction


class _IndexCollector(CombineMapper):
    """
    Mapper gathering the index tuples of all subscripts into one variable.

    Calling an instance on an expression yields a :class:`frozenset` of the
    ``index_tuple`` of every subscript whose aggregate is named *var*.
    """

    def __init__(self, var):
        # name of the variable whose subscripts are to be recorded
        self.var = var
        super().__init__()

    def combine(self, values):
        # set-union of all the partial results
        merged = frozenset()
        for value in values:
            merged = merged | value
        return merged

    def map_subscript(self, expr):
        is_access_to_var = expr.aggregate.name == self.var

        # results contributed by the base-class handling of the subscript
        collected = super().map_subscript(expr)

        if is_access_to_var:
            collected = collected | frozenset([expr.index_tuple])

        return collected

    def map_algebraic_leaf(self, expr):
        # leaves carry no subscripts
        return frozenset()

    map_constant = map_algebraic_leaf


def _project_out_inames_from_maps(amaps, inames_to_project_out):
new_amaps = []
for amap in amaps:
for iname in inames_to_project_out:
dt, pos = amap.get_var_dict()[iname]
amap = amap.project_out(dt, pos, 1)

new_amaps.append(amap)

return new_amaps


def _union_amaps(amaps):
    """
    Return the union of all maps in the non-empty sequence *amaps*.

    Indexing the first element raises :class:`IndexError` when *amaps* is
    empty.
    """
    import islpy as isl

    result = amaps[0]
    for other in amaps[1:]:
        result = isl.Map.union(result, other)

    return result


def get_insn_access_map(kernel, insn_id, var):
    """
    Return the union of the access maps with which the instruction
    *insn_id* subscripts the variable *var*.

    :arg kernel: the kernel containing the instruction.
    :arg insn_id: identifier of the instruction to inspect.
    :arg var: name of the variable whose accesses are collected.

    The instruction's expression, assignees and predicates are searched for
    subscripts of *var*; one access map is built per distinct index tuple
    (over the domain of the instruction's ``within_inames``) and the maps
    are united.

    :raises IndexError: (from :func:`_union_amaps`) if the instruction holds
        no subscripted access to *var*.
    """
    from loopy.symbolic import get_access_map
    from loopy.transform.subst import expand_subst

    # Expand substitution rules *before* fetching the instruction. Looking
    # the instruction up in the unexpanded kernel (as was done previously)
    # meant the expansion had no effect on the expressions inspected below,
    # so accesses to *var* made through a substitution rule were missed.
    kernel = expand_subst(kernel)
    insn = kernel.id_to_insn[insn_id]

    indices = list(_IndexCollector(var)((insn.expression,
                                         insn.assignees,
                                         tuple(insn.predicates))))

    domain = kernel.get_inames_domain(insn.within_inames)
    amaps = [get_access_map(domain, idx, kernel.assumptions)
             for idx in indices]

    return _union_amaps(amaps)

# }}}


def get_hw_axis_base_for_codegen(kernel: LoopKernel, iname: str) -> isl.Aff:
"""
Expand Down
122 changes: 122 additions & 0 deletions loopy/relations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
from kanren import Relation, facts


def get_inameo(kernel):
    """Return a relation holding one fact ``(iname,)`` per iname of *kernel*."""
    inameo = Relation()
    facts(inameo, *[(name,) for name in kernel.all_inames()])
    return inameo


def get_argo(kernel):
    """Return a relation holding one fact ``(name,)`` per argument of *kernel*."""
    argo = Relation()
    arg_names = [arg.name for arg in kernel.args]
    facts(argo, *[(name,) for name in arg_names])
    return argo


def get_tempo(kernel):
    """
    Return a relation holding one fact ``(tv,)`` for every element obtained
    by iterating over ``kernel.temporary_variables``.
    """
    tempo = Relation()
    facts(tempo, *[(tv,) for tv in kernel.temporary_variables])
    return tempo


def get_insno(kernel):
    """Return a relation holding one fact ``(insn_id,)`` per instruction."""
    insno = Relation()
    insn_ids = [insn.id for insn in kernel.instructions]
    facts(insno, *[(insn_id,) for insn_id in insn_ids])
    return insno


def get_taggedo(kernel):
    """
    Return a relation of ``(name, tag)`` facts covering every tag attached
    to an argument, an iname or an instruction of *kernel*.

    Arguments and inames are identified by their name, instructions by
    their id.
    """
    taggedo = Relation()

    # (identifier, tagged object) pairs: arguments, then inames, then insns
    named_taggables = [
        *kernel.arg_dict.items(),
        *kernel.inames.items(),
        *((insn.id, insn) for insn in kernel.instructions),
    ]

    for name, taggable in named_taggables:
        for tag in taggable.tags:
            facts(taggedo, (name, tag))

    return taggedo


def get_taggedo_of_type(kernel, tag_type):
    """
    Return a relation of ``(name, tag)`` facts restricted to the tags that
    ``tags_of_type(tag_type)`` reports for each argument, iname and
    instruction of *kernel*.

    Arguments and inames are identified by their name, instructions by
    their id.
    """
    taggedo = Relation()

    def _record(name, taggable):
        for tag in taggable.tags_of_type(tag_type):
            facts(taggedo, (name, tag))

    for arg_name, arg in kernel.arg_dict.items():
        _record(arg_name, arg)

    for iname_name, iname in kernel.inames.items():
        _record(iname_name, iname)

    for insn in kernel.instructions:
        _record(insn.id, insn)

    return taggedo


def get_producero(kernel):
    """
    Return a relation of ``(insn_id, var)`` facts, one for each name in an
    instruction's ``assignee_var_names()``.
    """
    producero = Relation()

    written = [
        (insn.id, var)
        for insn in kernel.instructions
        for var in insn.assignee_var_names()
    ]
    facts(producero, *written)

    return producero


def get_consumero(kernel):
    """
    Return a relation of ``(insn_id, var)`` facts, one for each name in an
    instruction's ``read_dependency_names()``.
    """
    consumero = Relation()

    read = [
        (insn.id, var)
        for insn in kernel.instructions
        for var in insn.read_dependency_names()
    ]
    facts(consumero, *read)

    return consumero


def get_withino(kernel):
    """
    Return a relation of ``(insn_id, within_inames)`` facts, one per
    instruction; the second entry is the instruction's complete
    ``within_inames`` set rather than a single iname.
    """
    withino = Relation()
    nesting = [(insn.id, insn.within_inames) for insn in kernel.instructions]
    facts(withino, *nesting)
    return withino


def get_reduce_insno(kernel):
    """
    Return a relation holding one fact ``(insn_id,)`` for each instruction
    whose ``reduction_inames()`` is non-empty.
    """
    reduce_insno = Relation()

    reducing_ids = [insn.id
                    for insn in kernel.instructions
                    if insn.reduction_inames()]
    facts(reduce_insno, *[(insn_id,) for insn_id in reducing_ids])

    return reduce_insno


def get_reduce_inameo(kernel):
    """
    Return a relation holding one fact ``(iname,)`` for each iname that is
    reported by ``reduction_inames()`` of at least one instruction.
    """
    reduce_inameo = Relation()

    # union of the reduction inames over all instructions
    all_reduction_inames = frozenset()
    for insn in kernel.instructions:
        all_reduction_inames = all_reduction_inames.union(
            insn.reduction_inames())

    for iname in all_reduction_inames:
        facts(reduce_inameo, (iname,))

    return reduce_inameo

# vim: fdm=marker
Loading
Loading