-
Notifications
You must be signed in to change notification settings - Fork 254
compiler: Misc enhancements for lowering of parlang backends #2878
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c65c28a
3f7f6dc
1df126b
4ab49c8
9d34058
8b6731a
c61b8cc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,6 @@ | ||
| from dataclasses import dataclass | ||
| from functools import cached_property | ||
| from itertools import chain | ||
|
|
||
| from devito.ir.iet.nodes import Call, Callable | ||
| from devito.ir.iet.utils import derive_parameters | ||
|
|
@@ -11,6 +13,7 @@ | |
| 'CommCallable', | ||
| 'DeviceCall', | ||
| 'DeviceFunction', | ||
| 'EFuncMeta', | ||
| 'ElementalCall', | ||
| 'ElementalFunction', | ||
| 'EntryFunction', | ||
|
|
@@ -21,6 +24,38 @@ | |
| ] | ||
|
|
||
|
|
||
| @dataclass(frozen=True) | ||
| class EFuncMeta: | ||
|
|
||
| body: object = None | ||
| efuncs: tuple = () | ||
| includes: tuple = () | ||
| namespaces: tuple = () | ||
| libs: tuple = () | ||
|
|
||
| @classmethod | ||
| def compose(cls, *items): | ||
| items = tuple(items) | ||
|
|
||
| if not items: | ||
| return cls() | ||
|
|
||
| return cls( | ||
| body=items[-1].body, | ||
| efuncs=tuple(chain.from_iterable(i.efuncs for i in items)), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that's just our
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no it's an iterable of an iterable that you're chaining together here |
||
| includes=tuple(chain.from_iterable(i.includes for i in items)), | ||
| namespaces=tuple(chain.from_iterable(i.namespaces for i in items)), | ||
| libs=tuple(chain.from_iterable(i.libs for i in items)) | ||
| ) | ||
|
|
||
| def __iter__(self): | ||
| yield self.body | ||
| yield self.efuncs | ||
| yield self.includes | ||
| yield self.namespaces | ||
| yield self.libs | ||
|
|
||
|
|
||
| # ElementalFunction machinery | ||
|
|
||
| class ElementalCall(Call): | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -97,11 +97,6 @@ def __init__(self, name, val=None): | |
| A Dimension along which prefetching is feasible and beneficial. | ||
| """ | ||
|
|
||
| PREFETCHABLE_SHM = Property('prefetchable-shm') | ||
| """ | ||
| A Dimension along which shared-memory prefetching is feasible and beneficial. | ||
| """ | ||
|
|
||
| INIT_CORE_SHM = Property('init-core-shm') | ||
| """ | ||
| A Dimension along which the shared-memory CORE data region is initialized. | ||
|
|
@@ -190,32 +185,6 @@ def update_properties(properties, exprs): | |
| if not exprs: | ||
| return properties | ||
|
|
||
| # Auto-detect prefetchable Dimensions | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. now unused, hence dropping |
||
| dims = set() | ||
| flag = False | ||
| for e in as_tuple(exprs): | ||
| w, r = e.args | ||
|
|
||
| # Ensure it's in the form `Indexed = Indexed` | ||
| try: | ||
| wf, rf = w.function, r.function | ||
| except AttributeError: | ||
| break | ||
|
|
||
| if not rf or not wf._mem_shared: | ||
| break | ||
| dims.update({d.parent for d in wf.dimensions if d.parent in properties}) | ||
|
|
||
| if not rf._mem_heap: | ||
| break | ||
| else: | ||
| flag = True | ||
|
|
||
| if flag: | ||
| properties = properties.prefetchable_shm(dims) | ||
| else: | ||
| properties = properties.drop(properties=PREFETCHABLE_SHM) | ||
|
|
||
| # Remove properties that are trivially incompatible with `exprs` | ||
| if not all(e.lhs.function._mem_shared for e in as_tuple(exprs)): | ||
| drop = {INIT_CORE_SHM, INIT_HALO_LEFT_SHM, INIT_HALO_RIGHT_SHM} | ||
|
|
@@ -284,9 +253,6 @@ def prefetchable(self, dims, v=PREFETCHABLE): | |
| m[d] = self.get(d, set()) | {v} | ||
| return Properties(m) | ||
|
|
||
| def prefetchable_shm(self, dims): | ||
| return self.prefetchable(dims, PREFETCHABLE_SHM) | ||
|
|
||
| def block(self, dims, kind='default'): | ||
| if kind == 'default': | ||
| p = TILABLE | ||
|
|
@@ -357,9 +323,6 @@ def _is_property_any(self, dims, v): | |
| def is_prefetchable(self, dims=None, v=PREFETCHABLE): | ||
| return self._is_property_any(dims, PREFETCHABLE) | ||
|
|
||
| def is_prefetchable_shm(self, dims=None): | ||
| return self._is_property_any(dims, PREFETCHABLE_SHM) | ||
|
|
||
| def is_core_init(self, dims=None): | ||
| return self._is_property_any(dims, INIT_CORE_SHM) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -139,6 +139,10 @@ def _aliases_from_clusters(self, cgroup, exclude, meta): | |
| # [Schedule]_m -> Schedule (s.t. best memory/flops trade-off) | ||
| schedule, exprs = self._select(variants) | ||
|
|
||
| # Schedule -> Schedule (optimization) | ||
| if self.opt_maxpar: | ||
| schedule = optimize_schedule_maxpar(schedule) | ||
|
|
||
| # Schedule -> Schedule (optimization) | ||
| if self.opt_rotate: | ||
| schedule = optimize_schedule_rotations(schedule, self.sregistry) | ||
|
|
@@ -664,7 +668,6 @@ def lower_aliases(aliases, meta, maxpar): | |
| """ | ||
| Create a Schedule from an AliasList. | ||
| """ | ||
| stampcache = {} | ||
| dmapper = {} | ||
| processed = [] | ||
| for a in aliases: | ||
|
|
@@ -704,12 +707,6 @@ def lower_aliases(aliases, meta, maxpar): | |
| # use `<1>` as stamp, which is what appears in `ispace` | ||
| interval = interval.lift(i.stamp) | ||
|
|
||
| # We further bump the interval stamp if we were requested to trade | ||
| # fusion for more collapse-parallelism | ||
| if maxpar: | ||
| stamp = stampcache.setdefault(interval.dim, Stamp()) | ||
| interval = interval.lift(stamp) | ||
|
|
||
| writeto.append(interval) | ||
| intervals.append(interval) | ||
|
|
||
|
|
@@ -853,6 +850,30 @@ def optimize_schedule_rotations(schedule, sregistry): | |
| return schedule.rebuild(*processed, rmapper=rmapper) | ||
|
|
||
|
|
||
| def optimize_schedule_maxpar(schedule): | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is 95% just like before, but it now also triggers in some rare cases exercised in PRO |
||
| """ | ||
| Bump the IterationSpace's stamp, trading fusion for more collapse-parallelism. | ||
| """ | ||
| key = lambda i: (i.writeto, i.ispace) | ||
|
|
||
| processed = [] | ||
| for (writeto0, ispace0), group in groupby(schedule, key=key): | ||
| g = list(group) | ||
|
|
||
| stamp = Stamp() | ||
| dims = writeto0.itdims | ||
|
|
||
| writeto = writeto0.lift(dims, stamp) | ||
| ispace = ispace0.lift(dims, stamp) | ||
|
|
||
| processed.extend([ | ||
| ScheduledAlias(pivot, writeto, ispace, aliaseds, indicess) | ||
| for pivot, _, _, aliaseds, indicess in g | ||
| ]) | ||
|
|
||
| return schedule.rebuild(*processed) | ||
|
|
||
|
|
||
| def lower_schedule(schedule, meta, sregistry, opt_ftemps, opt_min_dtype, | ||
| opt_minmem): | ||
| """ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1593,15 +1593,6 @@ def _time_buffering(self): | |
| def _time_buffering_default(self): | ||
| return self._time_buffering and not isinstance(self.save, Buffer) | ||
|
|
||
| def _evaluate(self, **kwargs): | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mloubout dropped as per your suggestion |
||
| retval = super()._evaluate(**kwargs) | ||
| if not self._time_buffering and not retval.is_Function: | ||
| # Saved TimeFunction might need streaming, expand interpolations | ||
| # for easier processing | ||
| return retval.evaluate | ||
| else: | ||
| return retval | ||
|
|
||
| def _arg_check(self, args, intervals, **kwargs): | ||
| super()._arg_check(args, intervals, **kwargs) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,10 +11,11 @@ | |
| from functools import cached_property | ||
|
|
||
| import numpy as np | ||
| from sympy import Expr | ||
|
|
||
| from devito.exceptions import InvalidArgument | ||
| from devito.parameters import configuration | ||
| from devito.symbolics import search | ||
| from devito.symbolics import Reserved, Terminal, search | ||
| from devito.tools import as_list, as_tuple, is_integer | ||
| from devito.types.array import Array, ArrayObject | ||
| from devito.types.basic import Scalar, Symbol | ||
|
|
@@ -35,7 +36,9 @@ | |
| 'QueueID', | ||
| 'SharedData', | ||
| 'TBArray', | ||
| 'TensorMove', | ||
| 'ThreadArray', | ||
| 'ThreadArrive', | ||
| 'ThreadCommit', | ||
| 'ThreadID', | ||
| 'ThreadPoolSync', | ||
|
|
@@ -365,12 +368,24 @@ class ThreadCommit(Fence): | |
| pass | ||
|
|
||
|
|
||
| class ThreadArrive(Fence): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this to make use of more sophisticated/granular thread barriers to reduce idle time somewhere?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pretty much yes |
||
|
|
||
| """ | ||
| A generic arrive operation for a single thread, typically used to signal | ||
| the arrival at a certain point through a suitable synchronization object. | ||
| """ | ||
|
|
||
| pass | ||
|
|
||
|
|
||
| class ThreadWait(Fence): | ||
|
|
||
| """ | ||
| A generic wait operation for a single thread, typically used to synchronize | ||
| after a memory operation issued at a specific program point with a | ||
| ThreadCommit operation. | ||
| with other threads over: | ||
|
|
||
| * a memory operation issued by a prior ThreadCommit operation. | ||
| * the consumption of a shared resource via a ThreadArrive operation. | ||
| """ | ||
|
|
||
| pass | ||
|
|
@@ -386,3 +401,18 @@ def __init_finalize__(self, *args, **kwargs): | |
| kwargs['liveness'] = 'eager' | ||
|
|
||
| super().__init_finalize__(*args, **kwargs) | ||
|
|
||
|
|
||
| class TensorMove(Expr, Reserved, Terminal): | ||
|
|
||
| """ | ||
| Represent the LOAD/STORE of a multi-dimensional block of data from/to a higher | ||
| level of the memory hierarchy | ||
| """ | ||
|
|
||
| func = Reserved._rebuild | ||
|
|
||
| def _ccode(self, printer): | ||
| return str(self) | ||
|
|
||
| _sympystr = _ccode | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why -1?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is because the idea is that you may have a sequence of IET passes.
Pseudocode example:
and each of those passes introduces different includes/libs/efuncs/...
In the end you want the union of the various includes/libs/efuncs/..., but the body of the last pass.
So you call
`EFuncMeta.compose(v0, v1, v2)` and that way it will use v2's body.