Skip to content

Commit

Permalink
SCC: Tidy up SCCAnnotate; rename methods and remove horizontal
Browse files Browse the repository at this point in the history
We also change the classmethods to regular instance methods to
provide access to the `directive` property in the follow-up.
  • Loading branch information
mlange05 committed Aug 9, 2024
1 parent 291ad45 commit bce73e9
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 102 deletions.
143 changes: 55 additions & 88 deletions loki/transformations/single_column/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
symbols as sym, FindVariables, is_dimension_constant
)
from loki.ir import (
nodes as ir, FindNodes, Transformer, pragmas_attached,
is_loki_pragma, get_pragma_parameters
nodes as ir, FindNodes, pragmas_attached, is_loki_pragma,
get_pragma_parameters
)
from loki.logging import info
from loki.tools import as_tuple, flatten
Expand All @@ -32,9 +32,6 @@ class SCCAnnotateTransformation(Transformation):
Parameters
----------
horizontal : :any:`Dimension`
:any:`Dimension` object describing the variable conventions used in code
to define the horizontal data dimension and iteration space.
block_dim : :any:`Dimension`
Optional ``Dimension`` object to define the blocking dimension
to use for hoisted column arrays if hoisting is enabled.
Expand All @@ -43,16 +40,14 @@ class SCCAnnotateTransformation(Transformation):
``'openacc'`` or ``None``.
"""

def __init__(self, horizontal, directive, block_dim):
self.horizontal = horizontal
def __init__(self, directive, block_dim):
self.directive = directive
self.block_dim = block_dim

@classmethod
def kernel_annotate_vector_loops_openacc(cls, routine):
def annotate_vector_loops(self, routine):
"""
Insert ``!$acc loop vector`` annotations around horizontal vector
loops, including the necessary private variable declarations.
Insert ``!$acc loop vector`` for previously marked loops,
including addition of the necessary private variable declarations.
Parameters
----------
Expand Down Expand Up @@ -88,8 +83,7 @@ def kernel_annotate_vector_loops_openacc(cls, routine):
private_clause = '' if not private_arrays else f' private({private_arrs})'
pragma._update(keyword='acc', content=f'loop vector{private_clause}')

@classmethod
def kernel_annotate_sequential_loops_openacc(cls, routine):
def annotate_sequential_loops(self, routine):
"""
Insert ``!$acc loop seq`` annotations for all loops previously
marked with ``!$loki loop seq``.
Expand All @@ -113,8 +107,7 @@ def kernel_annotate_sequential_loops_openacc(cls, routine):
if any('loop vector' in pragma.content for pragma in loop_pragmas):
info(f'[Loki-SCC::Annotate] Detected vector loop in sequential loop in {routine.name}')

@classmethod
def kernel_annotate_subroutine_present_openacc(cls, routine):
def annotate_kernel_routine(self, routine):
"""
Insert ``!$acc routine seq/vector`` directives and wrap
subroutine body in ``!$acc data present`` directives.
Expand All @@ -139,22 +132,20 @@ def kernel_annotate_subroutine_present_openacc(cls, routine):
# Add comment to prevent false-attachment in case it is preceded by an "END DO" statement
routine.body.append((ir.Comment(text=''), ir.Pragma(keyword='acc', content='end data')))

@classmethod
def insert_annotations(cls, routine, horizontal):

# Mark all parallel vector loops as `!$acc loop vector`
cls.kernel_annotate_vector_loops_openacc(routine)

# Mark all non-parallel loops as `!$acc loop seq`
cls.kernel_annotate_sequential_loops_openacc(routine)

# Wrap the routine body in `!$acc data present` markers
# to ensure device-resident data is used for array and struct arguments.
cls.kernel_annotate_subroutine_present_openacc(routine)

def transform_subroutine(self, routine, **kwargs):
"""
Apply SCCAnnotate utilities to a :any:`Subroutine`.
Apply OpenACC annotations according to ``!$loki`` placeholder
directives.
This routine effectively converts neutral ``!$loki loop`` and
``!$loki routine`` annotations into the corresponding
``!$acc`` equivalent directives. It also adds ``!$acc data
present`` clauses around kernel routine bodies and adds
``private`` clauses to loop annotations.
If the ``directive`` provided is not ``openacc``, no change is
applied. In the future, we aim to support ``OpenMP``
equivalent directives here.
Parameters
----------
Expand All @@ -167,54 +158,39 @@ def transform_subroutine(self, routine, **kwargs):
role = kwargs['role']
targets = as_tuple(kwargs.get('targets'))

if not self.directive == 'openacc':
return

if role == 'kernel':
self.process_kernel(routine)
if role == 'driver':
self.process_driver(routine, targets=targets)
# Bail if this routine has been processed before
for p in FindNodes(ir.Pragma).visit(routine.ir):
# Check if `!$acc routine` has already been added
if p.keyword.lower() == 'acc' and 'routine' in p.content.lower():
return

def process_kernel(self, routine):
"""
Applies the SCCAnnotate utilities to a "kernel". This consists of inserting the relevant
``'openacc'`` annotations at the :any:`Loop` and :any:`Subroutine` level.
# Mark all parallel vector loops as `!$acc loop vector`
self.annotate_vector_loops(routine)

Parameters
----------
routine : :any:`Subroutine`
Subroutine to apply this transformation to.
"""
# Mark all non-parallel loops as `!$acc loop seq`
self.annotate_sequential_loops(routine)

# Bail if this routine has been processed before
for p in FindNodes(ir.Pragma).visit(routine.ir):
# Check if `!$acc routine` has already been added
if p.keyword.lower() == 'acc' and 'routine' in p.content.lower():
return
# Wrap the routine body in `!$acc data present` markers to
# ensure all arguments are device-resident.
self.annotate_kernel_routine(routine)

if self.directive == 'openacc':
self.insert_annotations(routine, self.horizontal)

def process_driver(self, routine, targets=None):
"""
Apply the relevant ``'openacc'`` annotations to the driver loop.
Parameters
----------
routine : :any:`Subroutine`
Subroutine to apply this transformation to.
targets : list or string
List of subroutines that are to be considered as part of
the transformation call tree.
"""
if role == 'driver':
# Mark all parallel vector loops as `!$acc loop vector`
self.annotate_vector_loops(routine)

# Mark all parallel vector loops as `!$acc loop vector`
self.kernel_annotate_vector_loops_openacc(routine)
# Mark all non-parallel loops as `!$acc loop seq`
self.annotate_sequential_loops(routine)

# Mark all non-parallel loops as `!$acc loop seq`
self.kernel_annotate_sequential_loops_openacc(routine)
with pragmas_attached(routine, ir.Loop, attach_pragma_post=True):
driver_loops = find_driver_loops(routine=routine, targets=targets)
for loop in driver_loops:
self.annotate_driver_loop(loop)

with pragmas_attached(routine, ir.Loop, attach_pragma_post=True):
driver_loops = find_driver_loops(routine=routine, targets=targets)
for loop in driver_loops:
self.annotate_driver(self.directive, loop, self.block_dim)

@classmethod
def device_alloc_column_locals(cls, routine, column_locals):
Expand All @@ -237,42 +213,33 @@ def device_alloc_column_locals(cls, routine, column_locals):
routine.body.prepend((ir.Comment(''), pragma, ir.Comment('')))
routine.body.append((ir.Comment(''), pragma_post, ir.Comment('')))

@classmethod
def annotate_driver(cls, directive, driver_loop, block_dim):
def annotate_driver_loop(self, loop):
"""
Annotate driver block loop with ``'openacc'`` pragmas.
Parameters
----------
directive : string or None
Directives flavour to use for parallelism annotations; either
``'openacc'`` or ``None``.
driver_loop : :any:`Loop`
Driver ``Loop`` to wrap in ``'opencc'`` pragmas.
kernel_loops : list of :any:`Loop`
Vector ``Loop`` to wrap in ``'opencc'`` pragmas if hoisting is enabled.
block_dim : :any:`Dimension`
Optional ``Dimension`` object to define the blocking dimension
to detect hoisted temporary arrays and excempt them from marking.
loop : :any:`Loop`
Driver :any:`Loop` to wrap in ``'openacc'`` pragmas.
"""

# Mark driver loop as "gang parallel".
if directive == 'openacc':
arrays = FindVariables(unique=True).visit(driver_loop)
if self.directive == 'openacc':
arrays = FindVariables(unique=True).visit(loop)
arrays = [v for v in arrays if isinstance(v, sym.Array)]
arrays = [v for v in arrays if not v.type.intent]
arrays = [v for v in arrays if not v.type.pointer]

# Filter out arrays that are explicitly allocated with block dimension
sizes = block_dim.size_expressions
sizes = self.block_dim.size_expressions
arrays = [v for v in arrays if not any(d in sizes for d in as_tuple(v.shape))]
private_arrays = ', '.join(set(v.name for v in arrays))
private_clause = '' if not private_arrays else f' private({private_arrays})'

for pragma in as_tuple(driver_loop.pragma):
for pragma in as_tuple(loop.pragma):
if is_loki_pragma(pragma, starts_with='loop driver'):
# Replace `!$loki loop driver` pragma with OpenACC equivalent
params = get_pragma_parameters(driver_loop.pragma, starts_with='loop driver')
params = get_pragma_parameters(loop.pragma, starts_with='loop driver')
vlength = params.get('vector_length')
vlength_clause = f' vector_length({vlength})' if vlength else ''

Expand All @@ -281,8 +248,8 @@ def annotate_driver(cls, directive, driver_loop, block_dim):
pragma_post = ir.Pragma(keyword='acc', content='end parallel loop')

# Replace existing loki pragma and add post-pragma
loop_pragmas = tuple(p for p in as_tuple(driver_loop.pragma) if p is not pragma)
driver_loop._update(
loop_pragmas = tuple(p for p in as_tuple(loop.pragma) if p is not pragma)
loop._update(
pragma=loop_pragmas + (pragma_new,),
pragma_post=(pragma_post,) + as_tuple(driver_loop.pragma_post)
pragma_post=(pragma_post,) + as_tuple(loop.pragma_post)
)
15 changes: 4 additions & 11 deletions loki/transformations/single_column/tests/test_scc.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,7 @@ def test_scc_annotate_openacc(frontend, horizontal, blocking):
scc_transform = (SCCDevectorTransformation(horizontal=horizontal),)
scc_transform += (SCCDemoteTransformation(horizontal=horizontal),)
scc_transform += (SCCRevectorTransformation(horizontal=horizontal),)
scc_transform += (SCCAnnotateTransformation(horizontal=horizontal,
directive='openacc', block_dim=blocking),)
scc_transform += (SCCAnnotateTransformation(directive='openacc', block_dim=blocking),)
for transform in scc_transform:
transform.apply(driver, role='driver', targets=['compute_column'])
transform.apply(kernel, role='kernel')
Expand Down Expand Up @@ -407,9 +406,7 @@ def test_scc_nested(frontend, horizontal, blocking):
scc_pipeline.apply(inner_kernel, role='kernel')

# Apply annotate twice to test bailing out mechanism
scc_annotate = SCCAnnotateTransformation(
horizontal=horizontal, directive='openacc', block_dim=blocking
)
scc_annotate = SCCAnnotateTransformation(directive='openacc', block_dim=blocking)
scc_annotate.apply(driver, role='driver', targets=['compute_column'])
scc_annotate.apply(outer_kernel, role='kernel', targets=['compute_q'])
scc_annotate.apply(inner_kernel, role='kernel')
Expand Down Expand Up @@ -782,12 +779,8 @@ def test_scc_annotate_routine_seq_pragma(frontend, horizontal, blocking):
assert pragmas[0].keyword == 'loki'
assert pragmas[0].content == 'routine seq'

transformation = SCCAnnotateTransformation(
horizontal=horizontal, directive='openacc', block_dim=blocking
)
transformation.transform_subroutine(
routine, role='kernel', targets=['some_kernel',]
)
transformation = SCCAnnotateTransformation(directive='openacc', block_dim=blocking)
transformation.transform_subroutine(routine, role='kernel', targets=['some_kernel',])

pragmas = FindNodes(Pragma).visit(routine.spec)
assert len(pragmas) == 1
Expand Down
4 changes: 1 addition & 3 deletions loki/transformations/single_column/tests/test_scc_hoist.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,7 @@ def test_scc_hoist_multiple_kernels_loops(tmp_path, frontend, trim_vector_sectio
transformation += (SCCDevectorTransformation(horizontal=horizontal, trim_vector_sections=trim_vector_sections),)
transformation += (SCCDemoteTransformation(horizontal=horizontal),)
transformation += (SCCRevectorTransformation(horizontal=horizontal),)
transformation += (SCCAnnotateTransformation(
horizontal=horizontal, directive='openacc', block_dim=blocking,
),)
transformation += (SCCAnnotateTransformation(directive='openacc', block_dim=blocking),)
for transform in transformation:
scheduler.process(transformation=transform)

Expand Down

0 comments on commit bce73e9

Please sign in to comment.