From 7996d8915e0a31cfc0f9b5245f39b01c13802e1a Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Sun, 7 Jan 2024 15:14:07 +0100 Subject: [PATCH 1/9] Fixed operand style issues --- .github/workflows/test-n-publish.yml | 3 +- kerncraft/incore_model.py | 162 ++++++++++++++------------- kerncraft/kerncraft.py | 2 +- tests/test_incore_model.py | 10 +- 4 files changed, 91 insertions(+), 86 deletions(-) diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml index 7d9f6c2..3924aa2 100644 --- a/.github/workflows/test-n-publish.yml +++ b/.github/workflows/test-n-publish.yml @@ -20,7 +20,8 @@ jobs: python -m pip install --upgrade pip python -m pip install codecov requests sympy python -m pip install -e . - iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul + python -m pip install git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm + #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul - name: Test run: | coverage run -p tests/all_tests.py diff --git a/kerncraft/incore_model.py b/kerncraft/incore_model.py index 6561f5a..4b75a25 100755 --- a/kerncraft/incore_model.py +++ b/kerncraft/incore_model.py @@ -21,6 +21,10 @@ from osaca.parser import get_parser from osaca.semantics import MachineModel, ISASemantics from osaca.semantics.marker_utils import find_basic_loop_bodies, get_marker +from osaca.parser.register import RegisterOperand +from osaca.parser.memory import MemoryOperand +from osaca.parser.immediate import ImmediateOperand +from osaca.parser.identifier import IdentifierOperand from kerncraft import iaca_get, __version__ @@ -104,9 +108,9 @@ def compute_block_metric(block): # Count registers used for prefix in register_class_usage: for op in line.operands: - if 'register' in op: - if op.register.name.startswith(prefix): - register_class_usage[prefix].append(op.register.name) + if isinstance(op, RegisterOperand): + if op.name.startswith(prefix): + register_class_usage[prefix].append(op.name) # Identify and count packed and avx instructions if re.match(r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]", @@ -136,9 +140,9 @@ def get_pointer_increment(block): continue # Extract destination references, ignoring var(%rip) - dst_mem_references = [op.memory for op in line.semantic_operands.destination - if 'memory' in op and - (op.memory.base is None or op.memory.base.name != 'rip')] + dst_mem_references = [op for op in line.semantic_operands["destination"] + if isinstance(op, MemoryOperand) and + (op.base is None or op.base.name != 'rip')] if dst_mem_references: if not stores_only: stores_only = True @@ -147,43 +151,43 @@ def get_pointer_increment(block): # If no destination references were found sofar, include source references if not stores_only: - mem_references += [op.memory for op in line.semantic_operands.source - if 'memory' in op] + mem_references += [op for op in line.semantic_operands["source"] + if isinstance(op, MemoryOperand)] if re.match(r'^inc[bwlq]?$', line.instruction): - reg = line.operands[0].register.name + reg = line.operands[0].name modified_registers.append(reg) increments[reg] = 1 - elif re.match(r'^add[bwlq]?$', line.instruction) and 'immediate' in line.operands[0] \ - and 'register' in line.operands[1]: - reg = line.operands[1].register.name - increments[reg] = int(line.operands[0].immediate.value) + elif re.match(r'^add[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \ + and isinstance(line.operands[1], RegisterOperand): + reg = line.operands[1].name + increments[reg] = int(line.operands[0].value) modified_registers.append(reg) elif re.match(r'^dec[bwlq]?$', line.instruction): - reg = line.operands[0].register.name + reg = line.operands[0].name modified_registers.append(reg) increments[reg] = -1 - elif re.match(r'^sub[bwlq]?$', line.instruction) and 'immediate' in line.operands[0] \ - and 'register' in line.operands[1]: - reg = line.operands[1].register.name + elif re.match(r'^sub[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \ + and isinstance(line.operands[1], RegisterOperand): + reg = line.operands[1].name modified_registers.append(reg) - increments[reg] = -int(line.operands[0].immediate.value) + increments[reg] = -int(line.operands[0].value) elif re.match(r'^lea[bwlq]?$', line.instruction): # `lea 1(%r11), %r11` is the same as `add $1, %r11` - if line.operands[0].memory.base is not None and \ - line.operands[0].memory.base.name == line.operands[1].register.name and \ - line.operands[0].memory.index is None: - reg = line.operands[1].register.name + if line.operands[0].base is not None and \ + line.operands[0].base.name == line.operands[1].name and \ + line.operands[0].index is None: + reg = line.operands[1].name modified_registers.append(reg) increments[reg] = int( - line.operands[0].memory.offset.value) + line.operands[0].offset.value) # `lea 1(,%r11), %r11` is the same as `add $1, %r11` - if line.operands[0].memory.index is not None and \ - line.operands[0].memory.index.name == line.operands[1].register.name and \ - line.operands[0].memory.base is None: - reg = line.operands[1].register.name + if line.operands[0].index is not None and \ + line.operands[0].index.name == line.operands[1].name and \ + line.operands[0].base is None: + reg = line.operands[1].name modified_registers.append(reg) increments[reg] = int( - line.operands[0].memory.offset.value) + line.operands[0].offset.value) # deduce loop increment from memory index register pointer_increment = None # default -> can not decide, let user choose @@ -248,10 +252,10 @@ def compute_block_metric(block): iarithmetic_ctr += 1 # Counting use of vector registers for op in line.operands: - if 'register' in op and 'prefix' in op.register and op.register.prefix in 'zv': - vector_ctr += 1 - if 'register' in op and 'range' in op.register and op.register.range[0].prefix in 'zv': + if isinstance(op, RegisterOperand) and op.prefix is not None and op.prefix in 'zv': vector_ctr += 1 + #if isinstance(op, RegisterOperand) and 'range' in op.register and op.register.range[0].prefix in 'zv': + # vector_ctr += 1 # Count all instructions instruction_ctr += 1 @@ -276,24 +280,24 @@ def get_pointer_increment(block): # build dict of modified registers in block with count of number of modifications modified_registers = defaultdict(int) - for dests in [l.semantic_operands.destination for l in block if 'semantic_operands' in l]: + for dests in [l.semantic_operands["destination"] for l in block]: for d in dests: - if 'register' in d: - if 'range' in d.register: - modified_registers[AArch64.normalize_to_register_str(d.register.range[0])] += 1 - else: - modified_registers[AArch64.normalize_to_register_str(d.register)] += 1 + if isinstance(d, RegisterOperand): + #if 'range' in d.register: + # modified_registers[AArch64.normalize_to_register_str(d.register.range[0])] += 1 + #else: + modified_registers[AArch64.normalize_to_register_str(d)] += 1 for l in block: for d in l.operands: - if 'memory' in d: - if 'post_indexed' in d.memory or 'pre_indexed' in d.memory: - modified_registers[AArch64.normalize_to_register_str(d.memory.base)] += 1 + if isinstance(d, MemoryOperand): + if d.post_indexed is not False or d.pre_indexed: + modified_registers[AArch64.normalize_to_register_str(d.base)] += 1 inc = 1 - if 'post_indexed' in d.memory and 'value' in d.memory.post_indexed: - inc = int(d.memory.post_indexed.value) - if 'pre_indexed' in d.memory: - inc = int(d.memory.offset.value) - increments[AArch64.normalize_to_register_str(d.memory.base)] = inc + if isinstance(d.post_indexed, dict): + inc = int(d.post_indexed["value"]) + if d.pre_indexed: + inc = int(d.offset.value) + increments[AArch64.normalize_to_register_str(d.base)] = inc for line in block: # Skip non-instruction lines (such as comments and labels) @@ -302,16 +306,16 @@ def get_pointer_increment(block): # Extract and filter destination references (stores) dst_mem_references = [] - for dst in [op.memory for op in chain(line.semantic_operands.destination, - line.semantic_operands.src_dst) - if 'memory' in op]: + for dst in [op for op in chain(line.semantic_operands["destination"], + line.semantic_operands["src_dst"]) + if isinstance(op, MemoryOperand)]: # base or index must be a modified (i.e., changing) register if AArch64.normalize_to_register_str(dst.base) not in modified_registers and \ AArch64.normalize_to_register_str(dst.index) not in modified_registers: continue # offset operands with identifiers (e.g. `:lo12:gosa`) are ignored - if dst.offset is not None and 'identifier' in dst.offset: + if dst.offset is not None and isinstance(dst.offset, IdentifierOperand): continue dst_mem_references.append(dst) @@ -323,23 +327,23 @@ def get_pointer_increment(block): # If no destination references were found sofar, include source references (loads) if not stores_only: - mem_references += [op.memory for op in chain(line.semantic_operands.source, - line.semantic_operands.src_dst) - if 'memory' in op] + mem_references += [op for op in chain(line.semantic_operands["source"], + line.semantic_operands["src_dst"]) + if isinstance(op, MemoryOperand)] # ADD dest_reg, src_reg, immd if re.match(r'^add[s]?$', line.instruction) and \ line.operands[0] == line.operands[1] and \ - 'immediate' in line.operands[2]: - reg_name = AArch64.normalize_to_register_str(line.operands[0].register) - inc = int(line.operands[2].immediate.value) + isinstance(line.operands[2], ImmediateOperand): + reg_name = AArch64.normalize_to_register_str(line.operands[0]) + inc = int(line.operands[2].value) increments[reg_name] = inc # SUB dest_reg, src_reg, immd elif re.match(r'^sub[s]?$', line.instruction) and \ line.operands[0] == line.operands[1] and \ - 'immediate' in line.operands[2]: - reg_name = AArch64.normalize_to_register_str(line.operands[0].register) - inc = -int(line.operands[2].immediate.value) + isinstance(line.operands[2], ImmediateOperand): + reg_name = AArch64.normalize_to_register_str(line.operands[0]) + inc = -int(line.operands[2].value) if reg_name in increments and increments[reg_name] == inc: increments[reg_name] = inc @@ -352,11 +356,11 @@ def get_pointer_increment(block): if line.instruction is None: continue # LSL dest_reg, src_reg, immd - if re.match(r'^lsl$', line.instruction) and 'immediate' in line.operands[2] and \ - AArch64.normalize_to_register_str(line.operands[1].register) in increments: - increments[AArch64.normalize_to_register_str(line.operands[0].register)] = \ - increments[AArch64.normalize_to_register_str(line.operands[1].register)] * \ - 2**int(line.operands[2].immediate.value) + if re.match(r'^lsl$', line.instruction) and isinstance(line.operands[2], ImmediateOperand) and \ + AArch64.normalize_to_register_str(line.operands[1]) in increments: + increments[AArch64.normalize_to_register_str(line.operands[0])] = \ + increments[AArch64.normalize_to_register_str(line.operands[1])] * \ + 2**int(line.operands[2].value) new_increments = [] # Third pass to find registers based on constant +- increment @@ -370,13 +374,13 @@ def get_pointer_increment(block): factor = 1 else: factor = -1 - if 'register' not in line.operands[1] or 'register' not in line.operands[2]: + if not isinstance(line.operands[1], RegisterOperand) or not isinstance(line.operands[2], RegisterOperand): continue for i,j in [(1,2), (2,1)]: - reg_i_name = AArch64.normalize_to_register_str(line.operands[i].register) - reg_j_name = AArch64.normalize_to_register_str(line.operands[j].register) + reg_i_name = AArch64.normalize_to_register_str(line.operands[i]) + reg_j_name = AArch64.normalize_to_register_str(line.operands[j]) if reg_i_name in increments and reg_j_name not in modified_registers: - reg_dest_name = AArch64.normalize_to_register_str(line.operands[0].register) + reg_dest_name = AArch64.normalize_to_register_str(line.operands[0]) inc = factor * increments[reg_i_name] if reg_dest_name in increments and increments[reg_dest_name] == inc: modified_registers[reg_dest_name] -= 1 @@ -392,13 +396,13 @@ def get_pointer_increment(block): if line.instruction is None: continue # LSL dest_reg, src_reg, immd - if re.match(r'^lsl$', line.instruction) and 'immediate' in line.operands[2] and \ - 'register' in line.operands[1]: - src_reg_name = AArch64.normalize_to_register_str(line.operands[1].register) + if re.match(r'^lsl$', line.instruction) and isinstance(line.operands[2], ImmediateOperand) and \ + isinstance(line.operands[1], RegisterOperand): + src_reg_name = AArch64.normalize_to_register_str(line.operands[1]) if src_reg_name in new_increments and src_reg_name in increments: - increments[AArch64.normalize_to_register_str(line.operands[0].register)] = \ + increments[AArch64.normalize_to_register_str(line.operands[0])] = \ increments[src_reg_name] * \ - 2**int(line.operands[2].immediate.value) + 2**int(line.operands[2].value) # deduce loop increment from memory index register address_registers = [] @@ -412,8 +416,8 @@ def get_pointer_increment(block): if index_reg in increments: reg = index_reg # If index is used, a scale other than 1 needs to be considered - if 'shift' in mref.index and mref.index.shift: - scales[reg] = 2**int(mref.index.shift[0].value) + if mref.index.shift: + scales[reg] = 2**int(mref.index.shift[0]['value']) else: reg = base_reg else: @@ -467,7 +471,7 @@ def userselect_block(blocks, default=None, debug=False): # Blocks first line is the label, the user will be able to spot it, so we don't need to # print it label_list.append(label) - print('\n\t'.join([b['line'] for b in block])) + print('\n\t'.join([b.line for b in block])) # Show all possible block labels in the end print( @@ -490,7 +494,7 @@ def hashblock(block): # TODO normalize register names # TODO normalize instruction order # Remove target label and jump - h = md5('\n'.join([b['line'] for b in block]).encode()) + h = md5('\n'.join([b.line for b in block]).encode()) return h.hexdigest() @@ -614,7 +618,7 @@ def asm_instrumentation(input_file, output_file=None, marker_end + asm_lines[block_end:] if output_file is not None: - output_file.writelines([l['line']+'\n' for l in marked_asm]) + output_file.writelines([l.line+'\n' for l in marked_asm]) return block_lines, pointer_increment @@ -668,7 +672,7 @@ def osaca_analyse_instrumented_assembly( result['port cycles'] = OrderedDict(list(zip(osaca_machine_model['ports'], throughput_values))) result['throughput'] = max(throughput_values + [max_lcd]) result['lcd'] = max_lcd - result['cp_latency'] = sum([x['latency_cp'] for x in cp_list]) + result['cp_latency'] = sum([x.latency_cp for x in cp_list]) result['uops'] = None # Not given by OSACA unmatched_ratio = osaca.get_unmatched_instruction_ratio(kernel) @@ -861,4 +865,4 @@ def main(): if __name__ == '__main__': - main() + main() \ No newline at end of file diff --git a/kerncraft/kerncraft.py b/kerncraft/kerncraft.py index 54e5d33..5cc81ba 100755 --- a/kerncraft/kerncraft.py +++ b/kerncraft/kerncraft.py @@ -204,7 +204,7 @@ def create_parser(): 'description file (-std=c99 is always added).') # Needed for ECM and RooflineASM models: - parser.add_argument('--incore-model', '-i', type=str, default=None, + parser.add_argument('--incore-model', '-i', type=str, default="OSACA", help='In-core model to use, default is first in machine description file.') for m in models.__all__: diff --git a/tests/test_incore_model.py b/tests/test_incore_model.py index 2a9c44c..8b7937b 100644 --- a/tests/test_incore_model.py +++ b/tests/test_incore_model.py @@ -22,28 +22,28 @@ def test_2d5pt_constcoeffs(self): with open(self._find_file('2d-5pt-constcoeffs.s')) as f: block_lines, pointer_increment = asm_instrumentation(f) - self.assertEqual(block_lines[0]['label'], '.L36') + self.assertEqual(block_lines[0].label, '.L36') self.assertEqual(pointer_increment, 8) def test_2d5pt_varcoeffs(self): with open(self._find_file('2d-5pt-varcoeffs.s')) as f: block_lines, pointer_increment = asm_instrumentation(f) - self.assertEqual(block_lines[0]['label'], '.L43') + self.assertEqual(block_lines[0].label, '.L43') self.assertEqual(pointer_increment, 16) def test_3d25pt_semi(self): with open(self._find_file('3d-25pt_semi.s')) as f: block_lines, pointer_increment = asm_instrumentation(f, pointer_increment=8) - self.assertEqual(block_lines[0]['label'], 'LBB0_62') + self.assertEqual(block_lines[0].label, 'LBB0_62') #self.assertEqual(pointer_increment, 8) def test_matvec_trans(self): with open(self._find_file('matvec_trans.s')) as f: block_lines, pointer_increment = asm_instrumentation(f) - self.assertEqual(block_lines[0]['label'], 'LBB0_30') + self.assertEqual(block_lines[0].label, 'LBB0_30') self.assertEqual(pointer_increment, 64) def test_increment_detection_x86(self): @@ -905,4 +905,4 @@ def test_increment_detection_aarch64(self): if __name__ == '__main__': suite = unittest.TestLoader().loadTestsFromTestCase(TestIncoreModelX86) - unittest.TextTestRunner(verbosity=2, buffer=True).run(suite) + unittest.TextTestRunner(verbosity=2, buffer=True).run(suite) \ No newline at end of file From d7c9be65bc55a690461ff3dfb5430dc3e2581a86 Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Sun, 7 Jan 2024 15:20:58 +0100 Subject: [PATCH 2/9] Trying to build the right branch of OSACA --- .github/workflows/test-n-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml index 3924aa2..0331b07 100644 --- a/.github/workflows/test-n-publish.yml +++ b/.github/workflows/test-n-publish.yml @@ -20,7 +20,7 @@ jobs: python -m pip install --upgrade pip python -m pip install codecov requests sympy python -m pip install -e . - python -m pip install git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm + python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm" #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul - name: Test run: | From 4d76e608e4be4333fefd5b08fa03fe60d1f2dc0c Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Wed, 10 Jan 2024 14:09:39 +0100 Subject: [PATCH 3/9] Changed order of pip installs --- .github/workflows/test-n-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml index 0331b07..ee47a74 100644 --- a/.github/workflows/test-n-publish.yml +++ b/.github/workflows/test-n-publish.yml @@ -18,9 +18,9 @@ jobs: - name: Install run: | python -m pip install --upgrade pip + python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm" python -m pip install codecov requests sympy python -m pip install -e . - python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm" #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul - name: Test run: | From 4e30ae598c3cb959e09b0150f19dae834adf6dff Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Wed, 10 Jan 2024 14:40:05 +0100 Subject: [PATCH 4/9] Try with PyYAML --- .github/workflows/test-n-publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml index ee47a74..a1fe18b 100644 --- a/.github/workflows/test-n-publish.yml +++ b/.github/workflows/test-n-publish.yml @@ -18,6 +18,7 @@ jobs: - name: Install run: | python -m pip install --upgrade pip + python -m pip install -U PyYAML python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm" python -m pip install codecov requests sympy python -m pip install -e . From 6d06b20db38634381c18c28b2392dc3a30c5a79b Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Thu, 11 Jan 2024 17:08:29 +0100 Subject: [PATCH 5/9] Merged master --- .github/workflows/test-n-publish.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml index e633412..a1fe18b 100644 --- a/.github/workflows/test-n-publish.yml +++ b/.github/workflows/test-n-publish.yml @@ -22,10 +22,7 @@ jobs: python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm" python -m pip install codecov requests sympy python -m pip install -e . -<<<<<<< HEAD #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul -======= ->>>>>>> master - name: Test run: | coverage run -p tests/all_tests.py From b6a3af8c97b1c314d6930080dd064f298656adfe Mon Sep 17 00:00:00 2001 From: stefandesouza Date: Tue, 5 Mar 2024 12:05:57 +0100 Subject: [PATCH 6/9] Updated instruction form attributes --- kerncraft/incore_model.py | 544 ++++++++++++++++++++++---------------- 1 file changed, 320 insertions(+), 224 deletions(-) diff --git a/kerncraft/incore_model.py b/kerncraft/incore_model.py index 4b75a25..0246411 100755 --- a/kerncraft/incore_model.py +++ b/kerncraft/incore_model.py @@ -30,11 +30,11 @@ def itemsEqual(lst): - return lst[1:] == lst[:-1] + return lst[1:] == lst[:-1] class IncoreModel: - def __init__(self, isa='x86'): + def __init__(self, isa="x86"): isa @@ -52,10 +52,10 @@ class LlvmMCA(IncoreModel): class ISA: @staticmethod - def get_isa(isa='x86'): - if isa.lower() == 'x86': + def get_isa(isa="x86"): + if isa.lower() == "x86": return x86 - elif isa.lower() == 'aarch64': + elif isa.lower() == "aarch64": return AArch64 @staticmethod @@ -95,12 +95,12 @@ class x86(ISA): @staticmethod def compute_block_metric(block): """Return comparable metric on block information.""" - register_class_usage = {'zmm': [], 'ymm': [], 'xmm': []} + register_class_usage = {"zmm": [], "ymm": [], "xmm": []} packed_instruction_ctr, avx_instruction_ctr, instruction_ctr = 0, 0, 0 # Analyze code to determine metric for line in block: # Skip non-instruction lines (e.g., comments) - if line.instruction is None: + if line.mnemonic is None: continue # Count all instructions instruction_ctr += 1 @@ -113,18 +113,22 @@ def compute_block_metric(block): register_class_usage[prefix].append(op.name) # Identify and count packed and avx instructions - if re.match(r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]", - line.instruction): - if line.instruction.startswith('v'): + if re.match( + r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]", line.mnemonic + ): + if line.mnemonic.startswith("v"): avx_instruction_ctr += 1 packed_instruction_ctr += 1 # Build metric - return (packed_instruction_ctr, avx_instruction_ctr, - len(set(register_class_usage['zmm'])), - len(set(register_class_usage['ymm'])), - len(set(register_class_usage['xmm'])), - instruction_ctr) + return ( + packed_instruction_ctr, + avx_instruction_ctr, + len(set(register_class_usage["zmm"])), + len(set(register_class_usage["ymm"])), + len(set(register_class_usage["xmm"])), + instruction_ctr, + ) @staticmethod def get_pointer_increment(block): @@ -136,13 +140,15 @@ def get_pointer_increment(block): modified_registers = [] for line in block: # Skip non-instruction lines (e.g., comments) - if line.instruction is None: + if line.mnemonic is None: continue # Extract destination references, ignoring var(%rip) - dst_mem_references = [op for op in line.semantic_operands["destination"] - if isinstance(op, MemoryOperand) and - (op.base is None or op.base.name != 'rip')] + dst_mem_references = [ + op + for op in line.semantic_operands["destination"] + if isinstance(op, MemoryOperand) and (op.base is None or op.base.name != "rip") + ] if dst_mem_references: if not stores_only: stores_only = True @@ -151,60 +157,76 @@ def get_pointer_increment(block): # If no destination references were found sofar, include source references if not stores_only: - mem_references += [op for op in line.semantic_operands["source"] - if isinstance(op, MemoryOperand)] - if re.match(r'^inc[bwlq]?$', line.instruction): + mem_references += [ + op for op in line.semantic_operands["source"] if isinstance(op, MemoryOperand) + ] + if re.match(r"^inc[bwlq]?$", line.mnemonic): reg = line.operands[0].name modified_registers.append(reg) increments[reg] = 1 - elif re.match(r'^add[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \ - and isinstance(line.operands[1], RegisterOperand): + elif ( + re.match(r"^add[bwlq]?$", line.mnemonic) + and isinstance(line.operands[0], ImmediateOperand) + and isinstance(line.operands[1], RegisterOperand) + ): reg = line.operands[1].name increments[reg] = int(line.operands[0].value) modified_registers.append(reg) - elif re.match(r'^dec[bwlq]?$', line.instruction): + elif re.match(r"^dec[bwlq]?$", line.mnemonic): reg = line.operands[0].name modified_registers.append(reg) increments[reg] = -1 - elif re.match(r'^sub[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \ - and isinstance(line.operands[1], RegisterOperand): + elif ( + re.match(r"^sub[bwlq]?$", line.mnemonic) + and isinstance(line.operands[0], ImmediateOperand) + and isinstance(line.operands[1], RegisterOperand) + ): reg = line.operands[1].name modified_registers.append(reg) increments[reg] = -int(line.operands[0].value) - elif re.match(r'^lea[bwlq]?$', line.instruction): + elif re.match(r"^lea[bwlq]?$", line.mnemonic): # `lea 1(%r11), %r11` is the same as `add $1, %r11` - if line.operands[0].base is not None and \ - line.operands[0].base.name == line.operands[1].name and \ - line.operands[0].index is None: + if ( + line.operands[0].base is not None + and line.operands[0].base.name == line.operands[1].name + and line.operands[0].index is None + ): reg = line.operands[1].name modified_registers.append(reg) - increments[reg] = int( - line.operands[0].offset.value) + increments[reg] = int(line.operands[0].offset.value) # `lea 1(,%r11), %r11` is the same as `add $1, %r11` - if line.operands[0].index is not None and \ - line.operands[0].index.name == line.operands[1].name and \ - line.operands[0].base is None: + if ( + line.operands[0].index is not None + and line.operands[0].index.name == line.operands[1].name + and line.operands[0].base is None + ): reg = line.operands[1].name modified_registers.append(reg) - increments[reg] = int( - line.operands[0].offset.value) + increments[reg] = int(line.operands[0].offset.value) # deduce loop increment from memory index register pointer_increment = None # default -> can not decide, let user choose possible_idx_regs = None if mem_references: # we found memory references to work with - possible_idx_regs = list(set(increments.keys()).intersection( - set([mref.base.name for mref in mem_references if mref.base is not None] + - [mref.index.name for mref in mem_references if mref.index is not None]))) + possible_idx_regs = list( + set(increments.keys()).intersection( + set( + [mref.base.name for mref in mem_references if mref.base is not None] + + [mref.index.name for mref in mem_references if mref.index is not None] + ) + ) + ) for mref in mem_references: for reg in list(possible_idx_regs): # Only consider references with two registers, where one could be an # index if None not in [mref.base, mref.index]: # One needs to mach, other registers will be excluded - if not ((mref.base is not None and reg == mref.base.name) or - (mref.index is not None and reg == mref.index.name)): + if not ( + (mref.base is not None and reg == mref.base.name) + or (mref.index is not None and reg == mref.index.name) + ): # reg can not be it possible_idx_regs.remove(reg) @@ -213,15 +235,19 @@ def get_pointer_increment(block): # good, exactly one register was found idx_reg = possible_idx_regs[0] elif possible_idx_regs and itemsEqual( - [increments[pidxreg] for pidxreg in possible_idx_regs]): + [increments[pidxreg] for pidxreg in possible_idx_regs] + ): # multiple were option found, but all have the same increment # use first match: idx_reg = possible_idx_regs[0] if idx_reg and modified_registers.count(idx_reg) == 1: - mem_scales = [mref.scale for mref in mem_references - if (mref.index is not None and idx_reg == mref.index.name) or - (mref.base is not None and idx_reg == mref.base.name)] + mem_scales = [ + mref.scale + for mref in mem_references + if (mref.index is not None and idx_reg == mref.index.name) + or (mref.base is not None and idx_reg == mref.base.name) + ] if itemsEqual(mem_scales): # good, all scales are equal @@ -243,32 +269,32 @@ def compute_block_metric(block): # Analyze code to determine metric for line in block: # Skip non-instruction lines (e.g., comments) - if line.instruction is None: + if line.mnemonic is None: continue # Counting basic arithmetic insstructions - if line.instruction in ['fmul', 'fdiv', 'fadd', 'fsub']: + if line.mnemonic in ["fmul", "fdiv", "fadd", "fsub"]: farithmetic_ctr += 1 - elif line.instruction in ['add', 'sub', 'mul']: + elif line.mnemonic in ["add", "sub", "mul"]: iarithmetic_ctr += 1 # Counting use of vector registers for op in line.operands: - if isinstance(op, RegisterOperand) and op.prefix is not None and op.prefix in 'zv': + if isinstance(op, RegisterOperand) and op.prefix is not None and op.prefix in "zv": vector_ctr += 1 - #if isinstance(op, RegisterOperand) and 'range' in op.register and op.register.range[0].prefix in 'zv': + # if isinstance(op, RegisterOperand) and 'range' in op.register and op.register.range[0].prefix in 'zv': # vector_ctr += 1 # Count all instructions instruction_ctr += 1 # Build metric return (vector_ctr, farithmetic_ctr, iarithmetic_ctr, instruction_ctr) - + @staticmethod def normalize_to_register_str(register): if register is None: return None prefix = register.prefix - if prefix in 'wx': - prefix = 'x' + if prefix in "wx": + prefix = "x" return prefix + register.name @staticmethod @@ -283,9 +309,9 @@ def get_pointer_increment(block): for dests in [l.semantic_operands["destination"] for l in block]: for d in dests: if isinstance(d, RegisterOperand): - #if 'range' in d.register: + # if 'range' in d.register: # modified_registers[AArch64.normalize_to_register_str(d.register.range[0])] += 1 - #else: + # else: modified_registers[AArch64.normalize_to_register_str(d)] += 1 for l in block: for d in l.operands: @@ -298,20 +324,26 @@ def get_pointer_increment(block): if d.pre_indexed: inc = int(d.offset.value) increments[AArch64.normalize_to_register_str(d.base)] = inc - + for line in block: # Skip non-instruction lines (such as comments and labels) - if line.instruction is None: + if line.mnemonic is None: continue # Extract and filter destination references (stores) dst_mem_references = [] - for dst in [op for op in chain(line.semantic_operands["destination"], - line.semantic_operands["src_dst"]) - if isinstance(op, MemoryOperand)]: + for dst in [ + op + for op in chain( + line.semantic_operands["destination"], line.semantic_operands["src_dst"] + ) + if isinstance(op, MemoryOperand) + ]: # base or index must be a modified (i.e., changing) register - if AArch64.normalize_to_register_str(dst.base) not in modified_registers and \ - AArch64.normalize_to_register_str(dst.index) not in modified_registers: + if ( + AArch64.normalize_to_register_str(dst.base) not in modified_registers + and AArch64.normalize_to_register_str(dst.index) not in modified_registers + ): continue # offset operands with identifiers (e.g. `:lo12:gosa`) are ignored @@ -327,56 +359,72 @@ def get_pointer_increment(block): # If no destination references were found sofar, include source references (loads) if not stores_only: - mem_references += [op for op in chain(line.semantic_operands["source"], - line.semantic_operands["src_dst"]) - if isinstance(op, MemoryOperand)] + mem_references += [ + op + for op in chain( + line.semantic_operands["source"], line.semantic_operands["src_dst"] + ) + if isinstance(op, MemoryOperand) + ] # ADD dest_reg, src_reg, immd - if re.match(r'^add[s]?$', line.instruction) and \ - line.operands[0] == line.operands[1] and \ - isinstance(line.operands[2], ImmediateOperand): + if ( + re.match(r"^add[s]?$", line.mnemonic) + and line.operands[0] == line.operands[1] + and isinstance(line.operands[2], ImmediateOperand) + ): reg_name = AArch64.normalize_to_register_str(line.operands[0]) inc = int(line.operands[2].value) increments[reg_name] = inc # SUB dest_reg, src_reg, immd - elif re.match(r'^sub[s]?$', line.instruction) and \ - line.operands[0] == line.operands[1] and \ - isinstance(line.operands[2], ImmediateOperand): + elif ( + re.match(r"^sub[s]?$", line.mnemonic) + and line.operands[0] == line.operands[1] + and isinstance(line.operands[2], ImmediateOperand) + ): reg_name = AArch64.normalize_to_register_str(line.operands[0]) inc = -int(line.operands[2].value) if reg_name in increments and increments[reg_name] == inc: increments[reg_name] = inc # Remove any increments that are modiefed more than once - increments = {reg_name: inc for reg_name, inc in increments.items() - if modified_registers[reg_name] == 1} + increments = { + reg_name: inc + for reg_name, inc in increments.items() + if modified_registers[reg_name] == 1 + } # Second pass to find lsl instructions on increments for line in block: - if line.instruction is None: + if line.mnemonic is None: continue # LSL dest_reg, src_reg, immd - if re.match(r'^lsl$', line.instruction) and isinstance(line.operands[2], ImmediateOperand) and \ - AArch64.normalize_to_register_str(line.operands[1]) in increments: - increments[AArch64.normalize_to_register_str(line.operands[0])] = \ - increments[AArch64.normalize_to_register_str(line.operands[1])] * \ - 2**int(line.operands[2].value) + if ( + re.match(r"^lsl$", line.mnemonic) + and isinstance(line.operands[2], ImmediateOperand) + and AArch64.normalize_to_register_str(line.operands[1]) in increments + ): + increments[AArch64.normalize_to_register_str(line.operands[0])] = increments[ + AArch64.normalize_to_register_str(line.operands[1]) + ] * 2 ** int(line.operands[2].value) new_increments = [] # Third pass to find registers based on constant +- increment for line in block: - if line.instruction is None: + if line.mnemonic is None: continue # ADD|SUB dest_reg, const_reg, increment_reg (source registers may be switched) - m = re.match(r'^(add|sub)[s]?$', line.instruction) + m = re.match(r"^(add|sub)[s]?$", line.mnemonic) if m: - if m.group(1) == 'add': + if m.group(1) == "add": factor = 1 else: factor = -1 - if not isinstance(line.operands[1], RegisterOperand) or not isinstance(line.operands[2], RegisterOperand): + if not isinstance(line.operands[1], RegisterOperand) or not isinstance( + line.operands[2], RegisterOperand + ): continue - for i,j in [(1,2), (2,1)]: + for i, j in [(1, 2), (2, 1)]: reg_i_name = AArch64.normalize_to_register_str(line.operands[i]) reg_j_name = AArch64.normalize_to_register_str(line.operands[j]) if reg_i_name in increments and reg_j_name not in modified_registers: @@ -388,21 +436,27 @@ def get_pointer_increment(block): new_increments.append(reg_dest_name) # Remove any increments that are modified more often than updates have been detected - increments = {reg_name: inc for reg_name, inc in increments.items() - if modified_registers[reg_name] == 1} + increments = { + reg_name: inc + for reg_name, inc in increments.items() + if modified_registers[reg_name] == 1 + } # Last pass to find lsl instructions on increments for line in block: - if line.instruction is None: + if line.mnemonic is None: continue # LSL dest_reg, src_reg, immd - if re.match(r'^lsl$', line.instruction) and isinstance(line.operands[2], ImmediateOperand) and \ - isinstance(line.operands[1], RegisterOperand): + if ( + re.match(r"^lsl$", line.mnemonic) + and isinstance(line.operands[2], ImmediateOperand) + and isinstance(line.operands[1], RegisterOperand) + ): src_reg_name = AArch64.normalize_to_register_str(line.operands[1]) if src_reg_name in new_increments and src_reg_name in increments: - increments[AArch64.normalize_to_register_str(line.operands[0])] = \ - increments[src_reg_name] * \ - 2**int(line.operands[2].value) + increments[AArch64.normalize_to_register_str(line.operands[0])] = increments[ + src_reg_name + ] * 2 ** int(line.operands[2].value) # deduce loop increment from memory index register address_registers = [] @@ -417,7 +471,7 @@ def get_pointer_increment(block): reg = index_reg # If index is used, a scale other than 1 needs to be considered if mref.index.shift: - scales[reg] = 2**int(mref.index.shift[0]['value']) + scales[reg] = 2 ** int(mref.index.shift[0]["value"]) else: reg = base_reg else: @@ -443,16 +497,16 @@ def get_pointer_increment(block): def userselect_increment(block, default=None, comment=None): """Let user interactively select byte increment.""" print("Selected block:") - print('\n ' + ('\n '.join([b.line for b in block]))) - print('hash: ', hashblock(block)) + print("\n " + ("\n ".join([b.line for b in block]))) + print("hash: ", hashblock(block)) print() increment = None while increment is None: prompt = "Choose store pointer increment (number of bytes)" if default: - prompt += '[{}]'.format(default) - prompt += ': ' + prompt += "[{}]".format(default) + prompt += ": " increment = input(prompt) try: increment = int(increment) @@ -471,13 +525,10 @@ def userselect_block(blocks, default=None, debug=False): # Blocks first line is the label, the user will be able to spot it, so we don't need to # print it label_list.append(label) - print('\n\t'.join([b.line for b in block])) + print("\n\t".join([b.line for b in block])) # Show all possible block labels in the end - print( - '-----------------------------\n' - + 'Possible blocks to be marked:' - ) + print("-----------------------------\n" + "Possible blocks to be marked:") for label in label_list: print(label) @@ -494,14 +545,14 @@ def hashblock(block): # TODO normalize register names # TODO normalize instruction order # Remove target label and jump - h = md5('\n'.join([b.line for b in block]).encode()) + h = md5("\n".join([b.line for b in block]).encode()) return h.hexdigest() -def find_increment_in_cache(block, cache_file='~/.kerncraft/increment_cache'): +def find_increment_in_cache(block, cache_file="~/.kerncraft/increment_cache"): search_hash = hashblock(block) cache_file = expanduser(cache_file) - cache = '' + cache = "" if os.path.exists(cache_file): with open(cache_file) as f: cache = f.readlines() @@ -520,14 +571,15 @@ def find_increment_in_cache(block, cache_file='~/.kerncraft/increment_cache'): def store_increment_to_cache( - block, pointer_increment, cache_file='~/.kerncraft/increment_cache', comment=None): + block, pointer_increment, cache_file="~/.kerncraft/increment_cache", comment=None +): cache_file = expanduser(cache_file) pathlib.Path(cache_file).parents[0].mkdir(parents=True, exist_ok=True) line = "{} {}".format(hashblock(block), pointer_increment) if comment: line += " #{}".format(comment) - with open(cache_file, 'a') as f: - f.write(line+"\n") + with open(cache_file, "a") as f: + f.write(line + "\n") def parse_asm(code, isa): @@ -538,11 +590,15 @@ def parse_asm(code, isa): return asm_lines -def asm_instrumentation(input_file, output_file=None, - block_selection='auto', - pointer_increment='auto_with_manual_fallback', - debug=False, - isa='x86', cache=True): +def asm_instrumentation( + input_file, + output_file=None, + block_selection="auto", + pointer_increment="auto_with_manual_fallback", + debug=False, + isa="x86", + cache=True, +): """ Add markers to an assembly file. @@ -569,14 +625,15 @@ def asm_instrumentation(input_file, output_file=None, output_file.truncate() if debug: - block_selection = 'manual' + block_selection = "manual" loop_blocks = find_basic_loop_bodies(asm_lines) - if block_selection == 'auto': + if block_selection == "auto": block_label = ISA.get_isa(isa).select_best_block(loop_blocks) - elif block_selection == 'manual': + elif block_selection == "manual": block_label = userselect_block( - loop_blocks, default=ISA.get_isa(isa).select_best_block(loop_blocks), debug=debug) + loop_blocks, default=ISA.get_isa(isa).select_best_block(loop_blocks), debug=debug + ) elif isinstance(block_selection, int): block_label = block_selection else: @@ -589,43 +646,54 @@ def asm_instrumentation(input_file, output_file=None, # Extract store pointer increment if not isinstance(pointer_increment, int): - if pointer_increment == 'auto': + if pointer_increment == "auto": pointer_increment = ISA.get_isa(isa).get_pointer_increment(block_lines) if pointer_increment is None: if output_file is not None: os.unlink(output_file.name) - raise RuntimeError("pointer_increment could not be detected automatically. Use " - "--pointer-increment to set manually to byte offset of store " - "pointer address between consecutive assembly block iterations. " - "Alternativley add the following line to ~/.kerncraft/" - "increment_cache: {} ".format(block_hashstr)) - elif pointer_increment == 'auto_with_manual_fallback': + raise RuntimeError( + "pointer_increment could not be detected automatically. Use " + "--pointer-increment to set manually to byte offset of store " + "pointer address between consecutive assembly block iterations. " + "Alternativley add the following line to ~/.kerncraft/" + "increment_cache: {} ".format(block_hashstr) + ) + elif pointer_increment == "auto_with_manual_fallback": pointer_increment = ISA.get_isa(isa).get_pointer_increment(block_lines) if pointer_increment is None: pointer_increment = userselect_increment(block_lines, comment=input_file) - elif pointer_increment == 'manual': + elif pointer_increment == "manual": pointer_increment = ISA.get_isa(isa).get_pointer_increment(block_lines) pointer_increment = userselect_increment( - block_lines, default=pointer_increment, comment=input_file) + block_lines, default=pointer_increment, comment=input_file + ) else: - raise ValueError("pointer_increment has to be an integer, 'auto', 'manual' or " - "'auto_with_manual_fallback' ") + raise ValueError( + "pointer_increment has to be an integer, 'auto', 'manual' or " + "'auto_with_manual_fallback' " + ) marker_start, marker_end = get_marker( - isa, comment="pointer_increment={} {}".format(pointer_increment, block_hashstr)) + isa, comment="pointer_increment={} {}".format(pointer_increment, block_hashstr) + ) - marked_asm = asm_lines[:block_start] + marker_start + asm_lines[block_start:block_end] + \ - marker_end + asm_lines[block_end:] + marked_asm = ( + asm_lines[:block_start] + + marker_start + + asm_lines[block_start:block_end] + + marker_end + + asm_lines[block_end:] + ) if output_file is not None: - output_file.writelines([l.line+'\n' for l in marked_asm]) + output_file.writelines([l.line + "\n" for l in marked_asm]) return block_lines, pointer_increment def osaca_analyse_instrumented_assembly( - instrumented_assembly_file, micro_architecture, assign_optimal_throughput=True, - isa=None): + instrumented_assembly_file, micro_architecture, assign_optimal_throughput=True, isa=None +): """ Run OSACA analysis on an instrumented assembly. @@ -661,32 +729,33 @@ def osaca_analyse_instrumented_assembly( lcd_dict = kernel_graph.get_loopcarried_dependencies() max_lcd = 0 for dep in lcd_dict: - max_lcd = max( - max_lcd, - lcd_dict[dep]['latency']) + max_lcd = max(max_lcd, lcd_dict[dep]["latency"]) # Critical-Path Analysis cp_list = kernel_graph.get_critical_path() - result['output'] = frontend.full_analysis(kernel, kernel_graph, verbose=True) - result['analyzed kernel'] = kernel - result['port cycles'] = OrderedDict(list(zip(osaca_machine_model['ports'], throughput_values))) - result['throughput'] = max(throughput_values + [max_lcd]) - result['lcd'] = max_lcd - result['cp_latency'] = sum([x.latency_cp for x in cp_list]) - result['uops'] = None # Not given by OSACA + result["output"] = frontend.full_analysis(kernel, kernel_graph, verbose=True) + result["analyzed kernel"] = kernel + result["port cycles"] = OrderedDict(list(zip(osaca_machine_model["ports"], throughput_values))) + result["throughput"] = max(throughput_values + [max_lcd]) + result["lcd"] = max_lcd + result["cp_latency"] = sum([x.latency_cp for x in cp_list]) + result["uops"] = None # Not given by OSACA unmatched_ratio = osaca.get_unmatched_instruction_ratio(kernel) if unmatched_ratio > 0.1: - print('WARNING: {:.0%} of the instruction could not be matched during incore analysis ' - 'with OSACA. Fix this by extending OSACAs instruction form database with the ' - 'required instructions.'.format(unmatched_ratio), - file=sys.stderr) + print( + "WARNING: {:.0%} of the instruction could not be matched during incore analysis " + "with OSACA. Fix this by extending OSACAs instruction form database with the " + "required instructions.".format(unmatched_ratio), + file=sys.stderr, + ) return result def llvm_mca_analyse_instrumented_assembly( - instrumented_assembly_file, micro_architecture, isa='x86'): + instrumented_assembly_file, micro_architecture, isa="x86" +): """ Run LLVM-MCA analysis on an instrumented assembly. @@ -703,26 +772,28 @@ def llvm_mca_analyse_instrumented_assembly( with open(instrumented_assembly_file) as f: parsed_code = parse_asm(f.read(), isa) kernel = osaca.reduce_to_section(parsed_code, isa) - assembly_section = '\n'.join([l.line for l in kernel]) + assembly_section = "\n".join([l.line for l in kernel]) output = subprocess.check_output( - ['llvm-mca'] + micro_architecture.split(' ') + - ['--timeline', '--timeline-max-cycles=1000', '--timeline-max-iterations=4'], - input=assembly_section.encode('utf-8')).decode('utf-8') - result['output'] = output + ["llvm-mca"] + + micro_architecture.split(" ") + + ["--timeline", "--timeline-max-cycles=1000", "--timeline-max-iterations=4"], + input=assembly_section.encode("utf-8"), + ).decode("utf-8") + result["output"] = output # Extract port names port_names = OrderedDict() - m = re.search(r'Resources:\n(?:[^\n]+\n)+', output) - for m in re.finditer(r'(\[[0-9\.]+\])\s+-\s+([a-zA-Z0-9]+)', m.group()): + m = re.search(r"Resources:\n(?:[^\n]+\n)+", output) + for m in re.finditer(r"(\[[0-9\.]+\])\s+-\s+([a-zA-Z0-9]+)", m.group()): port_names[m.group(1)] = m.group(2) # Extract cycles per port port_cycles = OrderedDict() - m = re.search(r'Resource pressure per iteration:\n[^\n]+\n[^\n]+', output) - port_cycle_lines = m.group().split('\n')[1:] + m = re.search(r"Resource pressure per iteration:\n[^\n]+\n[^\n]+", output) + port_cycle_lines = m.group().split("\n")[1:] for port, cycles in zip(port_cycle_lines[0].split(), port_cycle_lines[1].split()): - if cycles == '-': + if cycles == "-": cycles = 0.0 if port_names[port] in port_cycles: # Some architecures have multiple "ports" per resource in LLVM-MCA @@ -731,29 +802,29 @@ def llvm_mca_analyse_instrumented_assembly( port_cycles[port_names[port]] = max(float(cycles), port_cycles[port_names[port]]) else: port_cycles[port_names[port]] = float(cycles) - result['port cycles'] = port_cycles - + result["port cycles"] = port_cycles + # Extract throughput including loop-carried-dependecy latency - total_cycles = int(re.search(r'Total Cycles:\s+([0-9]+)', output).group(1)) - iterations = int(re.search(r'Iterations:\s+([0-9]+)', output).group(1)) + total_cycles = int(re.search(r"Total Cycles:\s+([0-9]+)", output).group(1)) + iterations = int(re.search(r"Iterations:\s+([0-9]+)", output).group(1)) lcd = total_cycles / iterations - result['lcd'] = lcd - result['throughput'] = lcd + result["lcd"] = lcd + result["throughput"] = lcd # Extract critical path latency # find cycle distance between first D and last R in first iteration - timeline_lines = [l for l in output.split('\n') if re.match(r'\[[0-9]+,[0-9]+\]', l)] + timeline_lines = [l for l in output.split("\n") if re.match(r"\[[0-9]+,[0-9]+\]", l)] cp_start = float("inf") cp_end = 0 for l in timeline_lines: - if l.startswith('[0,'): - cp_start = min(l.index('D'), cp_start) - cp_end = max(l.index('R'), cp_end) - result['cp_latency'] = cp_end - cp_start + if l.startswith("[0,"): + cp_start = min(l.index("D"), cp_start) + cp_end = max(l.index("R"), cp_end) + result["cp_latency"] = cp_end - cp_start # Extract uops - total_uops = int(re.search(r'Total uOps:\s+([0-9]+)', output).group(1)) - result['uops'] = total_uops / iterations + total_uops = int(re.search(r"Total uOps:\s+([0-9]+)", output).group(1)) + result["uops"] = total_uops / iterations return result @@ -774,95 +845,120 @@ def iaca_analyse_instrumented_binary(instrumented_binary_file, micro_architectur # Select IACA version and executable based on micro_architecture: arch_map = { # arch: (binary name, version string, required additional arguments) - 'NHM': ('iaca2.2', 'v2.2', ['-64']), - 'WSM': ('iaca2.2', 'v2.2', ['-64']), - 'SNB': ('iaca2.3', 'v2.3', ['-64']), - 'IVB': ('iaca2.3', 'v2.3', ['-64']), - 'HSW': ('iaca3.0', 'v3.0', []), - 'BDW': ('iaca3.0', 'v3.0', []), - 'SKL': ('iaca3.0', 'v3.0', []), - 'SKX': ('iaca3.0', 'v3.0', []), + "NHM": ("iaca2.2", "v2.2", ["-64"]), + "WSM": ("iaca2.2", "v2.2", ["-64"]), + "SNB": ("iaca2.3", "v2.3", ["-64"]), + "IVB": ("iaca2.3", "v2.3", ["-64"]), + "HSW": ("iaca3.0", "v3.0", []), + "BDW": ("iaca3.0", "v3.0", []), + "SKL": ("iaca3.0", "v3.0", []), + "SKX": ("iaca3.0", "v3.0", []), } if micro_architecture not in arch_map: - raise ValueError('Invalid micro_architecture selected ({}), valid options are {}'.format( - micro_architecture, ', '.join(arch_map.keys()))) + raise ValueError( + "Invalid micro_architecture selected ({}), valid options are {}".format( + micro_architecture, ", ".join(arch_map.keys()) + ) + ) iaca_path = iaca_get.find_iaca() # Throws exception if not found - os.environ['PATH'] += ':' + iaca_path + os.environ["PATH"] += ":" + iaca_path iaca_exec, iaca_version, base_args = arch_map[micro_architecture] if find_executable(iaca_exec) is None: - raise RuntimeError("{0} executable was not found. Make sure that {0} is found in " - "{1}. Install using iaca_get.".format(iaca_exec, iaca_path)) + raise RuntimeError( + "{0} executable was not found. Make sure that {0} is found in " + "{1}. Install using iaca_get.".format(iaca_exec, iaca_path) + ) result = {} - cmd = [iaca_exec] + base_args + ['-arch', micro_architecture, instrumented_binary_file] + cmd = [iaca_exec] + base_args + ["-arch", micro_architecture, instrumented_binary_file] try: - iaca_output = subprocess.check_output(cmd).decode('utf-8') - result['output'] = iaca_output + iaca_output = subprocess.check_output(cmd).decode("utf-8") + result["output"] = iaca_output except OSError as e: - raise RuntimeError("IACA execution failed:" + ' '.join(cmd) + '\n' + str(e)) + raise RuntimeError("IACA execution failed:" + " ".join(cmd) + "\n" + str(e)) except subprocess.CalledProcessError as e: raise RuntimeError("IACA throughput analysis failed:" + str(e)) # Get total cycles per loop iteration - match = re.search(r'^Block Throughput: ([0-9.]+) Cycles', iaca_output, re.MULTILINE) + match = re.search(r"^Block Throughput: ([0-9.]+) Cycles", iaca_output, re.MULTILINE) assert match, "Could not find Block Throughput in IACA output." throughput = float(match.groups()[0]) - result['throughput'] = throughput + result["throughput"] = throughput # Find ports and cycles per port - ports = [l for l in iaca_output.split('\n') if l.startswith('| Port |')] - cycles = [l for l in iaca_output.split('\n') if l.startswith('| Cycles |')] + ports = [l for l in iaca_output.split("\n") if l.startswith("| Port |")] + cycles = [l for l in iaca_output.split("\n") if l.startswith("| Cycles |")] assert ports and cycles, "Could not find ports/cycles lines in IACA output." - ports = [p.strip() for p in ports[0].split('|')][2:] - cycles = [c.strip() for c in cycles[0].split('|')][2:] + ports = [p.strip() for p in ports[0].split("|")][2:] + cycles = [c.strip() for c in cycles[0].split("|")][2:] port_cycles = [] for i in range(len(ports)): - if '-' in ports[i] and ' ' in cycles[i]: - subports = [p.strip() for p in ports[i].split('-')] - subcycles = [c for c in cycles[i].split(' ') if bool(c)] + if "-" in ports[i] and " " in cycles[i]: + subports = [p.strip() for p in ports[i].split("-")] + subcycles = [c for c in cycles[i].split(" ") if bool(c)] port_cycles.append((subports[0], float(subcycles[0]))) port_cycles.append((subports[0] + subports[1], float(subcycles[1]))) elif ports[i] and cycles[i]: port_cycles.append((ports[i], float(cycles[i]))) - result['port cycles'] = OrderedDict(port_cycles) + result["port cycles"] = OrderedDict(port_cycles) - match = re.search(r'^Total Num Of Uops: ([0-9]+)', iaca_output, re.MULTILINE) + match = re.search(r"^Total Num Of Uops: ([0-9]+)", iaca_output, re.MULTILINE) assert match, "Could not find Uops in IACA output." - result['uops'] = float(match.groups()[0]) - result['cp_latency'] = None - result['lcd'] = None + result["uops"] = float(match.groups()[0]) + result["cp_latency"] = None + result["lcd"] = None return result def main(): """Execute command line interface.""" parser = argparse.ArgumentParser( - description='Find and analyze basic loop blocks and mark for IACA.', - epilog='For help, examples, documentation and bug reports go to:\nhttps://github.com' - '/RRZE-HPC/kerncraft\nLicense: AGPLv3') - parser.add_argument('--version', action='version', version='%(prog)s {}'.format(__version__)) - parser.add_argument('source', type=argparse.FileType(), nargs='?', default=sys.stdin, - help='assembly file to analyze (default: stdin)') - parser.add_argument('--outfile', '-o', type=argparse.FileType('w'), nargs='?', - default=sys.stdout, help='output file location (default: stdout)') - parser.add_argument('--debug', action='store_true', - help='Output internal analysis information for debugging.') - parser.add_argument('--isa', default='x86', choices=['x86', 'aarch64']) - parser.add_argument('--cache', action='store_true', - help='Consult cache and store manual setting there.') + description="Find and analyze basic loop blocks and mark for IACA.", + epilog="For help, examples, documentation and bug reports go to:\nhttps://github.com" + "/RRZE-HPC/kerncraft\nLicense: AGPLv3", + ) + parser.add_argument("--version", action="version", version="%(prog)s {}".format(__version__)) + parser.add_argument( + "source", + type=argparse.FileType(), + nargs="?", + default=sys.stdin, + help="assembly file to analyze (default: stdin)", + ) + parser.add_argument( + "--outfile", + "-o", + type=argparse.FileType("w"), + nargs="?", + default=sys.stdout, + help="output file location (default: stdout)", + ) + parser.add_argument( + "--debug", action="store_true", help="Output internal analysis information for debugging." + ) + parser.add_argument("--isa", default="x86", choices=["x86", "aarch64"]) + parser.add_argument( + "--cache", action="store_true", help="Consult cache and store manual setting there." + ) args = parser.parse_args() # pointer_increment is given, since it makes no difference on the command lien and requires # less user input - pointer_increment = 'auto_with_manual_fallback' - asm_instrumentation(input_file=args.source, output_file=args.outfile, - block_selection='manual', pointer_increment='auto_with_manual_fallback', - debug=args.debug, isa=args.isa, cache=args.cache) + pointer_increment = "auto_with_manual_fallback" + asm_instrumentation( + input_file=args.source, + output_file=args.outfile, + block_selection="manual", + pointer_increment="auto_with_manual_fallback", + debug=args.debug, + isa=args.isa, + cache=args.cache, + ) -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() From dd17e73abbae904e4483c20f5f9a52fb4c11e912 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Thu, 2 May 2024 14:57:39 +0200 Subject: [PATCH 7/9] run the TP assignment twice for better scheduling --- kerncraft/incore_model.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kerncraft/incore_model.py b/kerncraft/incore_model.py index 6561f5a..6bd56b0 100755 --- a/kerncraft/incore_model.py +++ b/kerncraft/incore_model.py @@ -647,6 +647,7 @@ def osaca_analyse_instrumented_assembly( semantics.add_semantics(kernel) if assign_optimal_throughput: semantics.assign_optimal_throughput(kernel) + semantics.assign_optimal_throughput(kernel) kernel_graph = osaca.KernelDG(kernel, parser, osaca_machine_model, semantics) frontend = osaca.Frontend(instrumented_assembly_file, arch=micro_architecture) From e6b6ec55d76b1019a51a753796ee32ec693a27dc Mon Sep 17 00:00:00 2001 From: JanLJL Date: Wed, 4 Sep 2024 11:14:07 +0200 Subject: [PATCH 8/9] removed unnecessary installs/imports --- .github/workflows/test-n-publish.yml | 3 --- kerncraft/incore_model.py | 4 ---- 2 files changed, 7 deletions(-) diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml index a1fe18b..824b1eb 100644 --- a/.github/workflows/test-n-publish.yml +++ b/.github/workflows/test-n-publish.yml @@ -18,11 +18,8 @@ jobs: - name: Install run: | python -m pip install --upgrade pip - python -m pip install -U PyYAML - python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm" python -m pip install codecov requests sympy python -m pip install -e . - #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul - name: Test run: | coverage run -p tests/all_tests.py diff --git a/kerncraft/incore_model.py b/kerncraft/incore_model.py index c4ddd3c..40387f7 100755 --- a/kerncraft/incore_model.py +++ b/kerncraft/incore_model.py @@ -5,13 +5,9 @@ import re import subprocess import os -from copy import copy import argparse -from pprint import pformat, pprint import pathlib -import textwrap from collections import OrderedDict, defaultdict -import io from hashlib import md5 from os.path import expanduser from itertools import chain From 1fbd41cb8c386853c6f998124dba0b1e20680be9 Mon Sep 17 00:00:00 2001 From: JanLJL Date: Wed, 4 Sep 2024 12:13:36 +0200 Subject: [PATCH 9/9] added OSACA for GH Actions --- .github/workflows/test-n-publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml index 824b1eb..0e35354 100644 --- a/.github/workflows/test-n-publish.yml +++ b/.github/workflows/test-n-publish.yml @@ -19,6 +19,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install codecov requests sympy + python -m pip install "osaca>=0.6.0" python -m pip install -e . - name: Test run: |