From 7996d8915e0a31cfc0f9b5245f39b01c13802e1a Mon Sep 17 00:00:00 2001
From: stefandesouza <stefan.desouza@outlook.com>
Date: Sun, 7 Jan 2024 15:14:07 +0100
Subject: [PATCH 1/9] Fixed operand style issues

---
 .github/workflows/test-n-publish.yml |   3 +-
 kerncraft/incore_model.py            | 162 ++++++++++++++-------------
 kerncraft/kerncraft.py               |   2 +-
 tests/test_incore_model.py           |  10 +-
 4 files changed, 91 insertions(+), 86 deletions(-)

diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml
index 7d9f6c2..3924aa2 100644
--- a/.github/workflows/test-n-publish.yml
+++ b/.github/workflows/test-n-publish.yml
@@ -20,7 +20,8 @@ jobs:
         python -m pip install --upgrade pip
         python -m pip install codecov requests sympy
         python -m pip install -e .
-        iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul
+        python -m pip install git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm
+        #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul
     - name: Test
       run: |
         coverage run -p tests/all_tests.py
diff --git a/kerncraft/incore_model.py b/kerncraft/incore_model.py
index 6561f5a..4b75a25 100755
--- a/kerncraft/incore_model.py
+++ b/kerncraft/incore_model.py
@@ -21,6 +21,10 @@
 from osaca.parser import get_parser
 from osaca.semantics import MachineModel, ISASemantics
 from osaca.semantics.marker_utils import find_basic_loop_bodies, get_marker
+from osaca.parser.register import RegisterOperand
+from osaca.parser.memory import MemoryOperand
+from osaca.parser.immediate import ImmediateOperand
+from osaca.parser.identifier import IdentifierOperand
 
 from kerncraft import iaca_get, __version__
 
@@ -104,9 +108,9 @@ def compute_block_metric(block):
             # Count registers used
             for prefix in register_class_usage:
                 for op in line.operands:
-                    if 'register' in op:
-                        if op.register.name.startswith(prefix):
-                            register_class_usage[prefix].append(op.register.name)
+                    if isinstance(op, RegisterOperand):
+                        if op.name.startswith(prefix):
+                            register_class_usage[prefix].append(op.name)
 
             # Identify and count packed and avx instructions
             if re.match(r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]",
@@ -136,9 +140,9 @@ def get_pointer_increment(block):
                 continue
 
             # Extract destination references, ignoring var(%rip)
-            dst_mem_references = [op.memory for op in line.semantic_operands.destination
-                                  if 'memory' in op and
-                                  (op.memory.base is None or op.memory.base.name != 'rip')]
+            dst_mem_references = [op for op in line.semantic_operands["destination"]
+                                  if isinstance(op, MemoryOperand) and
+                                  (op.base is None or op.base.name != 'rip')]
             if dst_mem_references:
                 if not stores_only:
                     stores_only = True
@@ -147,43 +151,43 @@ def get_pointer_increment(block):
 
             # If no destination references were found sofar, include source references
             if not stores_only:
-                mem_references += [op.memory for op in line.semantic_operands.source
-                                   if 'memory' in op]
+                mem_references += [op for op in line.semantic_operands["source"]
+                                   if isinstance(op, MemoryOperand)]
             if re.match(r'^inc[bwlq]?$', line.instruction):
-                reg = line.operands[0].register.name
+                reg = line.operands[0].name
                 modified_registers.append(reg)
                 increments[reg] = 1
-            elif re.match(r'^add[bwlq]?$', line.instruction) and 'immediate' in line.operands[0] \
-                    and 'register' in line.operands[1]:
-                reg = line.operands[1].register.name
-                increments[reg] = int(line.operands[0].immediate.value)
+            elif re.match(r'^add[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \
+                    and isinstance(line.operands[1], RegisterOperand):
+                reg = line.operands[1].name
+                increments[reg] = int(line.operands[0].value)
                 modified_registers.append(reg)
             elif re.match(r'^dec[bwlq]?$', line.instruction):
-                reg = line.operands[0].register.name
+                reg = line.operands[0].name
                 modified_registers.append(reg)
                 increments[reg] = -1
-            elif re.match(r'^sub[bwlq]?$', line.instruction) and 'immediate' in line.operands[0] \
-                    and 'register' in line.operands[1]:
-                reg = line.operands[1].register.name
+            elif re.match(r'^sub[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \
+                    and  isinstance(line.operands[1], RegisterOperand):
+                reg = line.operands[1].name
                 modified_registers.append(reg)
-                increments[reg] = -int(line.operands[0].immediate.value)
+                increments[reg] = -int(line.operands[0].value)
             elif re.match(r'^lea[bwlq]?$', line.instruction):
                 # `lea 1(%r11), %r11` is the same as `add $1, %r11`
-                if line.operands[0].memory.base is not None and \
-                        line.operands[0].memory.base.name  == line.operands[1].register.name and \
-                        line.operands[0].memory.index is None:
-                    reg = line.operands[1].register.name
+                if line.operands[0].base is not None and \
+                        line.operands[0].base.name  == line.operands[1].name and \
+                        line.operands[0].index is None:
+                    reg = line.operands[1].name
                     modified_registers.append(reg)
                     increments[reg] = int(
-                        line.operands[0].memory.offset.value)
+                        line.operands[0].offset.value)
                 # `lea 1(,%r11), %r11` is the same as `add $1, %r11`
-                if line.operands[0].memory.index is not None and \
-                        line.operands[0].memory.index.name  == line.operands[1].register.name and \
-                        line.operands[0].memory.base is None:
-                    reg = line.operands[1].register.name
+                if line.operands[0].index is not None and \
+                        line.operands[0].index.name  == line.operands[1].name and \
+                        line.operands[0].base is None:
+                    reg = line.operands[1].name
                     modified_registers.append(reg)
                     increments[reg] = int(
-                        line.operands[0].memory.offset.value)
+                        line.operands[0].offset.value)
 
         # deduce loop increment from memory index register
         pointer_increment = None  # default -> can not decide, let user choose
@@ -248,10 +252,10 @@ def compute_block_metric(block):
                 iarithmetic_ctr += 1
             # Counting use of vector registers
             for op in line.operands:
-                if 'register' in op and  'prefix' in op.register and op.register.prefix in 'zv':
-                    vector_ctr += 1
-                if 'register' in op and  'range' in op.register and op.register.range[0].prefix in 'zv':
+                if isinstance(op, RegisterOperand) and op.prefix is not None and op.prefix in 'zv':
                     vector_ctr += 1
+                #if isinstance(op, RegisterOperand) and  'range' in op.register and op.register.range[0].prefix in 'zv':
+                #    vector_ctr += 1
             # Count all instructions
             instruction_ctr += 1
 
@@ -276,24 +280,24 @@ def get_pointer_increment(block):
 
         # build dict of modified registers in block with count of number of modifications
         modified_registers = defaultdict(int)
-        for dests in [l.semantic_operands.destination for l in block if 'semantic_operands' in l]:
+        for dests in [l.semantic_operands["destination"] for l in block]:
             for d in dests:
-                if 'register' in d:
-                    if 'range' in d.register:
-                        modified_registers[AArch64.normalize_to_register_str(d.register.range[0])] += 1
-                    else:
-                        modified_registers[AArch64.normalize_to_register_str(d.register)] += 1
+                if isinstance(d, RegisterOperand):
+                    #if 'range' in d.register:
+                    #    modified_registers[AArch64.normalize_to_register_str(d.register.range[0])] += 1
+                    #else:
+                    modified_registers[AArch64.normalize_to_register_str(d)] += 1
         for l in block:
             for d in l.operands:
-                if 'memory' in d:
-                    if 'post_indexed' in d.memory or 'pre_indexed' in d.memory:
-                        modified_registers[AArch64.normalize_to_register_str(d.memory.base)] += 1
+                if isinstance(d, MemoryOperand):
+                    if d.post_indexed is not False or d.pre_indexed:
+                        modified_registers[AArch64.normalize_to_register_str(d.base)] += 1
                         inc = 1
-                        if 'post_indexed' in d.memory and 'value' in d.memory.post_indexed:
-                            inc = int(d.memory.post_indexed.value)
-                        if 'pre_indexed' in d.memory:
-                            inc = int(d.memory.offset.value)
-                        increments[AArch64.normalize_to_register_str(d.memory.base)] = inc
+                        if isinstance(d.post_indexed, dict):
+                            inc = int(d.post_indexed["value"])
+                        if d.pre_indexed:
+                            inc = int(d.offset.value)
+                        increments[AArch64.normalize_to_register_str(d.base)] = inc
         
         for line in block:
             # Skip non-instruction lines (such as comments and labels)
@@ -302,16 +306,16 @@ def get_pointer_increment(block):
 
             # Extract and filter destination references (stores)
             dst_mem_references = []
-            for dst in [op.memory for op in chain(line.semantic_operands.destination,
-                                                  line.semantic_operands.src_dst)
-                        if 'memory' in op]:
+            for dst in [op for op in chain(line.semantic_operands["destination"],
+                                                  line.semantic_operands["src_dst"])
+                        if isinstance(op, MemoryOperand)]:
                 # base or index must be a modified (i.e., changing) register
                 if AArch64.normalize_to_register_str(dst.base) not in modified_registers and \
                     AArch64.normalize_to_register_str(dst.index) not in modified_registers:
                     continue
 
                 # offset operands with identifiers (e.g. `:lo12:gosa`) are ignored
-                if dst.offset is not None and 'identifier' in dst.offset:
+                if dst.offset is not None and isinstance(dst.offset, IdentifierOperand):
                     continue
 
                 dst_mem_references.append(dst)
@@ -323,23 +327,23 @@ def get_pointer_increment(block):
 
             # If no destination references were found sofar, include source references (loads)
             if not stores_only:
-                mem_references += [op.memory for op in chain(line.semantic_operands.source,
-                                                             line.semantic_operands.src_dst)
-                                   if 'memory' in op]
+                mem_references += [op for op in chain(line.semantic_operands["source"],
+                                                             line.semantic_operands["src_dst"])
+                                   if isinstance(op, MemoryOperand)]
 
             # ADD dest_reg, src_reg, immd
             if re.match(r'^add[s]?$', line.instruction) and \
                     line.operands[0] == line.operands[1] and \
-                    'immediate' in line.operands[2]:
-                reg_name = AArch64.normalize_to_register_str(line.operands[0].register)
-                inc = int(line.operands[2].immediate.value)
+                    isinstance(line.operands[2], ImmediateOperand):
+                reg_name = AArch64.normalize_to_register_str(line.operands[0])
+                inc = int(line.operands[2].value)
                 increments[reg_name] = inc
             # SUB dest_reg, src_reg, immd
             elif re.match(r'^sub[s]?$', line.instruction) and \
                     line.operands[0] == line.operands[1] and \
-                    'immediate' in line.operands[2]:
-                reg_name = AArch64.normalize_to_register_str(line.operands[0].register)
-                inc = -int(line.operands[2].immediate.value)
+                    isinstance(line.operands[2], ImmediateOperand):
+                reg_name = AArch64.normalize_to_register_str(line.operands[0])
+                inc = -int(line.operands[2].value)
                 if reg_name in increments and increments[reg_name] == inc:
                     increments[reg_name] = inc
 
@@ -352,11 +356,11 @@ def get_pointer_increment(block):
             if line.instruction is None:
                 continue
             # LSL dest_reg, src_reg, immd
-            if re.match(r'^lsl$', line.instruction) and 'immediate' in line.operands[2] and \
-                    AArch64.normalize_to_register_str(line.operands[1].register) in increments:
-                increments[AArch64.normalize_to_register_str(line.operands[0].register)] = \
-                    increments[AArch64.normalize_to_register_str(line.operands[1].register)] * \
-                    2**int(line.operands[2].immediate.value)
+            if re.match(r'^lsl$', line.instruction) and isinstance(line.operands[2], ImmediateOperand) and \
+                    AArch64.normalize_to_register_str(line.operands[1]) in increments:
+                increments[AArch64.normalize_to_register_str(line.operands[0])] = \
+                    increments[AArch64.normalize_to_register_str(line.operands[1])] * \
+                    2**int(line.operands[2].value)
 
         new_increments = []
         # Third pass to find registers based on constant +- increment
@@ -370,13 +374,13 @@ def get_pointer_increment(block):
                     factor = 1
                 else:
                     factor = -1
-                if 'register' not in line.operands[1] or 'register' not in line.operands[2]:
+                if not isinstance(line.operands[1], RegisterOperand) or not isinstance(line.operands[2], RegisterOperand): 
                     continue
                 for i,j in [(1,2), (2,1)]:
-                    reg_i_name = AArch64.normalize_to_register_str(line.operands[i].register)
-                    reg_j_name = AArch64.normalize_to_register_str(line.operands[j].register)
+                    reg_i_name = AArch64.normalize_to_register_str(line.operands[i])
+                    reg_j_name = AArch64.normalize_to_register_str(line.operands[j])
                     if reg_i_name in increments and reg_j_name not in modified_registers:
-                        reg_dest_name = AArch64.normalize_to_register_str(line.operands[0].register)
+                        reg_dest_name = AArch64.normalize_to_register_str(line.operands[0])
                         inc = factor * increments[reg_i_name]
                         if reg_dest_name in increments and increments[reg_dest_name] == inc:
                             modified_registers[reg_dest_name] -= 1
@@ -392,13 +396,13 @@ def get_pointer_increment(block):
             if line.instruction is None:
                 continue
             # LSL dest_reg, src_reg, immd
-            if re.match(r'^lsl$', line.instruction) and 'immediate' in line.operands[2] and \
-                    'register' in line.operands[1]:
-                src_reg_name = AArch64.normalize_to_register_str(line.operands[1].register)
+            if re.match(r'^lsl$', line.instruction) and isinstance(line.operands[2], ImmediateOperand) and \
+                    isinstance(line.operands[1], RegisterOperand):
+                src_reg_name = AArch64.normalize_to_register_str(line.operands[1])
                 if src_reg_name in new_increments and src_reg_name in increments:
-                    increments[AArch64.normalize_to_register_str(line.operands[0].register)] = \
+                    increments[AArch64.normalize_to_register_str(line.operands[0])] = \
                         increments[src_reg_name] * \
-                        2**int(line.operands[2].immediate.value)
+                        2**int(line.operands[2].value)
 
         # deduce loop increment from memory index register
         address_registers = []
@@ -412,8 +416,8 @@ def get_pointer_increment(block):
                 if index_reg in increments:
                     reg = index_reg
                     # If index is used, a scale other than 1 needs to be considered
-                    if 'shift' in mref.index and mref.index.shift:
-                        scales[reg] = 2**int(mref.index.shift[0].value)
+                    if mref.index.shift:
+                        scales[reg] = 2**int(mref.index.shift[0]['value'])
                 else:
                     reg = base_reg
             else:
@@ -467,7 +471,7 @@ def userselect_block(blocks, default=None, debug=False):
         # Blocks first line is the label, the user will be able to spot it, so we don't need to
         # print it
         label_list.append(label)
-        print('\n\t'.join([b['line'] for b in block]))
+        print('\n\t'.join([b.line for b in block]))
 
     # Show all possible block labels in the end
     print(
@@ -490,7 +494,7 @@ def hashblock(block):
     # TODO normalize register names
     # TODO normalize instruction order
     # Remove target label and jump
-    h = md5('\n'.join([b['line'] for b in block]).encode())
+    h = md5('\n'.join([b.line for b in block]).encode())
     return h.hexdigest()
 
 
@@ -614,7 +618,7 @@ def asm_instrumentation(input_file, output_file=None,
                  marker_end + asm_lines[block_end:]
 
     if output_file is not None:
-        output_file.writelines([l['line']+'\n' for l in marked_asm])
+        output_file.writelines([l.line+'\n' for l in marked_asm])
 
     return block_lines, pointer_increment
 
@@ -668,7 +672,7 @@ def osaca_analyse_instrumented_assembly(
     result['port cycles'] = OrderedDict(list(zip(osaca_machine_model['ports'], throughput_values)))
     result['throughput'] = max(throughput_values + [max_lcd])
     result['lcd'] = max_lcd
-    result['cp_latency'] = sum([x['latency_cp'] for x in cp_list])
+    result['cp_latency'] = sum([x.latency_cp for x in cp_list])
     result['uops'] = None  # Not given by OSACA
 
     unmatched_ratio = osaca.get_unmatched_instruction_ratio(kernel)
@@ -861,4 +865,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    main()
\ No newline at end of file
diff --git a/kerncraft/kerncraft.py b/kerncraft/kerncraft.py
index 54e5d33..5cc81ba 100755
--- a/kerncraft/kerncraft.py
+++ b/kerncraft/kerncraft.py
@@ -204,7 +204,7 @@ def create_parser():
                              'description file (-std=c99 is always added).')
 
     # Needed for ECM and RooflineASM models:
-    parser.add_argument('--incore-model', '-i', type=str, default=None,
+    parser.add_argument('--incore-model', '-i', type=str, default="OSACA",
                         help='In-core model to use, default is first in machine description file.')
 
     for m in models.__all__:
diff --git a/tests/test_incore_model.py b/tests/test_incore_model.py
index 2a9c44c..8b7937b 100644
--- a/tests/test_incore_model.py
+++ b/tests/test_incore_model.py
@@ -22,28 +22,28 @@ def test_2d5pt_constcoeffs(self):
         with open(self._find_file('2d-5pt-constcoeffs.s')) as f:
             block_lines, pointer_increment = asm_instrumentation(f)
 
-        self.assertEqual(block_lines[0]['label'], '.L36')
+        self.assertEqual(block_lines[0].label, '.L36')
         self.assertEqual(pointer_increment, 8)
 
     def test_2d5pt_varcoeffs(self):
         with open(self._find_file('2d-5pt-varcoeffs.s')) as f:
             block_lines, pointer_increment = asm_instrumentation(f)
 
-        self.assertEqual(block_lines[0]['label'], '.L43')
+        self.assertEqual(block_lines[0].label, '.L43')
         self.assertEqual(pointer_increment, 16)
 
     def test_3d25pt_semi(self):
         with open(self._find_file('3d-25pt_semi.s')) as f:
             block_lines, pointer_increment = asm_instrumentation(f, pointer_increment=8)
 
-        self.assertEqual(block_lines[0]['label'], 'LBB0_62')
+        self.assertEqual(block_lines[0].label, 'LBB0_62')
         #self.assertEqual(pointer_increment, 8)
 
     def test_matvec_trans(self):
         with open(self._find_file('matvec_trans.s')) as f:
             block_lines, pointer_increment = asm_instrumentation(f)
 
-        self.assertEqual(block_lines[0]['label'], 'LBB0_30')
+        self.assertEqual(block_lines[0].label, 'LBB0_30')
         self.assertEqual(pointer_increment, 64)
 
     def test_increment_detection_x86(self):
@@ -905,4 +905,4 @@ def test_increment_detection_aarch64(self):
 
 if __name__ == '__main__':
     suite = unittest.TestLoader().loadTestsFromTestCase(TestIncoreModelX86)
-    unittest.TextTestRunner(verbosity=2, buffer=True).run(suite)
+    unittest.TextTestRunner(verbosity=2, buffer=True).run(suite)
\ No newline at end of file

From d7c9be65bc55a690461ff3dfb5430dc3e2581a86 Mon Sep 17 00:00:00 2001
From: stefandesouza <stefan.desouza@outlook.com>
Date: Sun, 7 Jan 2024 15:20:58 +0100
Subject: [PATCH 2/9] Trying to build the right branch of OSACA

---
 .github/workflows/test-n-publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml
index 3924aa2..0331b07 100644
--- a/.github/workflows/test-n-publish.yml
+++ b/.github/workflows/test-n-publish.yml
@@ -20,7 +20,7 @@ jobs:
         python -m pip install --upgrade pip
         python -m pip install codecov requests sympy
         python -m pip install -e .
-        python -m pip install git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm
+        python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm"
         #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul
     - name: Test
       run: |

From 4d76e608e4be4333fefd5b08fa03fe60d1f2dc0c Mon Sep 17 00:00:00 2001
From: stefandesouza <stefan.desouza@outlook.com>
Date: Wed, 10 Jan 2024 14:09:39 +0100
Subject: [PATCH 3/9] Changed order of pip installs

---
 .github/workflows/test-n-publish.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml
index 0331b07..ee47a74 100644
--- a/.github/workflows/test-n-publish.yml
+++ b/.github/workflows/test-n-publish.yml
@@ -18,9 +18,9 @@ jobs:
     - name: Install
       run: |
         python -m pip install --upgrade pip
+        python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm"
         python -m pip install codecov requests sympy
         python -m pip install -e .
-        python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm"
         #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul
     - name: Test
       run: |

From 4e30ae598c3cb959e09b0150f19dae834adf6dff Mon Sep 17 00:00:00 2001
From: stefandesouza <stefan.desouza@outlook.com>
Date: Wed, 10 Jan 2024 14:40:05 +0100
Subject: [PATCH 4/9] Try with PyYAML

---
 .github/workflows/test-n-publish.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml
index ee47a74..a1fe18b 100644
--- a/.github/workflows/test-n-publish.yml
+++ b/.github/workflows/test-n-publish.yml
@@ -18,6 +18,7 @@ jobs:
     - name: Install
       run: |
         python -m pip install --upgrade pip
+        python -m pip install -U PyYAML
         python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm"
         python -m pip install codecov requests sympy
         python -m pip install -e .

From 6d06b20db38634381c18c28b2392dc3a30c5a79b Mon Sep 17 00:00:00 2001
From: stefandesouza <stefan.desouza@outlook.com>
Date: Thu, 11 Jan 2024 17:08:29 +0100
Subject: [PATCH 5/9] Merged master

---
 .github/workflows/test-n-publish.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml
index e633412..a1fe18b 100644
--- a/.github/workflows/test-n-publish.yml
+++ b/.github/workflows/test-n-publish.yml
@@ -22,10 +22,7 @@ jobs:
         python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm"
         python -m pip install codecov requests sympy
         python -m pip install -e .
-<<<<<<< HEAD
         #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul
-=======
->>>>>>> master
     - name: Test
       run: |
         coverage run -p tests/all_tests.py

From b6a3af8c97b1c314d6930080dd064f298656adfe Mon Sep 17 00:00:00 2001
From: stefandesouza <stefan.desouza@outlook.com>
Date: Tue, 5 Mar 2024 12:05:57 +0100
Subject: [PATCH 6/9] Updated instruction form attributes

---
 kerncraft/incore_model.py | 544 ++++++++++++++++++++++----------------
 1 file changed, 320 insertions(+), 224 deletions(-)

diff --git a/kerncraft/incore_model.py b/kerncraft/incore_model.py
index 4b75a25..0246411 100755
--- a/kerncraft/incore_model.py
+++ b/kerncraft/incore_model.py
@@ -30,11 +30,11 @@
 
 
 def itemsEqual(lst):
-   return lst[1:] == lst[:-1]
+    return lst[1:] == lst[:-1]
 
 
 class IncoreModel:
-    def __init__(self, isa='x86'):
+    def __init__(self, isa="x86"):
         isa
 
 
@@ -52,10 +52,10 @@ class LlvmMCA(IncoreModel):
 
 class ISA:
     @staticmethod
-    def get_isa(isa='x86'):
-        if isa.lower() == 'x86':
+    def get_isa(isa="x86"):
+        if isa.lower() == "x86":
             return x86
-        elif isa.lower() == 'aarch64':
+        elif isa.lower() == "aarch64":
             return AArch64
 
     @staticmethod
@@ -95,12 +95,12 @@ class x86(ISA):
     @staticmethod
     def compute_block_metric(block):
         """Return comparable metric on block information."""
-        register_class_usage = {'zmm': [], 'ymm': [], 'xmm': []}
+        register_class_usage = {"zmm": [], "ymm": [], "xmm": []}
         packed_instruction_ctr, avx_instruction_ctr, instruction_ctr = 0, 0, 0
         # Analyze code to determine metric
         for line in block:
             # Skip non-instruction lines (e.g., comments)
-            if line.instruction is None:
+            if line.mnemonic is None:
                 continue
             # Count all instructions
             instruction_ctr += 1
@@ -113,18 +113,22 @@ def compute_block_metric(block):
                             register_class_usage[prefix].append(op.name)
 
             # Identify and count packed and avx instructions
-            if re.match(r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]",
-                    line.instruction):
-                if line.instruction.startswith('v'):
+            if re.match(
+                r"^[v]?(movu|mul|add|sub|div|fmadd(132|213|231)?)[h]?p[ds]", line.mnemonic
+            ):
+                if line.mnemonic.startswith("v"):
                     avx_instruction_ctr += 1
                 packed_instruction_ctr += 1
 
         # Build metric
-        return (packed_instruction_ctr, avx_instruction_ctr,
-                len(set(register_class_usage['zmm'])),
-                len(set(register_class_usage['ymm'])),
-                len(set(register_class_usage['xmm'])),
-                instruction_ctr)
+        return (
+            packed_instruction_ctr,
+            avx_instruction_ctr,
+            len(set(register_class_usage["zmm"])),
+            len(set(register_class_usage["ymm"])),
+            len(set(register_class_usage["xmm"])),
+            instruction_ctr,
+        )
 
     @staticmethod
     def get_pointer_increment(block):
@@ -136,13 +140,15 @@ def get_pointer_increment(block):
         modified_registers = []
         for line in block:
             # Skip non-instruction lines (e.g., comments)
-            if line.instruction is None:
+            if line.mnemonic is None:
                 continue
 
             # Extract destination references, ignoring var(%rip)
-            dst_mem_references = [op for op in line.semantic_operands["destination"]
-                                  if isinstance(op, MemoryOperand) and
-                                  (op.base is None or op.base.name != 'rip')]
+            dst_mem_references = [
+                op
+                for op in line.semantic_operands["destination"]
+                if isinstance(op, MemoryOperand) and (op.base is None or op.base.name != "rip")
+            ]
             if dst_mem_references:
                 if not stores_only:
                     stores_only = True
@@ -151,60 +157,76 @@ def get_pointer_increment(block):
 
             # If no destination references were found sofar, include source references
             if not stores_only:
-                mem_references += [op for op in line.semantic_operands["source"]
-                                   if isinstance(op, MemoryOperand)]
-            if re.match(r'^inc[bwlq]?$', line.instruction):
+                mem_references += [
+                    op for op in line.semantic_operands["source"] if isinstance(op, MemoryOperand)
+                ]
+            if re.match(r"^inc[bwlq]?$", line.mnemonic):
                 reg = line.operands[0].name
                 modified_registers.append(reg)
                 increments[reg] = 1
-            elif re.match(r'^add[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \
-                    and isinstance(line.operands[1], RegisterOperand):
+            elif (
+                re.match(r"^add[bwlq]?$", line.mnemonic)
+                and isinstance(line.operands[0], ImmediateOperand)
+                and isinstance(line.operands[1], RegisterOperand)
+            ):
                 reg = line.operands[1].name
                 increments[reg] = int(line.operands[0].value)
                 modified_registers.append(reg)
-            elif re.match(r'^dec[bwlq]?$', line.instruction):
+            elif re.match(r"^dec[bwlq]?$", line.mnemonic):
                 reg = line.operands[0].name
                 modified_registers.append(reg)
                 increments[reg] = -1
-            elif re.match(r'^sub[bwlq]?$', line.instruction) and isinstance(line.operands[0], ImmediateOperand) \
-                    and  isinstance(line.operands[1], RegisterOperand):
+            elif (
+                re.match(r"^sub[bwlq]?$", line.mnemonic)
+                and isinstance(line.operands[0], ImmediateOperand)
+                and isinstance(line.operands[1], RegisterOperand)
+            ):
                 reg = line.operands[1].name
                 modified_registers.append(reg)
                 increments[reg] = -int(line.operands[0].value)
-            elif re.match(r'^lea[bwlq]?$', line.instruction):
+            elif re.match(r"^lea[bwlq]?$", line.mnemonic):
                 # `lea 1(%r11), %r11` is the same as `add $1, %r11`
-                if line.operands[0].base is not None and \
-                        line.operands[0].base.name  == line.operands[1].name and \
-                        line.operands[0].index is None:
+                if (
+                    line.operands[0].base is not None
+                    and line.operands[0].base.name == line.operands[1].name
+                    and line.operands[0].index is None
+                ):
                     reg = line.operands[1].name
                     modified_registers.append(reg)
-                    increments[reg] = int(
-                        line.operands[0].offset.value)
+                    increments[reg] = int(line.operands[0].offset.value)
                 # `lea 1(,%r11), %r11` is the same as `add $1, %r11`
-                if line.operands[0].index is not None and \
-                        line.operands[0].index.name  == line.operands[1].name and \
-                        line.operands[0].base is None:
+                if (
+                    line.operands[0].index is not None
+                    and line.operands[0].index.name == line.operands[1].name
+                    and line.operands[0].base is None
+                ):
                     reg = line.operands[1].name
                     modified_registers.append(reg)
-                    increments[reg] = int(
-                        line.operands[0].offset.value)
+                    increments[reg] = int(line.operands[0].offset.value)
 
         # deduce loop increment from memory index register
         pointer_increment = None  # default -> can not decide, let user choose
         possible_idx_regs = None
         if mem_references:
             # we found memory references to work with
-            possible_idx_regs = list(set(increments.keys()).intersection(
-                set([mref.base.name for mref in mem_references if mref.base is not None] +
-                    [mref.index.name for mref in mem_references if mref.index is not None])))
+            possible_idx_regs = list(
+                set(increments.keys()).intersection(
+                    set(
+                        [mref.base.name for mref in mem_references if mref.base is not None]
+                        + [mref.index.name for mref in mem_references if mref.index is not None]
+                    )
+                )
+            )
             for mref in mem_references:
                 for reg in list(possible_idx_regs):
                     # Only consider references with two registers, where one could be an
                     # index
                     if None not in [mref.base, mref.index]:
                         # One needs to mach, other registers will be excluded
-                        if not ((mref.base is not None and reg == mref.base.name) or
-                                (mref.index is not None and reg == mref.index.name)):
+                        if not (
+                            (mref.base is not None and reg == mref.base.name)
+                            or (mref.index is not None and reg == mref.index.name)
+                        ):
                             # reg can not be it
                             possible_idx_regs.remove(reg)
 
@@ -213,15 +235,19 @@ def get_pointer_increment(block):
                 # good, exactly one register was found
                 idx_reg = possible_idx_regs[0]
             elif possible_idx_regs and itemsEqual(
-                    [increments[pidxreg] for pidxreg in possible_idx_regs]):
+                [increments[pidxreg] for pidxreg in possible_idx_regs]
+            ):
                 # multiple were option found, but all have the same increment
                 # use first match:
                 idx_reg = possible_idx_regs[0]
 
             if idx_reg and modified_registers.count(idx_reg) == 1:
-                mem_scales = [mref.scale for mref in mem_references
-                              if (mref.index is not None and idx_reg == mref.index.name) or
-                                 (mref.base is not None and idx_reg == mref.base.name)]
+                mem_scales = [
+                    mref.scale
+                    for mref in mem_references
+                    if (mref.index is not None and idx_reg == mref.index.name)
+                    or (mref.base is not None and idx_reg == mref.base.name)
+                ]
 
                 if itemsEqual(mem_scales):
                     # good, all scales are equal
@@ -243,32 +269,32 @@ def compute_block_metric(block):
         # Analyze code to determine metric
         for line in block:
             # Skip non-instruction lines (e.g., comments)
-            if line.instruction is None:
+            if line.mnemonic is None:
                 continue
             # Counting basic arithmetic insstructions
-            if line.instruction in ['fmul', 'fdiv', 'fadd', 'fsub']:
+            if line.mnemonic in ["fmul", "fdiv", "fadd", "fsub"]:
                 farithmetic_ctr += 1
-            elif line.instruction in ['add', 'sub', 'mul']:
+            elif line.mnemonic in ["add", "sub", "mul"]:
                 iarithmetic_ctr += 1
             # Counting use of vector registers
             for op in line.operands:
-                if isinstance(op, RegisterOperand) and op.prefix is not None and op.prefix in 'zv':
+                if isinstance(op, RegisterOperand) and op.prefix is not None and op.prefix in "zv":
                     vector_ctr += 1
-                #if isinstance(op, RegisterOperand) and  'range' in op.register and op.register.range[0].prefix in 'zv':
+                # if isinstance(op, RegisterOperand) and  'range' in op.register and op.register.range[0].prefix in 'zv':
                 #    vector_ctr += 1
             # Count all instructions
             instruction_ctr += 1
 
         # Build metric
         return (vector_ctr, farithmetic_ctr, iarithmetic_ctr, instruction_ctr)
-    
+
     @staticmethod
     def normalize_to_register_str(register):
         if register is None:
             return None
         prefix = register.prefix
-        if prefix in 'wx':
-            prefix = 'x'
+        if prefix in "wx":
+            prefix = "x"
         return prefix + register.name
 
     @staticmethod
@@ -283,9 +309,9 @@ def get_pointer_increment(block):
         for dests in [l.semantic_operands["destination"] for l in block]:
             for d in dests:
                 if isinstance(d, RegisterOperand):
-                    #if 'range' in d.register:
+                    # if 'range' in d.register:
                     #    modified_registers[AArch64.normalize_to_register_str(d.register.range[0])] += 1
-                    #else:
+                    # else:
                     modified_registers[AArch64.normalize_to_register_str(d)] += 1
         for l in block:
             for d in l.operands:
@@ -298,20 +324,26 @@ def get_pointer_increment(block):
                         if d.pre_indexed:
                             inc = int(d.offset.value)
                         increments[AArch64.normalize_to_register_str(d.base)] = inc
-        
+
         for line in block:
             # Skip non-instruction lines (such as comments and labels)
-            if line.instruction is None:
+            if line.mnemonic is None:
                 continue
 
             # Extract and filter destination references (stores)
             dst_mem_references = []
-            for dst in [op for op in chain(line.semantic_operands["destination"],
-                                                  line.semantic_operands["src_dst"])
-                        if isinstance(op, MemoryOperand)]:
+            for dst in [
+                op
+                for op in chain(
+                    line.semantic_operands["destination"], line.semantic_operands["src_dst"]
+                )
+                if isinstance(op, MemoryOperand)
+            ]:
                 # base or index must be a modified (i.e., changing) register
-                if AArch64.normalize_to_register_str(dst.base) not in modified_registers and \
-                    AArch64.normalize_to_register_str(dst.index) not in modified_registers:
+                if (
+                    AArch64.normalize_to_register_str(dst.base) not in modified_registers
+                    and AArch64.normalize_to_register_str(dst.index) not in modified_registers
+                ):
                     continue
 
                 # offset operands with identifiers (e.g. `:lo12:gosa`) are ignored
@@ -327,56 +359,72 @@ def get_pointer_increment(block):
 
             # If no destination references were found sofar, include source references (loads)
             if not stores_only:
-                mem_references += [op for op in chain(line.semantic_operands["source"],
-                                                             line.semantic_operands["src_dst"])
-                                   if isinstance(op, MemoryOperand)]
+                mem_references += [
+                    op
+                    for op in chain(
+                        line.semantic_operands["source"], line.semantic_operands["src_dst"]
+                    )
+                    if isinstance(op, MemoryOperand)
+                ]
 
             # ADD dest_reg, src_reg, immd
-            if re.match(r'^add[s]?$', line.instruction) and \
-                    line.operands[0] == line.operands[1] and \
-                    isinstance(line.operands[2], ImmediateOperand):
+            if (
+                re.match(r"^add[s]?$", line.mnemonic)
+                and line.operands[0] == line.operands[1]
+                and isinstance(line.operands[2], ImmediateOperand)
+            ):
                 reg_name = AArch64.normalize_to_register_str(line.operands[0])
                 inc = int(line.operands[2].value)
                 increments[reg_name] = inc
             # SUB dest_reg, src_reg, immd
-            elif re.match(r'^sub[s]?$', line.instruction) and \
-                    line.operands[0] == line.operands[1] and \
-                    isinstance(line.operands[2], ImmediateOperand):
+            elif (
+                re.match(r"^sub[s]?$", line.mnemonic)
+                and line.operands[0] == line.operands[1]
+                and isinstance(line.operands[2], ImmediateOperand)
+            ):
                 reg_name = AArch64.normalize_to_register_str(line.operands[0])
                 inc = -int(line.operands[2].value)
                 if reg_name in increments and increments[reg_name] == inc:
                     increments[reg_name] = inc
 
         # Remove any increments that are modiefed more than once
-        increments = {reg_name: inc for reg_name, inc in increments.items()
-                      if modified_registers[reg_name] == 1}
+        increments = {
+            reg_name: inc
+            for reg_name, inc in increments.items()
+            if modified_registers[reg_name] == 1
+        }
 
         # Second pass to find lsl instructions on increments
         for line in block:
-            if line.instruction is None:
+            if line.mnemonic is None:
                 continue
             # LSL dest_reg, src_reg, immd
-            if re.match(r'^lsl$', line.instruction) and isinstance(line.operands[2], ImmediateOperand) and \
-                    AArch64.normalize_to_register_str(line.operands[1]) in increments:
-                increments[AArch64.normalize_to_register_str(line.operands[0])] = \
-                    increments[AArch64.normalize_to_register_str(line.operands[1])] * \
-                    2**int(line.operands[2].value)
+            if (
+                re.match(r"^lsl$", line.mnemonic)
+                and isinstance(line.operands[2], ImmediateOperand)
+                and AArch64.normalize_to_register_str(line.operands[1]) in increments
+            ):
+                increments[AArch64.normalize_to_register_str(line.operands[0])] = increments[
+                    AArch64.normalize_to_register_str(line.operands[1])
+                ] * 2 ** int(line.operands[2].value)
 
         new_increments = []
         # Third pass to find registers based on constant +- increment
         for line in block:
-            if line.instruction is None:
+            if line.mnemonic is None:
                 continue
             # ADD|SUB dest_reg, const_reg, increment_reg (source registers may be switched)
-            m = re.match(r'^(add|sub)[s]?$', line.instruction)
+            m = re.match(r"^(add|sub)[s]?$", line.mnemonic)
             if m:
-                if m.group(1) == 'add':
+                if m.group(1) == "add":
                     factor = 1
                 else:
                     factor = -1
-                if not isinstance(line.operands[1], RegisterOperand) or not isinstance(line.operands[2], RegisterOperand): 
+                if not isinstance(line.operands[1], RegisterOperand) or not isinstance(
+                    line.operands[2], RegisterOperand
+                ):
                     continue
-                for i,j in [(1,2), (2,1)]:
+                for i, j in [(1, 2), (2, 1)]:
                     reg_i_name = AArch64.normalize_to_register_str(line.operands[i])
                     reg_j_name = AArch64.normalize_to_register_str(line.operands[j])
                     if reg_i_name in increments and reg_j_name not in modified_registers:
@@ -388,21 +436,27 @@ def get_pointer_increment(block):
                         new_increments.append(reg_dest_name)
 
         # Remove any increments that are modified more often than updates have been detected
-        increments = {reg_name: inc for reg_name, inc in increments.items()
-                      if modified_registers[reg_name] == 1}
+        increments = {
+            reg_name: inc
+            for reg_name, inc in increments.items()
+            if modified_registers[reg_name] == 1
+        }
 
         # Last pass to find lsl instructions on increments
         for line in block:
-            if line.instruction is None:
+            if line.mnemonic is None:
                 continue
             # LSL dest_reg, src_reg, immd
-            if re.match(r'^lsl$', line.instruction) and isinstance(line.operands[2], ImmediateOperand) and \
-                    isinstance(line.operands[1], RegisterOperand):
+            if (
+                re.match(r"^lsl$", line.mnemonic)
+                and isinstance(line.operands[2], ImmediateOperand)
+                and isinstance(line.operands[1], RegisterOperand)
+            ):
                 src_reg_name = AArch64.normalize_to_register_str(line.operands[1])
                 if src_reg_name in new_increments and src_reg_name in increments:
-                    increments[AArch64.normalize_to_register_str(line.operands[0])] = \
-                        increments[src_reg_name] * \
-                        2**int(line.operands[2].value)
+                    increments[AArch64.normalize_to_register_str(line.operands[0])] = increments[
+                        src_reg_name
+                    ] * 2 ** int(line.operands[2].value)
 
         # deduce loop increment from memory index register
         address_registers = []
@@ -417,7 +471,7 @@ def get_pointer_increment(block):
                     reg = index_reg
                     # If index is used, a scale other than 1 needs to be considered
                     if mref.index.shift:
-                        scales[reg] = 2**int(mref.index.shift[0]['value'])
+                        scales[reg] = 2 ** int(mref.index.shift[0]["value"])
                 else:
                     reg = base_reg
             else:
@@ -443,16 +497,16 @@ def get_pointer_increment(block):
 def userselect_increment(block, default=None, comment=None):
     """Let user interactively select byte increment."""
     print("Selected block:")
-    print('\n    ' + ('\n    '.join([b.line for b in block])))
-    print('hash: ', hashblock(block))
+    print("\n    " + ("\n    ".join([b.line for b in block])))
+    print("hash: ", hashblock(block))
     print()
 
     increment = None
     while increment is None:
         prompt = "Choose store pointer increment (number of bytes)"
         if default:
-            prompt += '[{}]'.format(default)
-        prompt += ': '
+            prompt += "[{}]".format(default)
+        prompt += ": "
         increment = input(prompt)
         try:
             increment = int(increment)
@@ -471,13 +525,10 @@ def userselect_block(blocks, default=None, debug=False):
         # Blocks first line is the label, the user will be able to spot it, so we don't need to
         # print it
         label_list.append(label)
-        print('\n\t'.join([b.line for b in block]))
+        print("\n\t".join([b.line for b in block]))
 
     # Show all possible block labels in the end
-    print(
-        '-----------------------------\n'
-        + 'Possible blocks to be marked:'
-    )
+    print("-----------------------------\n" + "Possible blocks to be marked:")
     for label in label_list:
         print(label)
 
@@ -494,14 +545,14 @@ def hashblock(block):
     # TODO normalize register names
     # TODO normalize instruction order
     # Remove target label and jump
-    h = md5('\n'.join([b.line for b in block]).encode())
+    h = md5("\n".join([b.line for b in block]).encode())
     return h.hexdigest()
 
 
-def find_increment_in_cache(block, cache_file='~/.kerncraft/increment_cache'):
+def find_increment_in_cache(block, cache_file="~/.kerncraft/increment_cache"):
     search_hash = hashblock(block)
     cache_file = expanduser(cache_file)
-    cache = ''
+    cache = ""
     if os.path.exists(cache_file):
         with open(cache_file) as f:
             cache = f.readlines()
@@ -520,14 +571,15 @@ def find_increment_in_cache(block, cache_file='~/.kerncraft/increment_cache'):
 
 
 def store_increment_to_cache(
-        block, pointer_increment, cache_file='~/.kerncraft/increment_cache', comment=None):
+    block, pointer_increment, cache_file="~/.kerncraft/increment_cache", comment=None
+):
     cache_file = expanduser(cache_file)
     pathlib.Path(cache_file).parents[0].mkdir(parents=True, exist_ok=True)
     line = "{} {}".format(hashblock(block), pointer_increment)
     if comment:
         line += " #{}".format(comment)
-    with open(cache_file, 'a') as f:
-        f.write(line+"\n")
+    with open(cache_file, "a") as f:
+        f.write(line + "\n")
 
 
 def parse_asm(code, isa):
@@ -538,11 +590,15 @@ def parse_asm(code, isa):
     return asm_lines
 
 
-def asm_instrumentation(input_file, output_file=None,
-                        block_selection='auto',
-                        pointer_increment='auto_with_manual_fallback',
-                        debug=False,
-                        isa='x86', cache=True):
+def asm_instrumentation(
+    input_file,
+    output_file=None,
+    block_selection="auto",
+    pointer_increment="auto_with_manual_fallback",
+    debug=False,
+    isa="x86",
+    cache=True,
+):
     """
     Add markers to an assembly file.
 
@@ -569,14 +625,15 @@ def asm_instrumentation(input_file, output_file=None,
         output_file.truncate()
 
     if debug:
-        block_selection = 'manual'
+        block_selection = "manual"
 
     loop_blocks = find_basic_loop_bodies(asm_lines)
-    if block_selection == 'auto':
+    if block_selection == "auto":
         block_label = ISA.get_isa(isa).select_best_block(loop_blocks)
-    elif block_selection == 'manual':
+    elif block_selection == "manual":
         block_label = userselect_block(
-            loop_blocks, default=ISA.get_isa(isa).select_best_block(loop_blocks), debug=debug)
+            loop_blocks, default=ISA.get_isa(isa).select_best_block(loop_blocks), debug=debug
+        )
     elif isinstance(block_selection, int):
         block_label = block_selection
     else:
@@ -589,43 +646,54 @@ def asm_instrumentation(input_file, output_file=None,
 
     # Extract store pointer increment
     if not isinstance(pointer_increment, int):
-        if pointer_increment == 'auto':
+        if pointer_increment == "auto":
             pointer_increment = ISA.get_isa(isa).get_pointer_increment(block_lines)
             if pointer_increment is None:
                 if output_file is not None:
                     os.unlink(output_file.name)
-                raise RuntimeError("pointer_increment could not be detected automatically. Use "
-                                   "--pointer-increment to set manually to byte offset of store "
-                                   "pointer address between consecutive assembly block iterations. "
-                                   "Alternativley add the following line to ~/.kerncraft/"
-                                   "increment_cache: {} <pointer_increment>".format(block_hashstr))
-        elif pointer_increment == 'auto_with_manual_fallback':
+                raise RuntimeError(
+                    "pointer_increment could not be detected automatically. Use "
+                    "--pointer-increment to set manually to byte offset of store "
+                    "pointer address between consecutive assembly block iterations. "
+                    "Alternativley add the following line to ~/.kerncraft/"
+                    "increment_cache: {} <pointer_increment>".format(block_hashstr)
+                )
+        elif pointer_increment == "auto_with_manual_fallback":
             pointer_increment = ISA.get_isa(isa).get_pointer_increment(block_lines)
             if pointer_increment is None:
                 pointer_increment = userselect_increment(block_lines, comment=input_file)
-        elif pointer_increment == 'manual':
+        elif pointer_increment == "manual":
             pointer_increment = ISA.get_isa(isa).get_pointer_increment(block_lines)
             pointer_increment = userselect_increment(
-                block_lines, default=pointer_increment, comment=input_file)
+                block_lines, default=pointer_increment, comment=input_file
+            )
         else:
-            raise ValueError("pointer_increment has to be an integer, 'auto', 'manual' or  "
-                             "'auto_with_manual_fallback' ")
+            raise ValueError(
+                "pointer_increment has to be an integer, 'auto', 'manual' or  "
+                "'auto_with_manual_fallback' "
+            )
 
     marker_start, marker_end = get_marker(
-        isa, comment="pointer_increment={} {}".format(pointer_increment, block_hashstr))
+        isa, comment="pointer_increment={} {}".format(pointer_increment, block_hashstr)
+    )
 
-    marked_asm = asm_lines[:block_start] + marker_start + asm_lines[block_start:block_end] + \
-                 marker_end + asm_lines[block_end:]
+    marked_asm = (
+        asm_lines[:block_start]
+        + marker_start
+        + asm_lines[block_start:block_end]
+        + marker_end
+        + asm_lines[block_end:]
+    )
 
     if output_file is not None:
-        output_file.writelines([l.line+'\n' for l in marked_asm])
+        output_file.writelines([l.line + "\n" for l in marked_asm])
 
     return block_lines, pointer_increment
 
 
 def osaca_analyse_instrumented_assembly(
-    instrumented_assembly_file, micro_architecture, assign_optimal_throughput=True,
-    isa=None):
+    instrumented_assembly_file, micro_architecture, assign_optimal_throughput=True, isa=None
+):
     """
     Run OSACA analysis on an instrumented assembly.
 
@@ -661,32 +729,33 @@ def osaca_analyse_instrumented_assembly(
     lcd_dict = kernel_graph.get_loopcarried_dependencies()
     max_lcd = 0
     for dep in lcd_dict:
-        max_lcd = max(
-            max_lcd,
-            lcd_dict[dep]['latency'])
+        max_lcd = max(max_lcd, lcd_dict[dep]["latency"])
     # Critical-Path Analysis
     cp_list = kernel_graph.get_critical_path()
 
-    result['output'] = frontend.full_analysis(kernel, kernel_graph, verbose=True)
-    result['analyzed kernel'] = kernel
-    result['port cycles'] = OrderedDict(list(zip(osaca_machine_model['ports'], throughput_values)))
-    result['throughput'] = max(throughput_values + [max_lcd])
-    result['lcd'] = max_lcd
-    result['cp_latency'] = sum([x.latency_cp for x in cp_list])
-    result['uops'] = None  # Not given by OSACA
+    result["output"] = frontend.full_analysis(kernel, kernel_graph, verbose=True)
+    result["analyzed kernel"] = kernel
+    result["port cycles"] = OrderedDict(list(zip(osaca_machine_model["ports"], throughput_values)))
+    result["throughput"] = max(throughput_values + [max_lcd])
+    result["lcd"] = max_lcd
+    result["cp_latency"] = sum([x.latency_cp for x in cp_list])
+    result["uops"] = None  # Not given by OSACA
 
     unmatched_ratio = osaca.get_unmatched_instruction_ratio(kernel)
     if unmatched_ratio > 0.1:
-        print('WARNING: {:.0%} of the instruction could not be matched during incore analysis '
-              'with OSACA. Fix this by extending OSACAs instruction form database with the '
-              'required instructions.'.format(unmatched_ratio),
-              file=sys.stderr)
+        print(
+            "WARNING: {:.0%} of the instruction could not be matched during incore analysis "
+            "with OSACA. Fix this by extending OSACAs instruction form database with the "
+            "required instructions.".format(unmatched_ratio),
+            file=sys.stderr,
+        )
 
     return result
 
 
 def llvm_mca_analyse_instrumented_assembly(
-        instrumented_assembly_file, micro_architecture, isa='x86'):
+    instrumented_assembly_file, micro_architecture, isa="x86"
+):
     """
     Run LLVM-MCA analysis on an instrumented assembly.
 
@@ -703,26 +772,28 @@ def llvm_mca_analyse_instrumented_assembly(
     with open(instrumented_assembly_file) as f:
         parsed_code = parse_asm(f.read(), isa)
     kernel = osaca.reduce_to_section(parsed_code, isa)
-    assembly_section = '\n'.join([l.line for l in kernel])
+    assembly_section = "\n".join([l.line for l in kernel])
 
     output = subprocess.check_output(
-        ['llvm-mca'] + micro_architecture.split(' ') + 
-        ['--timeline', '--timeline-max-cycles=1000', '--timeline-max-iterations=4'],
-        input=assembly_section.encode('utf-8')).decode('utf-8')
-    result['output'] = output
+        ["llvm-mca"]
+        + micro_architecture.split(" ")
+        + ["--timeline", "--timeline-max-cycles=1000", "--timeline-max-iterations=4"],
+        input=assembly_section.encode("utf-8"),
+    ).decode("utf-8")
+    result["output"] = output
 
     # Extract port names
     port_names = OrderedDict()
-    m = re.search(r'Resources:\n(?:[^\n]+\n)+', output)
-    for m in re.finditer(r'(\[[0-9\.]+\])\s+-\s+([a-zA-Z0-9]+)', m.group()):
+    m = re.search(r"Resources:\n(?:[^\n]+\n)+", output)
+    for m in re.finditer(r"(\[[0-9\.]+\])\s+-\s+([a-zA-Z0-9]+)", m.group()):
         port_names[m.group(1)] = m.group(2)
 
     # Extract cycles per port
     port_cycles = OrderedDict()
-    m = re.search(r'Resource pressure per iteration:\n[^\n]+\n[^\n]+', output)
-    port_cycle_lines = m.group().split('\n')[1:]
+    m = re.search(r"Resource pressure per iteration:\n[^\n]+\n[^\n]+", output)
+    port_cycle_lines = m.group().split("\n")[1:]
     for port, cycles in zip(port_cycle_lines[0].split(), port_cycle_lines[1].split()):
-        if cycles == '-':
+        if cycles == "-":
             cycles = 0.0
         if port_names[port] in port_cycles:
             # Some architecures have multiple "ports" per resource in LLVM-MCA
@@ -731,29 +802,29 @@ def llvm_mca_analyse_instrumented_assembly(
             port_cycles[port_names[port]] = max(float(cycles), port_cycles[port_names[port]])
         else:
             port_cycles[port_names[port]] = float(cycles)
-    result['port cycles'] = port_cycles
-    
+    result["port cycles"] = port_cycles
+
     # Extract throughput including loop-carried-dependecy latency
-    total_cycles = int(re.search(r'Total Cycles:\s+([0-9]+)', output).group(1))
-    iterations = int(re.search(r'Iterations:\s+([0-9]+)', output).group(1))
+    total_cycles = int(re.search(r"Total Cycles:\s+([0-9]+)", output).group(1))
+    iterations = int(re.search(r"Iterations:\s+([0-9]+)", output).group(1))
     lcd = total_cycles / iterations
-    result['lcd'] = lcd
-    result['throughput'] = lcd
+    result["lcd"] = lcd
+    result["throughput"] = lcd
 
     # Extract critical path latency
     # find cycle distance between first D and last R in first iteration
-    timeline_lines = [l for l in output.split('\n') if re.match(r'\[[0-9]+,[0-9]+\]', l)]
+    timeline_lines = [l for l in output.split("\n") if re.match(r"\[[0-9]+,[0-9]+\]", l)]
     cp_start = float("inf")
     cp_end = 0
     for l in timeline_lines:
-        if l.startswith('[0,'):
-            cp_start = min(l.index('D'), cp_start)
-            cp_end = max(l.index('R'), cp_end)
-    result['cp_latency'] = cp_end - cp_start
+        if l.startswith("[0,"):
+            cp_start = min(l.index("D"), cp_start)
+            cp_end = max(l.index("R"), cp_end)
+    result["cp_latency"] = cp_end - cp_start
 
     # Extract uops
-    total_uops = int(re.search(r'Total uOps:\s+([0-9]+)', output).group(1))
-    result['uops'] = total_uops / iterations
+    total_uops = int(re.search(r"Total uOps:\s+([0-9]+)", output).group(1))
+    result["uops"] = total_uops / iterations
 
     return result
 
@@ -774,95 +845,120 @@ def iaca_analyse_instrumented_binary(instrumented_binary_file, micro_architectur
     # Select IACA version and executable based on micro_architecture:
     arch_map = {
         # arch: (binary name, version string, required additional arguments)
-        'NHM': ('iaca2.2', 'v2.2', ['-64']),
-        'WSM': ('iaca2.2', 'v2.2', ['-64']),
-        'SNB': ('iaca2.3', 'v2.3', ['-64']),
-        'IVB': ('iaca2.3', 'v2.3', ['-64']),
-        'HSW': ('iaca3.0', 'v3.0', []),
-        'BDW': ('iaca3.0', 'v3.0', []),
-        'SKL': ('iaca3.0', 'v3.0', []),
-        'SKX': ('iaca3.0', 'v3.0', []),
+        "NHM": ("iaca2.2", "v2.2", ["-64"]),
+        "WSM": ("iaca2.2", "v2.2", ["-64"]),
+        "SNB": ("iaca2.3", "v2.3", ["-64"]),
+        "IVB": ("iaca2.3", "v2.3", ["-64"]),
+        "HSW": ("iaca3.0", "v3.0", []),
+        "BDW": ("iaca3.0", "v3.0", []),
+        "SKL": ("iaca3.0", "v3.0", []),
+        "SKX": ("iaca3.0", "v3.0", []),
     }
 
     if micro_architecture not in arch_map:
-        raise ValueError('Invalid micro_architecture selected ({}), valid options are {}'.format(
-            micro_architecture, ', '.join(arch_map.keys())))
+        raise ValueError(
+            "Invalid micro_architecture selected ({}), valid options are {}".format(
+                micro_architecture, ", ".join(arch_map.keys())
+            )
+        )
 
     iaca_path = iaca_get.find_iaca()  # Throws exception if not found
-    os.environ['PATH'] += ':' + iaca_path
+    os.environ["PATH"] += ":" + iaca_path
 
     iaca_exec, iaca_version, base_args = arch_map[micro_architecture]
     if find_executable(iaca_exec) is None:
-        raise RuntimeError("{0} executable was not found. Make sure that {0} is found in "
-                           "{1}. Install using iaca_get.".format(iaca_exec, iaca_path))
+        raise RuntimeError(
+            "{0} executable was not found. Make sure that {0} is found in "
+            "{1}. Install using iaca_get.".format(iaca_exec, iaca_path)
+        )
 
     result = {}
 
-    cmd = [iaca_exec] + base_args + ['-arch', micro_architecture, instrumented_binary_file]
+    cmd = [iaca_exec] + base_args + ["-arch", micro_architecture, instrumented_binary_file]
     try:
-        iaca_output = subprocess.check_output(cmd).decode('utf-8')
-        result['output'] = iaca_output
+        iaca_output = subprocess.check_output(cmd).decode("utf-8")
+        result["output"] = iaca_output
     except OSError as e:
-        raise RuntimeError("IACA execution failed:" + ' '.join(cmd) + '\n' + str(e))
+        raise RuntimeError("IACA execution failed:" + " ".join(cmd) + "\n" + str(e))
     except subprocess.CalledProcessError as e:
         raise RuntimeError("IACA throughput analysis failed:" + str(e))
 
     # Get total cycles per loop iteration
-    match = re.search(r'^Block Throughput: ([0-9.]+) Cycles', iaca_output, re.MULTILINE)
+    match = re.search(r"^Block Throughput: ([0-9.]+) Cycles", iaca_output, re.MULTILINE)
     assert match, "Could not find Block Throughput in IACA output."
     throughput = float(match.groups()[0])
-    result['throughput'] = throughput
+    result["throughput"] = throughput
 
     # Find ports and cycles per port
-    ports = [l for l in iaca_output.split('\n') if l.startswith('|  Port  |')]
-    cycles = [l for l in iaca_output.split('\n') if l.startswith('| Cycles |')]
+    ports = [l for l in iaca_output.split("\n") if l.startswith("|  Port  |")]
+    cycles = [l for l in iaca_output.split("\n") if l.startswith("| Cycles |")]
     assert ports and cycles, "Could not find ports/cycles lines in IACA output."
-    ports = [p.strip() for p in ports[0].split('|')][2:]
-    cycles = [c.strip() for c in cycles[0].split('|')][2:]
+    ports = [p.strip() for p in ports[0].split("|")][2:]
+    cycles = [c.strip() for c in cycles[0].split("|")][2:]
     port_cycles = []
     for i in range(len(ports)):
-        if '-' in ports[i] and ' ' in cycles[i]:
-            subports = [p.strip() for p in ports[i].split('-')]
-            subcycles = [c for c in cycles[i].split(' ') if bool(c)]
+        if "-" in ports[i] and " " in cycles[i]:
+            subports = [p.strip() for p in ports[i].split("-")]
+            subcycles = [c for c in cycles[i].split(" ") if bool(c)]
             port_cycles.append((subports[0], float(subcycles[0])))
             port_cycles.append((subports[0] + subports[1], float(subcycles[1])))
         elif ports[i] and cycles[i]:
             port_cycles.append((ports[i], float(cycles[i])))
-    result['port cycles'] = OrderedDict(port_cycles)
+    result["port cycles"] = OrderedDict(port_cycles)
 
-    match = re.search(r'^Total Num Of Uops: ([0-9]+)', iaca_output, re.MULTILINE)
+    match = re.search(r"^Total Num Of Uops: ([0-9]+)", iaca_output, re.MULTILINE)
     assert match, "Could not find Uops in IACA output."
-    result['uops'] = float(match.groups()[0])
-    result['cp_latency'] = None
-    result['lcd'] = None
+    result["uops"] = float(match.groups()[0])
+    result["cp_latency"] = None
+    result["lcd"] = None
     return result
 
 
 def main():
     """Execute command line interface."""
     parser = argparse.ArgumentParser(
-        description='Find and analyze basic loop blocks and mark for IACA.',
-        epilog='For help, examples, documentation and bug reports go to:\nhttps://github.com'
-               '/RRZE-HPC/kerncraft\nLicense: AGPLv3')
-    parser.add_argument('--version', action='version', version='%(prog)s {}'.format(__version__))
-    parser.add_argument('source', type=argparse.FileType(), nargs='?', default=sys.stdin,
-                        help='assembly file to analyze (default: stdin)')
-    parser.add_argument('--outfile', '-o', type=argparse.FileType('w'), nargs='?',
-                        default=sys.stdout, help='output file location (default: stdout)')
-    parser.add_argument('--debug', action='store_true',
-                        help='Output internal analysis information for debugging.')
-    parser.add_argument('--isa', default='x86', choices=['x86', 'aarch64'])
-    parser.add_argument('--cache', action='store_true',
-                        help='Consult cache and store manual setting there.')
+        description="Find and analyze basic loop blocks and mark for IACA.",
+        epilog="For help, examples, documentation and bug reports go to:\nhttps://github.com"
+        "/RRZE-HPC/kerncraft\nLicense: AGPLv3",
+    )
+    parser.add_argument("--version", action="version", version="%(prog)s {}".format(__version__))
+    parser.add_argument(
+        "source",
+        type=argparse.FileType(),
+        nargs="?",
+        default=sys.stdin,
+        help="assembly file to analyze (default: stdin)",
+    )
+    parser.add_argument(
+        "--outfile",
+        "-o",
+        type=argparse.FileType("w"),
+        nargs="?",
+        default=sys.stdout,
+        help="output file location (default: stdout)",
+    )
+    parser.add_argument(
+        "--debug", action="store_true", help="Output internal analysis information for debugging."
+    )
+    parser.add_argument("--isa", default="x86", choices=["x86", "aarch64"])
+    parser.add_argument(
+        "--cache", action="store_true", help="Consult cache and store manual setting there."
+    )
     args = parser.parse_args()
 
     # pointer_increment is given, since it makes no difference on the command lien and requires
     # less user input
-    pointer_increment = 'auto_with_manual_fallback'
-    asm_instrumentation(input_file=args.source, output_file=args.outfile,
-                        block_selection='manual', pointer_increment='auto_with_manual_fallback',
-                        debug=args.debug, isa=args.isa, cache=args.cache)
+    pointer_increment = "auto_with_manual_fallback"
+    asm_instrumentation(
+        input_file=args.source,
+        output_file=args.outfile,
+        block_selection="manual",
+        pointer_increment=pointer_increment,
+        debug=args.debug,
+        isa=args.isa,
+        cache=args.cache,
+    )
 
 
-if __name__ == '__main__':
-    main()
\ No newline at end of file
+if __name__ == "__main__":
+    main()

From dd17e73abbae904e4483c20f5f9a52fb4c11e912 Mon Sep 17 00:00:00 2001
From: JanLJL <jan.laukemann@fau.de>
Date: Thu, 2 May 2024 14:57:39 +0200
Subject: [PATCH 7/9] run the TP assignment twice for better scheduling

---
 kerncraft/incore_model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kerncraft/incore_model.py b/kerncraft/incore_model.py
index 6561f5a..6bd56b0 100755
--- a/kerncraft/incore_model.py
+++ b/kerncraft/incore_model.py
@@ -647,6 +647,7 @@ def osaca_analyse_instrumented_assembly(
     semantics.add_semantics(kernel)
     if assign_optimal_throughput:
         semantics.assign_optimal_throughput(kernel)
+        semantics.assign_optimal_throughput(kernel)
 
     kernel_graph = osaca.KernelDG(kernel, parser, osaca_machine_model, semantics)
     frontend = osaca.Frontend(instrumented_assembly_file, arch=micro_architecture)

From e6b6ec55d76b1019a51a753796ee32ec693a27dc Mon Sep 17 00:00:00 2001
From: JanLJL <jan.laukemann@fau.de>
Date: Wed, 4 Sep 2024 11:14:07 +0200
Subject: [PATCH 8/9] removed unnecessary installs/imports

---
 .github/workflows/test-n-publish.yml | 3 ---
 kerncraft/incore_model.py            | 4 ----
 2 files changed, 7 deletions(-)

diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml
index a1fe18b..824b1eb 100644
--- a/.github/workflows/test-n-publish.yml
+++ b/.github/workflows/test-n-publish.yml
@@ -18,11 +18,8 @@ jobs:
     - name: Install
       run: |
         python -m pip install --upgrade pip
-        python -m pip install -U PyYAML
-        python -m pip install "git+https://github.com/RRZE-HPC/OSACA.git@InstrucForm"
         python -m pip install codecov requests sympy
         python -m pip install -e .
-        #iaca_get --I-accept-the-Intel-What-If-Pre-Release-License-Agreement-and-please-take-my-soul
     - name: Test
       run: |
         coverage run -p tests/all_tests.py
diff --git a/kerncraft/incore_model.py b/kerncraft/incore_model.py
index c4ddd3c..40387f7 100755
--- a/kerncraft/incore_model.py
+++ b/kerncraft/incore_model.py
@@ -5,13 +5,9 @@
 import re
 import subprocess
 import os
-from copy import copy
 import argparse
-from pprint import pformat, pprint
 import pathlib
-import textwrap
 from collections import OrderedDict, defaultdict
-import io
 from hashlib import md5
 from os.path import expanduser
 from itertools import chain

From 1fbd41cb8c386853c6f998124dba0b1e20680be9 Mon Sep 17 00:00:00 2001
From: JanLJL <jan.laukemann@fau.de>
Date: Wed, 4 Sep 2024 12:13:36 +0200
Subject: [PATCH 9/9] added OSACA for GH Actions

---
 .github/workflows/test-n-publish.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/test-n-publish.yml b/.github/workflows/test-n-publish.yml
index 824b1eb..0e35354 100644
--- a/.github/workflows/test-n-publish.yml
+++ b/.github/workflows/test-n-publish.yml
@@ -19,6 +19,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         python -m pip install codecov requests sympy
+        python -m pip install "osaca>=0.6.0"
         python -m pip install -e .
     - name: Test
       run: |