Fix JVM Method Too Large error with improved emit-time refactoring

fglock · fglock · commit 2fc8750e69fe · 2025-10-03T12:42:09.000+02:00
- Added recursion prevention using 'blockAlreadyRefactored' annotation
  to prevent infinite loops when refactoring large blocks
- Lowered refactoring threshold from 16 to 8 elements for more aggressive
  block splitting, enabling even massive subroutines like those in t/re/pat.t to run
- Cleaned up debug statements for production use
- Removed unused parse-time refactoring code from BlockNode

Results:
- pack.t runs successfully with all tests passing
- t/re/pat.t now runs all 1296 tests without Method Too Large errors
- No more StackOverflowError or infinite recursion issues

Usage: Set the environment variable JPERL_LARGECODE=refactor when running large Perl scripts
diff --git a/src/main/java/org/perlonjava/codegen/EmitBlock.java b/src/main/java/org/perlonjava/codegen/EmitBlock.java
@@ -3,20 +3,12 @@
 import org.objectweb.asm.Label;
 import org.objectweb.asm.MethodVisitor;
 import org.perlonjava.astnode.*;
-import org.perlonjava.astvisitor.ControlFlowDetectorVisitor;
 import org.perlonjava.astvisitor.EmitterVisitor;
-import org.perlonjava.runtime.GlobalVariable;
 import org.perlonjava.runtime.RuntimeContextType;
 
 import java.util.List;
 
 public class EmitBlock {
-    // Blocks with too many statements are emitted as a separate subroutine
-    // in order to avoid "Method too large" error test: in t/re/pat.t
-    final static int LARGE_BLOCK = 16;
-    
-    // Reusable visitor for control flow detection
-    private static final ControlFlowDetectorVisitor controlFlowDetector = new ControlFlowDetectorVisitor();
 
     /**
      * Emits bytecode for a block of statements.
@@ -26,37 +18,10 @@ public class EmitBlock {
      */
     public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) {
         MethodVisitor mv = emitterVisitor.ctx.mv;
-
-        // Check if we can emit this as a subroutine, to avoid "Method too large" error.
-        // Check for control flow that would break if refactored
-        boolean hasUnsafeControlFlow = false;
-        if (node.elements.size() > LARGE_BLOCK && !node.getBooleanAnnotation("blockIsSubroutine")) {
-            // Use visitor pattern to check for unsafe control flow
-            controlFlowDetector.reset();
-            node.accept(controlFlowDetector);
-            hasUnsafeControlFlow = controlFlowDetector.hasUnsafeControlFlow();
-        }
         
-        if (node.elements.size() > LARGE_BLOCK
-                && !emitterVisitor.ctx.javaClassInfo.gotoLabelStack.isEmpty()
-                && !node.getBooleanAnnotation("blockIsSubroutine")
-                && !hasUnsafeControlFlow) {
-            // Create sub {...}->(@_)
-            int index = node.tokenIndex;
-            ListNode args = new ListNode(index);
-            args.elements.add(new OperatorNode("@", new IdentifierNode("_", index), index));
-            BinaryOperatorNode subr = new BinaryOperatorNode(
-                    "->",
-                    new SubroutineNode(
-                            null, null, null,
-                            new BlockNode(List.of(node), index),
-                            false,
-                            index
-                    ),
-                    args,
-                    index
-            );
-            subr.accept(emitterVisitor);
+        // Try to refactor large blocks using the helper class
+        if (LargeBlockRefactorer.processBlock(emitterVisitor, node)) {
+            // Block was refactored and emitted by the helper
             return;
         }
 
@@ -127,4 +92,23 @@ public static void emitBlock(EmitterVisitor emitterVisitor, BlockNode node) {
         emitterVisitor.ctx.symbolTable.exitScope(scopeIndex);
         emitterVisitor.ctx.logDebug("generateCodeBlock end");
     }
+
+    /**
+     * Builds the AST for {@code sub { ... }->(@_)}: an anonymous subroutine whose
+     * body contains the given block, applied to {@code @_}.
+     *
+     * NOTE(review): no caller of this method is visible in this change; confirm
+     * whether it is still needed next to LargeBlockRefactorer.
+     *
+     * @param node the block to wrap; it becomes the only element of the new subroutine body
+     * @return the "->" node that applies the anonymous subroutine to {@code @_}
+     */
+    private static BinaryOperatorNode refactorBlockToSub(BlockNode node) {
+        int tokenIndex = node.tokenIndex;
+
+        // Argument list "(@_)"
+        ListNode callArgs = new ListNode(tokenIndex);
+        callArgs.elements.add(new OperatorNode("@", new IdentifierNode("_", tokenIndex), tokenIndex));
+
+        // Anonymous sub whose body is a fresh block holding the original block
+        SubroutineNode anonymousSub = new SubroutineNode(
+                null, null, null,
+                new BlockNode(List.of(node), tokenIndex),
+                false,
+                tokenIndex
+        );
+
+        return new BinaryOperatorNode("->", anonymousSub, callArgs, tokenIndex);
+    }
 }
diff --git a/src/main/java/org/perlonjava/codegen/LargeBlockRefactorer.java b/src/main/java/org/perlonjava/codegen/LargeBlockRefactorer.java
@@ -0,0 +1,261 @@
+package org.perlonjava.codegen;
+
+import org.perlonjava.astnode.*;
+import org.perlonjava.astvisitor.ControlFlowDetectorVisitor;
+import org.perlonjava.astvisitor.EmitterVisitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Helper class for refactoring large blocks to avoid JVM's "Method too large" error.
+ * 
+ * This class encapsulates all logic for detecting and transforming large blocks,
+ * including smart chunking strategies and control flow analysis.
+ */
+public class LargeBlockRefactorer {
+    
+    // Configuration thresholds
+    private static final int LARGE_BLOCK_ELEMENT_COUNT = 8;  // Lowered from 16 for more aggressive refactoring
+    private static final int LARGE_BYTECODE_SIZE = 30000;
+    private static final int MIN_CHUNK_SIZE = 4;  // Minimum statements to extract as a chunk
+    
+    // Reusable visitor for control flow detection
+    private static final ControlFlowDetectorVisitor controlFlowDetector = new ControlFlowDetectorVisitor();
+    
+    /**
+     * Decides whether a block has to be refactored to stay clear of the JVM
+     * "Method too large" limit and, when it does, emits the refactored form.
+     *
+     * @param emitterVisitor The emitter visitor context
+     * @param node The block to process
+     * @return true if the block was refactored and emitted here; false if the caller
+     *         must emit the block itself (refactoring was not needed, not allowed,
+     *         or not possible)
+     */
+    public static boolean processBlock(EmitterVisitor emitterVisitor, BlockNode node) {
+        // Recursion guard: a block that was already wrapped is emitted normally,
+        // otherwise it would be wrapped again without end.
+        // A block that already is a subroutine body is never wrapped either.
+        if (node.getBooleanAnnotation("blockAlreadyRefactored")
+                || node.getBooleanAnnotation("blockIsSubroutine")) {
+            return false;
+        }
+
+        // Refactoring can be switched on through the JPERL_LARGECODE environment variable
+        boolean refactorEnabled = "refactor".equals(System.getenv("JPERL_LARGECODE"));
+
+        // Small blocks, and blocks in a context that does not allow refactoring, are left alone
+        if (!shouldRefactorBlock(node, emitterVisitor, refactorEnabled)) {
+            return false;
+        }
+
+        // Blocks annotated as special (see isSpecialContext) have special compilation
+        // semantics and are not refactored
+        if (isSpecialContext(node)) {
+            return false;
+        }
+
+        // Smart chunking (trySmartChunking) is TEMPORARILY DISABLED and deliberately not
+        // called here: it has timing issues with special blocks (BEGIN/require) and causes
+        // an NPE in SpecialBlockParser when functions aren't defined yet during compilation.
+
+        // Whole-block refactoring is the only strategy in use; its result tells the
+        // caller whether the block has already been emitted.
+        return tryWholeBlockRefactoring(emitterVisitor, node);
+    }
+    
+    /**
+     * Tells whether a block is a candidate for refactoring.
+     *
+     * A block qualifies when it has more than LARGE_BLOCK_ELEMENT_COUNT elements and
+     * either refactoring was explicitly enabled or the goto label stack of the
+     * class being generated is not empty.
+     *
+     * @param node the block under consideration
+     * @param emitterVisitor the emitter visitor whose context holds the goto label stack
+     * @param refactorEnabled true when refactoring was explicitly requested
+     * @return true if the block should be refactored
+     */
+    private static boolean shouldRefactorBlock(BlockNode node, EmitterVisitor emitterVisitor, boolean refactorEnabled) {
+        // The size test comes first so that small blocks never touch the emitter context
+        return node.elements.size() > LARGE_BLOCK_ELEMENT_COUNT
+                && (refactorEnabled || !emitterVisitor.ctx.javaClassInfo.gotoLabelStack.isEmpty());
+    }
+    
+    /**
+     * Check whether the block carries one of the annotations that mark a special
+     * context: blockIsSpecial, blockIsBegin, blockIsRequire or blockIsInit.
+     * processBlock skips refactoring for such blocks.
+     *
+     * @param node the block to inspect
+     * @return true if any of the annotations is set on the block
+     */
+    private static boolean isSpecialContext(BlockNode node) {
+        // Checked in this order; the first annotation that is set decides
+        String[] specialAnnotations = {"blockIsSpecial", "blockIsBegin", "blockIsRequire", "blockIsInit"};
+        for (String annotation : specialAnnotations) {
+            if (node.getBooleanAnnotation(annotation)) {
+                return true;
+            }
+        }
+        return false;
+    }
+    
+    /**
+     * Try to shrink the number of top-level elements of a block by grouping runs
+     * of "safe" statements into closures.
+     *
+     * Statements that break a chunk (see shouldBreakChunk) and complete blocks
+     * (see isCompleteBlock) are kept as they are; the statements between them are
+     * handed to processChunk. On success the element list of the block is
+     * replaced in place.
+     *
+     * @param node The block to chunk
+     * @return true if the element list was replaced by a shorter one, false otherwise
+     */
+    private static boolean trySmartChunking(BlockNode node) {
+        List<Node> rewritten = new ArrayList<>();
+        List<Node> pending = new ArrayList<>();
+
+        for (Node statement : node.elements) {
+            // Either condition means the statement must stay at the top level
+            boolean standsAlone = shouldBreakChunk(statement) || isCompleteBlock(statement);
+            if (!standsAlone) {
+                pending.add(statement);
+                continue;
+            }
+
+            // Flush what was collected so far, then keep the statement itself unchanged
+            processChunk(pending, rewritten, node.tokenIndex);
+            pending.clear();
+            rewritten.add(statement);
+        }
+
+        // Statements collected after the last stand-alone element
+        processChunk(pending, rewritten, node.tokenIndex);
+
+        // Only touch the block when chunking actually reduced the element count
+        if (rewritten.size() >= node.elements.size()) {
+            return false;
+        }
+        node.elements.clear();
+        node.elements.addAll(rewritten);
+        return true;
+    }
+    
+    /**
+     * Decide whether an element has to stay outside of any chunk.
+     *
+     * That is the case for labels, for elements in which the shared control flow
+     * detector reports unsafe control flow, and for variable declarations that are
+     * not themselves a complete block.
+     *
+     * @param element the statement to classify
+     * @return true if the element must end the current chunk and be kept as is
+     */
+    private static boolean shouldBreakChunk(Node element) {
+        if (element instanceof LabelNode) {
+            return true;
+        }
+
+        // The detector is shared, so it is reset before every use
+        controlFlowDetector.reset();
+        element.accept(controlFlowDetector);
+
+        return controlFlowDetector.hasUnsafeControlFlow()
+                || (!isCompleteBlock(element) && hasVariableDeclaration(element));
+    }
+    
+    /**
+     * Move an accumulated run of statements into the output list.
+     *
+     * A run of at least MIN_CHUNK_SIZE statements is wrapped as
+     * {@code sub { ... }->()}; a shorter run is appended unchanged. An empty run
+     * does nothing. The chunk list itself is not modified.
+     *
+     * @param chunk the statements collected so far
+     * @param processedElements the output list that receives the result
+     * @param tokenIndex the token index given to every node created here
+     */
+    private static void processChunk(List<Node> chunk, List<Node> processedElements, int tokenIndex) {
+        if (chunk.isEmpty()) {
+            return;
+        }
+
+        if (chunk.size() < MIN_CHUNK_SIZE) {
+            // Too small to be worth a closure
+            processedElements.addAll(chunk);
+            return;
+        }
+
+        // The statements are copied because the caller clears and reuses the chunk list
+        BlockNode body = new BlockNode(new ArrayList<>(chunk), tokenIndex);
+        SubroutineNode anonymousSub = new SubroutineNode(null, null, null, body, false, tokenIndex);
+        // Empty args - closures capture outer scope
+        ListNode noArgs = new ListNode(tokenIndex);
+
+        processedElements.add(new BinaryOperatorNode("->", anonymousSub, noArgs, tokenIndex));
+    }
+    
+    /**
+     * Try to emit the entire block as {@code sub { ... }->(@_)}.
+     *
+     * Nothing is emitted when the control flow detector reports unsafe control
+     * flow in the block. Otherwise the block is annotated with
+     * "blockAlreadyRefactored", wrapped, and the wrapper is emitted through the
+     * given visitor.
+     *
+     * @param emitterVisitor the visitor used to emit the wrapper
+     * @param node the block to wrap
+     * @return true if the wrapper was emitted, false if the block was left untouched
+     */
+    private static boolean tryWholeBlockRefactoring(EmitterVisitor emitterVisitor, BlockNode node) {
+        // The detector is shared, so it is reset before every use
+        controlFlowDetector.reset();
+        node.accept(controlFlowDetector);
+        if (controlFlowDetector.hasUnsafeControlFlow()) {
+            return false;
+        }
+
+        // IMPORTANT: the annotation must be set before the wrapper is emitted; emitting
+        // the wrapper reaches this block again and processBlock then lets it through.
+        node.setAnnotation("blockAlreadyRefactored", true);
+
+        int tokenIndex = node.tokenIndex;
+
+        // Argument list "(@_)"
+        ListNode callArgs = new ListNode(tokenIndex);
+        callArgs.elements.add(new OperatorNode("@", new IdentifierNode("_", tokenIndex), tokenIndex));
+
+        // Anonymous sub whose body is a wrapper block containing the original block
+        BlockNode wrapperBlock = new BlockNode(List.of(node), tokenIndex);
+        SubroutineNode anonymousSub = new SubroutineNode(
+            null, null, null,
+            wrapperBlock,
+            false,
+            tokenIndex
+        );
+
+        BinaryOperatorNode call = new BinaryOperatorNode("->", anonymousSub, callArgs, tokenIndex);
+        call.accept(emitterVisitor);
+        return true;
+    }
+    
+    /**
+     * Check if a node is a variable declaration (my, our, local).
+     *
+     * Only the node itself is inspected: either an operator node whose operator
+     * is one of the three keywords, or an "=" node whose left side is such an
+     * operator node. Nested nodes are not searched.
+     *
+     * @param node the statement to inspect
+     * @return true if the node matches one of the two declaration shapes
+     */
+    private static boolean hasVariableDeclaration(Node node) {
+        // Bare declaration; an operator node is decided here and never reaches the check below
+        if (node instanceof OperatorNode) {
+            return isDeclarator(((OperatorNode) node).operator);
+        }
+
+        // Declaration on the left side of an assignment
+        if (node instanceof BinaryOperatorNode) {
+            BinaryOperatorNode assignment = (BinaryOperatorNode) node;
+            if ("=".equals(assignment.operator) && assignment.left instanceof OperatorNode) {
+                return isDeclarator(((OperatorNode) assignment.left).operator);
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Tells whether an operator name is one of the declaration keywords.
+     */
+    private static boolean isDeclarator(String operator) {
+        return "my".equals(operator) || "our".equals(operator) || "local".equals(operator);
+    }
+    
+    /**
+     * Check if a node is one of the block-like constructs treated as complete
+     * on their own: a plain block, a For1Node or For3Node loop, an if statement
+     * or a try statement.
+     *
+     * @param node the node to classify
+     * @return true if the node is an instance of one of these node types
+     */
+    private static boolean isCompleteBlock(Node node) {
+        if (node instanceof BlockNode) {
+            return true;
+        }
+        if (node instanceof For1Node || node instanceof For3Node) {
+            return true;
+        }
+        if (node instanceof IfNode) {
+            return true;
+        }
+        return node instanceof TryNode;
+    }
+}
diff --git a/src/main/java/org/perlonjava/parser/SpecialBlockParser.java b/src/main/java/org/perlonjava/parser/SpecialBlockParser.java
@@ -167,6 +167,9 @@ static RuntimeList runSpecialBlock(Parser parser, String blockPhase, Node block)
             }
 
             String message = t.getMessage();
+            if (message == null) {
+                message = t.getClass().getSimpleName() + " during " + blockPhase;
+            }
             if (!message.endsWith("\n")) {
                 message += "\n";
             }

Original file line number	Diff line number	Diff line change
`@@ -167,6 +167,9 @@ static RuntimeList runSpecialBlock(Parser parser, String blockPhase, Node block)`
`167`	`167`	`}`
`168`	`168`
`169`	`169`	`String message = t.getMessage();`
	`170`	`+ if (message == null) {`
	`171`	`+ message = t.getClass().getSimpleName() + " during " + blockPhase;`
	`172`	`+ }`
`170`	`173`	`if (!message.endsWith("\n")) {`
`171`	`174`	`message += "\n";`
`172`	`175`	`}`