From 0d3df342d74bfd7f9e535f82afb0280e769c995d Mon Sep 17 00:00:00 2001 From: ErikDanielsson Date: Thu, 31 Jul 2025 16:07:47 +0200 Subject: [PATCH 1/5] Pass comments on COMMENTS channel --- modules/nf-lang/src/main/antlr/ScriptLexer.g4 | 10 ++- .../nextflow/script/formatter/Formatter.java | 48 ++++++++++---- .../java/nextflow/script/parser/Comment.java | 34 ++++++++++ .../script/parser/ScriptAstBuilder.java | 65 ++++++++++++++----- 4 files changed, 126 insertions(+), 31 deletions(-) create mode 100644 modules/nf-lang/src/main/java/nextflow/script/parser/Comment.java diff --git a/modules/nf-lang/src/main/antlr/ScriptLexer.g4 b/modules/nf-lang/src/main/antlr/ScriptLexer.g4 index 1861bf7278..2db54fd6da 100644 --- a/modules/nf-lang/src/main/antlr/ScriptLexer.g4 +++ b/modules/nf-lang/src/main/antlr/ScriptLexer.g4 @@ -54,6 +54,10 @@ options { superClass = AbstractLexer; } +channels { + COMMENTS // The COMMENTS channel will contain all comments, and are separatly introduced into the AST during creation +} + @header { package nextflow.script.parser; @@ -830,18 +834,18 @@ NL : LineTerminator /* { this.ignoreTokenInsideParens(); } */ // Multiple-line comments (including groovydoc comments) ML_COMMENT - : '/*' .*? '*/' /* { this.ignoreMultiLineCommentConditionally(); } */ -> type(NL) + : '/*' .*? '*/' /* { this.ignoreMultiLineCommentConditionally(); } */ -> channel(COMMENTS) ; // Single-line comments SL_COMMENT - : '//' ~[\r\n\uFFFF]* /* { this.ignoreTokenInsideParens(); } */ -> type(NL) + : '//' ~[\r\n\uFFFF]* /* { this.ignoreTokenInsideParens(); } */ -> channel(COMMENTS) ; // Script-header comments. // The very first characters of the file may be "#!". If so, ignore the first line. SH_COMMENT - : '#!' { require(errorIgnored || 0 == this.tokenIndex, "Shebang comment should appear at the first line", -2, true); } ShCommand (LineTerminator '#!' ShCommand)* -> type(NL) + : '#!' { require(errorIgnored || 0 == this.tokenIndex, "Shebang comment should appear at the first line", -2, true); } ShCommand (LineTerminator '#!' ShCommand)* -> channel(COMMENTS) ; // Unexpected characters will be handled by groovy parser later. diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java index 7f83cd24b1..c2158ebf8e 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java @@ -101,18 +101,23 @@ public void appendNewLine() { public void appendLeadingComments(ASTNode node) { var comments = (List) node.getNodeMetaData(ASTNodeMarker.LEADING_COMMENTS); - if( comments == null || comments.isEmpty() ) + append("/*l*/"); + if( comments == null || comments.isEmpty() ) { return; - - for( var line : DefaultGroovyMethods.asReversed(comments) ) { - if( "\n".equals(line) ) { - append(line); - } - else { - appendIndent(); - append(line.stripLeading()); - } } + for( var comment : comments ) { + appendIndent(); + append(comment); + } + // for( var line : DefaultGroovyMethods.asReversed(comments) ) { + // if( "\n".equals(line) ) { + // append(line); + // } + // else { + // appendIndent(); + // append(line.stripLeading()); + // } + // } } public boolean hasTrailingComment(ASTNode node) { @@ -122,6 +127,7 @@ public boolean hasTrailingComment(ASTNode node) { public void appendTrailingComment(ASTNode node) { var comment = (String) node.getNodeMetaData(ASTNodeMarker.TRAILING_COMMENT); + append("/* */"); if( comment != null ) { append(' '); append(comment); @@ -151,7 +157,7 @@ protected void visitIfElse(IfStatement node, boolean preIndent) { appendLeadingComments(node); if( preIndent ) appendIndent(); - append("if ("); + append("i f ("); visit(node.getBooleanExpression()); append(") {\n"); incIndent(); @@ -370,6 +376,7 @@ public void visitArguments(List args, boolean wrap) { @Override public void visitBinaryExpression(BinaryExpression node) { + appendLeadingComments(node); if( node instanceof DeclarationExpression ) { append("def "); inVariableDeclaration = true; @@ -434,6 +441,7 @@ public void visitBinaryExpression(BinaryExpression node) { @Override public void visitTernaryExpression(TernaryExpression node) { + appendLeadingComments(node); if( shouldWrapExpression(node) ) { visit(node.getBooleanExpression()); incIndent(); @@ -458,6 +466,7 @@ public void visitTernaryExpression(TernaryExpression node) { @Override public void visitShortTernaryExpression(ElvisOperatorExpression node) { + appendLeadingComments(node); visit(node.getTrueExpression()); append(" ?: "); visit(node.getFalseExpression()); @@ -465,12 +474,14 @@ public void visitShortTernaryExpression(ElvisOperatorExpression node) { @Override public void visitNotExpression(NotExpression node) { + appendLeadingComments(node); append('!'); visit(node.getExpression()); } @Override public void visitClosureExpression(ClosureExpression node) { + appendLeadingComments(node); append('{'); if( node.getParameters() != null && node.getParameters().length > 0 ) { append(' '); @@ -516,6 +527,7 @@ public void visitParameters(Parameter[] parameters) { @Override public void visitTupleExpression(TupleExpression node) { + appendLeadingComments(node); var wrap = shouldWrapExpression(node); append('('); if( wrap ) @@ -531,6 +543,7 @@ public void visitTupleExpression(TupleExpression node) { @Override public void visitListExpression(ListExpression node) { + appendLeadingComments(node); var wrap = hasTrailingComma(node) || shouldWrapExpression(node); append('['); if( wrap ) @@ -560,6 +573,7 @@ protected void visitPositionalArgs(List args, boolean wrap) { @Override public void visitMapExpression(MapExpression node) { + appendLeadingComments(node); if( node.getMapEntryExpressions().isEmpty() ) { append("[:]"); return; @@ -593,6 +607,7 @@ protected void visitNamedArgs(List args, boolean wrap) { @Override public void visitMapEntryExpression(MapEntryExpression node) { + appendLeadingComments(node); visit(node.getKeyExpression()); append(": "); visit(node.getValueExpression()); @@ -600,6 +615,7 @@ public void visitMapEntryExpression(MapEntryExpression node) { @Override public void visitRangeExpression(RangeExpression node) { + appendLeadingComments(node); visit(node.getFrom()); if( node.isExclusiveLeft() ) append('<'); @@ -611,24 +627,28 @@ public void visitRangeExpression(RangeExpression node) { @Override public void visitUnaryMinusExpression(UnaryMinusExpression node) { + appendLeadingComments(node); append('-'); visit(node.getExpression()); } @Override public void visitUnaryPlusExpression(UnaryPlusExpression node) { + appendLeadingComments(node); append('+'); visit(node.getExpression()); } @Override public void visitBitwiseNegationExpression(BitwiseNegationExpression node) { + appendLeadingComments(node); append('~'); visit(node.getExpression()); } @Override public void visitCastExpression(CastExpression node) { + appendLeadingComments(node); visit(node.getExpression()); append(" as "); visitTypeAnnotation(node.getType()); @@ -636,6 +656,7 @@ public void visitCastExpression(CastExpression node) { @Override public void visitConstantExpression(ConstantExpression node) { + appendLeadingComments(node); var text = (String) node.getNodeMetaData(ASTNodeMarker.VERBATIM_TEXT); if( text != null ) append(text); @@ -645,10 +666,12 @@ public void visitConstantExpression(ConstantExpression node) { @Override public void visitClassExpression(ClassExpression node) { + appendLeadingComments(node); visitTypeAnnotation(node.getType()); } public void visitTypeAnnotation(ClassNode type) { + appendLeadingComments(type); if( isLegacyType(type) ) { append(type.getNodeMetaData(ASTNodeMarker.LEGACY_TYPE)); return; @@ -659,6 +682,7 @@ public void visitTypeAnnotation(ClassNode type) { @Override public void visitVariableExpression(VariableExpression node) { + appendLeadingComments(node); if( inVariableDeclaration && isLegacyType(node.getType()) ) { visitTypeAnnotation(node.getType()); append(' '); @@ -679,6 +703,7 @@ else if( node.isSafe() ) @Override public void visitGStringExpression(GStringExpression node) { + appendLeadingComments(node); // see also: GStringUtil.writeToImpl() var quoteChar = (String) node.getNodeMetaData(ASTNodeMarker.QUOTE_CHAR, k -> DQ_STR); append(quoteChar); @@ -699,6 +724,7 @@ public void visitGStringExpression(GStringExpression node) { @Override public void visit(Expression node) { + appendLeadingComments(node); var number = (Number) node.getNodeMetaData(ASTNodeMarker.INSIDE_PARENTHESES_LEVEL); if( number != null && number.intValue() > 0 ) append('('); diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/Comment.java b/modules/nf-lang/src/main/java/nextflow/script/parser/Comment.java new file mode 100644 index 0000000000..dd09ae6587 --- /dev/null +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/Comment.java @@ -0,0 +1,34 @@ +package nextflow.script.parser; + +import org.antlr.v4.runtime.Token; +import org.codehaus.groovy.ast.ASTNode; + +public class Comment { + private final Token token; + private final ASTNode attachedTo; + private final boolean isLeading; + private final boolean isTrailing; + + public Comment(Token token, ASTNode attachedTo, boolean isLeading, boolean isTrailing) { + this.token = token; + this.attachedTo = attachedTo; + this.isLeading = isLeading; + this.isTrailing = isTrailing; + } + + public Token getToken() { + return token; + } + + public ASTNode getAttachedTo() { + return attachedTo; + } + + public boolean isLeading() { + return isLeading; + } + + public boolean isTrailing() { + return isTrailing; + } +} diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java b/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java index 15746bb41c..06aea61298 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; @@ -104,6 +105,7 @@ import static nextflow.script.parser.PositionConfigureUtils.ast; import static nextflow.script.parser.PositionConfigureUtils.tokenPosition; import static nextflow.script.parser.ScriptParser.*; +import static nextflow.script.parser.Comment; import static org.codehaus.groovy.ast.expr.VariableExpression.THIS_EXPRESSION; import static org.codehaus.groovy.ast.tools.GeneralUtils.*; @@ -118,6 +120,8 @@ public class ScriptAstBuilder { private ScriptNode moduleNode; private ScriptLexer lexer; private ScriptParser parser; + private CommonTokenStream commentTokens; + private List unattachedComments; private final GroovydocManager groovydocManager; private Tuple2 numberFormatError; @@ -131,6 +135,11 @@ public ScriptAstBuilder(SourceUnit sourceUnit) { this.parser = new ScriptParser(new CommonTokenStream(lexer)); parser.setErrorHandler(new DescriptiveErrorStrategy(charStream)); + // Collect all the comments from the lexer + this.commentTokens = new CommonTokenStream(lexer, ScriptLexer.COMMENTS); + this.unattachedComments = commentTokens.getTokens(); + + var groovydocEnabled = sourceUnit.getConfiguration().isGroovydocEnabled(); this.groovydocManager = new GroovydocManager(groovydocEnabled); } @@ -249,20 +258,23 @@ private ModuleNode compilationUnit(CompilationUnitContext ctx) { private boolean scriptDeclaration(ScriptDeclarationContext ctx) { if( ctx instanceof FeatureFlagDeclAltContext ffac ) { + var leadingComments = getLeadingComments(ctx); var node = featureFlagDeclaration(ffac.featureFlagDeclaration()); - saveLeadingComments(node, ctx); + saveLeadingComments(node, leadingComments); moduleNode.addFeatureFlag(node); } else if( ctx instanceof EnumDefAltContext edac ) { + var leadingComments = getLeadingComments(ctx); var node = enumDef(edac.enumDef()); - saveLeadingComments(node, ctx); + saveLeadingComments(node, leadingComments); moduleNode.addClass(node); } else if( ctx instanceof FunctionDefAltContext fdac ) { + var leadingComments = getLeadingComments(ctx); var node = functionDef(fdac.functionDef()); - saveLeadingComments(node, ctx); + saveLeadingComments(node, leadingComments); moduleNode.addFunction(node); } @@ -273,34 +285,39 @@ else if( ctx instanceof ImportDeclAltContext iac ) { } else if( ctx instanceof IncludeDeclAltContext iac ) { + var leadingComments = getLeadingComments(ctx); var node = includeDeclaration(iac.includeDeclaration()); - saveLeadingComments(node, ctx); + saveLeadingComments(node, leadingComments); moduleNode.addInclude(node); } else if( ctx instanceof OutputDefAltContext odac ) { + var leadingComments = getLeadingComments(ctx); var node = outputDef(odac.outputDef()); - saveLeadingComments(node, ctx); + saveLeadingComments(node, leadingComments); if( moduleNode.getOutputs() != null ) collectSyntaxError(new SyntaxException("Output block defined more than once", node)); moduleNode.setOutputs(node); } else if( ctx instanceof ParamDeclAltContext pac ) { + var leadingComments = getLeadingComments(ctx); var node = paramDeclaration(pac.paramDeclaration()); - saveLeadingComments(node, ctx); + saveLeadingComments(node, leadingComments); moduleNode.addParam(node); } else if( ctx instanceof ProcessDefAltContext pdac ) { + var leadingComments = getLeadingComments(ctx); var node = processDef(pdac.processDef()); - saveLeadingComments(node, ctx); + saveLeadingComments(node, leadingComments); moduleNode.addProcess(node); } else if( ctx instanceof WorkflowDefAltContext wdac ) { + var leadingComments = getLeadingComments(ctx); var node = workflowDef(wdac.workflowDef()); - saveLeadingComments(node, ctx); + saveLeadingComments(node, leadingComments); if( node.isEntry() ) { if( moduleNode.getEntry() != null ) collectSyntaxError(new SyntaxException("Entry workflow defined more than once", node)); @@ -665,6 +682,10 @@ private Statement incompleteScriptDeclaration(IncompleteScriptDeclarationContext private Statement statement(StatementContext ctx) { Statement result; + if( ctx instanceof EmptyStmtAltContext ) + return EmptyStatement.INSTANCE; + + List leadingComments = getLeadingComments(ctx); if( ctx instanceof IfElseStmtAltContext ieac ) result = ast( ifElseStatement(ieac.ifElseStatement()), ieac ); @@ -693,13 +714,11 @@ else if( ctx instanceof AssignmentStmtAltContext aac ) else if( ctx instanceof ExpressionStmtAltContext eac ) result = ast( expressionStatement(eac.expressionStatement()), eac ); - else if( ctx instanceof EmptyStmtAltContext ) - return EmptyStatement.INSTANCE; - else throw createParsingFailedException("Invalid statement: " + ctx.getText(), ctx); + + saveLeadingComments(result, leadingComments); - saveLeadingComments(result, ctx); return result; } @@ -1671,17 +1690,29 @@ private ClassNode legacyType(ParserRuleContext ctx) { /// COMMENTS - private void saveLeadingComments(ASTNode node, ParserRuleContext ctx) { - var comments = new ArrayList(); - var child = ctx; - while( saveLeadingComments0(child, comments) ) - child = child.getParent(); + private List getLeadingComments(ParserRuleContext ctx) { + // Get all the unattached comments up until this point + var leadingComments = new ArrayList(unattachedComments.stream().map(Token::getText).toList()); + // int stopIndex = ctx.getStart().getTokenIndex(); + // Iterator it = unattachedComments.iterator(); + // while( it.hasNext() ) { + // var comment = it.next(); + // if( comment.getTokenIndex() >= stopIndex ) + // break; + // leadingComments.add(comment.getText()); + // it.remove(); // The comment has been attached to this node, so remove it from the unattached comments list + // } + return leadingComments; + } + + private void saveLeadingComments(ASTNode node, List comments) { if( !comments.isEmpty() ) node.putNodeMetaData(ASTNodeMarker.LEADING_COMMENTS, comments); } private boolean saveLeadingComments0(ParserRuleContext ctx, List comments) { + var parent = ctx.getParent(); if( parent == null ) return false; From f618c38bc9e844efe676df25087a836f8f9de59f Mon Sep 17 00:00:00 2001 From: ErikDanielsson Date: Mon, 4 Aug 2025 18:15:16 +0200 Subject: [PATCH 2/5] Start adding real support --- modules/nf-lang/src/main/antlr/ScriptLexer.g4 | 8 +- .../nextflow/script/ast/WorkflowNode.java | 11 +- .../nextflow/script/control/Compiler.java | 2 + .../nextflow/script/formatter/Formatter.java | 42 +- .../formatter/ScriptFormattingVisitor.java | 35 +- .../java/nextflow/script/parser/Comment.java | 34 -- .../nextflow/script/parser/CommentWriter.java | 373 ++++++++++++++++++ .../script/parser/ScriptAstBuilder.java | 195 +++++---- 8 files changed, 568 insertions(+), 132 deletions(-) delete mode 100644 modules/nf-lang/src/main/java/nextflow/script/parser/Comment.java create mode 100644 modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java diff --git a/modules/nf-lang/src/main/antlr/ScriptLexer.g4 b/modules/nf-lang/src/main/antlr/ScriptLexer.g4 index 2db54fd6da..aa9057ca92 100644 --- a/modules/nf-lang/src/main/antlr/ScriptLexer.g4 +++ b/modules/nf-lang/src/main/antlr/ScriptLexer.g4 @@ -55,7 +55,7 @@ options { } channels { - COMMENTS // The COMMENTS channel will contain all comments, and are separatly introduced into the AST during creation + COMMENT // The COMMENT channel will contain all comments, and are separatly introduced into the AST during creation } @header { @@ -834,18 +834,18 @@ NL : LineTerminator /* { this.ignoreTokenInsideParens(); } */ // Multiple-line comments (including groovydoc comments) ML_COMMENT - : '/*' .*? '*/' /* { this.ignoreMultiLineCommentConditionally(); } */ -> channel(COMMENTS) + : '/*' .*? '*/' /* { this.ignoreMultiLineCommentConditionally(); } */ -> channel(COMMENT) ; // Single-line comments SL_COMMENT - : '//' ~[\r\n\uFFFF]* /* { this.ignoreTokenInsideParens(); } */ -> channel(COMMENTS) + : '//' ~[\r\n\uFFFF]* /* { this.ignoreTokenInsideParens(); } */ -> channel(COMMENT) ; // Script-header comments. // The very first characters of the file may be "#!". If so, ignore the first line. SH_COMMENT - : '#!' { require(errorIgnored || 0 == this.tokenIndex, "Shebang comment should appear at the first line", -2, true); } ShCommand (LineTerminator '#!' ShCommand)* -> channel(COMMENTS) + : '#!' { require(errorIgnored || 0 == this.tokenIndex, "Shebang comment should appear at the first line", -2, true); } ShCommand (LineTerminator '#!' ShCommand)* -> channel(COMMENT) ; // Unexpected characters will be handled by groovy parser later. diff --git a/modules/nf-lang/src/main/java/nextflow/script/ast/WorkflowNode.java b/modules/nf-lang/src/main/java/nextflow/script/ast/WorkflowNode.java index 9e3453488d..a53de1b284 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/ast/WorkflowNode.java +++ b/modules/nf-lang/src/main/java/nextflow/script/ast/WorkflowNode.java @@ -45,7 +45,14 @@ public class WorkflowNode extends MethodNode { public final Statement publishers; public WorkflowNode(String name, Statement takes, Statement main, Statement emits, Statement publishers) { - super(name, 0, dummyReturnType(emits), dummyParams(takes), ClassNode.EMPTY_ARRAY, EmptyStatement.INSTANCE); + super( + name != null ? name : "", // getText causes an exception if name is null, ugly hack for now + 0, + dummyReturnType(emits), + dummyParams(takes), + ClassNode.EMPTY_ARRAY, + EmptyStatement.INSTANCE + ); this.takes = takes; this.main = main; this.emits = emits; @@ -53,7 +60,7 @@ public WorkflowNode(String name, Statement takes, Statement main, Statement emit } public boolean isEntry() { - return getName() == null; + return getName() == ""; } public boolean isCodeSnippet() { diff --git a/modules/nf-lang/src/main/java/nextflow/script/control/Compiler.java b/modules/nf-lang/src/main/java/nextflow/script/control/Compiler.java index fce9af3c2d..7268ef80e9 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/control/Compiler.java +++ b/modules/nf-lang/src/main/java/nextflow/script/control/Compiler.java @@ -99,8 +99,10 @@ public void compile(SourceUnit source) { source.buildAST(); } catch( RecognitionException e ) { + System.err.println("RecognitionException: " + e.getMessage()); } catch( CompilationFailedException e ) { + System.err.println("CompilationFailedException: " + e.getMessage()); } } diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java index c2158ebf8e..37d8e7711c 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java @@ -19,6 +19,7 @@ import java.util.stream.Stream; import nextflow.script.ast.ASTNodeMarker; +import nextflow.script.parser.CommentWriter; import org.codehaus.groovy.ast.ASTNode; import org.codehaus.groovy.ast.ClassHelper; import org.codehaus.groovy.ast.ClassNode; @@ -72,12 +73,20 @@ public class Formatter extends CodeVisitorSupport { private FormattingOptions options; + private CommentWriter commentWriter; + private StringBuilder builder = new StringBuilder(); private int indentCount = 0; public Formatter(FormattingOptions options) { this.options = options; + this.commentWriter = null; + } + + public Formatter(FormattingOptions options, CommentWriter commentWriter) { + this.options = options; + this.commentWriter = commentWriter; } public void append(char c) { @@ -99,25 +108,26 @@ public void appendNewLine() { builder.append('\n'); } - public void appendLeadingComments(ASTNode node) { - var comments = (List) node.getNodeMetaData(ASTNodeMarker.LEADING_COMMENTS); - append("/*l*/"); - if( comments == null || comments.isEmpty() ) { - return; + public void writeComment(CommentWriter.Comment comment, boolean shouldBreakLine) { + var tuple = comment.write(); + appendIndent(); + append(tuple.getV1()); + if( tuple.getV2() || shouldBreakLine) { + appendNewLine(); + } else { + append(' '); } + } + + public void writeComments(List comments, boolean shouldBreakLine) { for( var comment : comments ) { - appendIndent(); - append(comment); + writeComment(comment, shouldBreakLine); } - // for( var line : DefaultGroovyMethods.asReversed(comments) ) { - // if( "\n".equals(line) ) { - // append(line); - // } - // else { - // appendIndent(); - // append(line.stripLeading()); - // } - // } + } + + public void appendLeadingComments(ASTNode node) { + var comments = commentWriter.getLeadingComments(node); + writeComments(comments, true); } public boolean hasTrailingComment(ASTNode node) { diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java index 5ed37052d0..9225c927d2 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java @@ -30,6 +30,8 @@ import nextflow.script.ast.ScriptNode; import nextflow.script.ast.ScriptVisitorSupport; import nextflow.script.ast.WorkflowNode; +import nextflow.script.parser.CommentWriter; + import org.codehaus.groovy.ast.ClassNode; import org.codehaus.groovy.ast.expr.EmptyExpression; import org.codehaus.groovy.ast.expr.PropertyExpression; @@ -40,6 +42,8 @@ import org.codehaus.groovy.ast.stmt.Statement; import org.codehaus.groovy.control.SourceUnit; +import groovy.lang.Tuple2; + import static nextflow.script.ast.ASTUtils.*; /** @@ -53,6 +57,8 @@ public class ScriptFormattingVisitor extends ScriptVisitorSupport { private FormattingOptions options; + private CommentWriter commentWriter; + private Formatter fmt; private int maxIncludeWidth = 0; @@ -62,7 +68,17 @@ public class ScriptFormattingVisitor extends ScriptVisitorSupport { public ScriptFormattingVisitor(SourceUnit sourceUnit, FormattingOptions options) { this.sourceUnit = sourceUnit; this.options = options; - this.fmt = new Formatter(options); + this.commentWriter = getCommentWriter(); + this.fmt = new Formatter(options, this.commentWriter); + } + + public CommentWriter getCommentWriter() { + var moduleNode = sourceUnit.getAST(); + if( moduleNode instanceof ScriptNode ) { + var scriptNode = (ScriptNode) moduleNode; + return (CommentWriter)scriptNode.getNodeMetaData("commentWriter"); + } + return null; } @Override @@ -70,6 +86,8 @@ protected SourceUnit getSourceUnit() { return sourceUnit; } + + public void visit() { var moduleNode = sourceUnit.getAST(); if( !(moduleNode instanceof ScriptNode) ) @@ -124,6 +142,7 @@ else if( decl instanceof ProcessNode pn ) else if( decl instanceof WorkflowNode wn ) visitWorkflow(wn); } + fmt.append(commentWriter.toNFComments()); } public String toString() { @@ -209,13 +228,23 @@ protected int getParamWidth(ParamNode node) { @Override public void visitWorkflow(WorkflowNode node) { + var leadingComments = commentWriter.getLeadingComments(node); + var withinComments = commentWriter.getWithinComments(node); + fmt.appendLeadingComments(node); fmt.append("workflow"); + fmt.append(' '); + if( withinComments.containsKey("keyword") ) { + fmt.writeComments(withinComments.get("keyword"), false); + } if( !node.isEntry() ) { - fmt.append(' '); fmt.append(node.getName()); + fmt.append(' '); + if( withinComments.containsKey("name") ) { + fmt.writeComments(withinComments.get("name"), false); + } } - fmt.append(" {\n"); + fmt.append("{\n"); fmt.incIndent(); if( node.takes instanceof BlockStatement ) { fmt.appendIndent(); diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/Comment.java b/modules/nf-lang/src/main/java/nextflow/script/parser/Comment.java deleted file mode 100644 index dd09ae6587..0000000000 --- a/modules/nf-lang/src/main/java/nextflow/script/parser/Comment.java +++ /dev/null @@ -1,34 +0,0 @@ -package nextflow.script.parser; - -import org.antlr.v4.runtime.Token; -import org.codehaus.groovy.ast.ASTNode; - -public class Comment { - private final Token token; - private final ASTNode attachedTo; - private final boolean isLeading; - private final boolean isTrailing; - - public Comment(Token token, ASTNode attachedTo, boolean isLeading, boolean isTrailing) { - this.token = token; - this.attachedTo = attachedTo; - this.isLeading = isLeading; - this.isTrailing = isTrailing; - } - - public Token getToken() { - return token; - } - - public ASTNode getAttachedTo() { - return attachedTo; - } - - public boolean isLeading() { - return isLeading; - } - - public boolean isTrailing() { - return isTrailing; - } -} diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java b/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java new file mode 100644 index 0000000000..758eb8a54e --- /dev/null +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java @@ -0,0 +1,373 @@ +package nextflow.script.parser; + +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.tree.TerminalNode; +import org.antlr.v4.runtime.Token; +import org.codehaus.groovy.ast.ASTNode; +import groovy.lang.Tuple2; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Collections; +import java.util.Iterator; +import java.util.ListIterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class CommentWriter { + + public final List commentTokens; + private final List allTokens; + + private final List unattachedComments; + private final List tokensWithInfo; + private final Set pendingComments = new HashSet<>(); + private final Map> attachedComments = new HashMap<>(); + + private final Map> leadingComments = new HashMap<>(); + private final Map>> withinComments = new HashMap<>(); + private final Map> trailingComments = new HashMap<>(); + + public CommentWriter(List allTokens) { + this.commentTokens = allTokens.stream().filter(t -> t.getChannel() == ScriptLexer.COMMENT).collect(Collectors.toList()); + this.allTokens = allTokens; + + this.tokensWithInfo = allTokens.stream().map(t -> new TokenInfo(t)).collect(Collectors.toList()); + this.unattachedComments = tokensWithInfo.stream() + .filter(t -> t.isComment()) + .map(t -> t.getComment()) + .collect(Collectors.toList()); + } + + public class TokenInfo { + private final Token token; + private final Comment comment; + + public TokenInfo(Token token) { + this.token = token; + if (token.getChannel() == ScriptLexer.COMMENT) { + this.comment = new Comment(token); + } else { + this.comment = null; + } + } + + public Token getToken() { + return token; + } + + public Comment getComment() { + return comment; + } + + public boolean isComment() { + return comment != null; + } + } + + + public String toNFComments() { + var header = "// CommentWriter with " + commentTokens.size() + " comment tokens"; + var unattachedHeader = "// Unattached comments:"; + var indent = " "; + var unattachedText = unattachedComments.stream() + .map(t -> { + var line = t.getToken().getLine(); + var column = t.getToken().getCharPositionInLine(); + var text = t.getToken().getText().replaceAll("\n", "\\n"); + var isLeading = t.isLeading(); + var isTrailing = t.isTrailing(); + var attachedTo = t.getAttachedTo(); + var attachedToText = attachedTo != null ? attachedTo.getText() : "null"; + return ( + "// " + indent + line + ":" + column + "{\n" + + "// " + indent + indent + "content: " + text + "\n" + + "// " + indent + indent + "attach: " + attachedToText + "\n" + + "// " + indent + "}\n" + ); + }) + .collect(Collectors.joining("\n")); + var pendingHeader = "// Pending comments:"; + var pendingText = pendingComments.stream() + .map(t -> { + var line = t.getToken().getLine(); + var column = t.getToken().getCharPositionInLine(); + var text = t.getToken().getText().replaceAll("\n", "\\n"); + return ( + "// " + indent + line + ":" + column + "{\n" + + "// " + indent + indent + "content: " + text + "\n" + + "// " + indent + "}\n" + ); + }) + .collect(Collectors.joining("\n")); + var attachedHeader = "// Attached comments:"; + var attachedText = attachedComments.values() + .stream() + .flatMap(List::stream) + .map(t -> { + var line = t.getToken().getLine(); + var column = t.getToken().getCharPositionInLine(); + var text = t.getToken().getText().replaceAll("\n", "\\n"); + var isLeading = t.isLeading(); + var isTrailing = t.isTrailing(); + ASTNode attachedTo = t.getAttachedTo(); + var simpleName = attachedTo != null ? attachedTo.getClass().getSimpleName() : null; + var maybeText = attachedTo != null ? attachedTo.getText(): null; + var attachToCode = maybeText != null ? maybeText.replaceAll("\n", "\\n") : "null"; + var attachedToText = attachedTo != null ? simpleName + " " + attachToCode : "null"; + return ( + "// " + indent + line + ":" + column + "{\n" + + "// " + indent + indent + "content : " + text + "\n" + + "// " + indent + indent + "attached to: " + attachedToText + "\n" + + "// " + indent + indent + "position : " + t.getPositionInfo() + "\n" + + "// " + indent + indent + "written : " + t.isWritten() + "\n" + + "// " + indent + "}\n" + ); + }) + .collect(Collectors.joining("\n")); + return header + "\n" + unattachedHeader + "\n" + unattachedText + "\n" + pendingHeader + "\n" + pendingText + "\n" + attachedHeader + "\n" + attachedText; + } + + public Map> getAttachedComments(ASTNode node) { + var within = new ArrayList(); + var leading = new ArrayList(); + var trailing = new ArrayList(); + attachedComments + .getOrDefault(node, new ArrayList<>()) + .forEach(c -> { + if( c.isWithin() ) { + within.add(c); + } else if( c.isLeading() ) { + leading.add(c); + } else if( c.isTrailing() ) { + trailing.add(c); + } + }); + return Map.of( + "leading", leading, + "within", within, + "trailing", trailing + ); + } + + public List getLeadingComments(ParserRuleContext ctx) { + if (unattachedComments.isEmpty()) { + return new ArrayList<>(); + } + + List comments = new ArrayList<>(); + int contextStartIndex = ctx.getStart().getTokenIndex(); + int firstCommentIndex = unattachedComments.get(0).getToken().getTokenIndex(); + + // Get all comment tokens that are before the context and only separated by newlines + for (int i = contextStartIndex - 1; i >= firstCommentIndex; i--) { + TokenInfo token = tokensWithInfo.get(i); + if (token.isComment()) { + comments.add(token.getComment()); + } else if (token.getToken().getType() == ScriptLexer.NL) { + continue; + } else { + break; + } + } + + // Remove the comments we've processed from unattachedComments + unattachedComments.removeAll(comments); + + return comments; + } + + + + public List getCommentsInbetween( + Token start, Token end, String positionInfo, boolean isLeading, boolean isTrailing + ) { + List comments = new ArrayList<>(); + Iterator it = unattachedComments.iterator(); + int startIdx = start.getStopIndex(); + int endIdx = end.getStartIndex(); + while( it.hasNext() ) { + Comment comment = it.next(); + if (comment.getStartIndex() >= startIdx && comment.getStopIndex() <= endIdx) { + System.err.println(comment.getToken().getLine() + ":" + comment.getToken().getCharPositionInLine() + " " + comment.getToken().getText()); + comments.add(comment); + comment.markAsWithin(positionInfo, isLeading, isTrailing); + comment.makePending(); + it.remove(); + } + } + return comments; + } + + public List getWithinComments(TerminalNode start, TerminalNode end, String positionInfo, boolean isLeading, boolean isTrailing) { + return getCommentsInbetween(start.getSymbol(), end.getSymbol(), positionInfo, isLeading, isTrailing); + } + + public List getTrailingComments(Token lastCtxToken) { + // A trailing comment is either on the same line as the last token of the context, + // or on the next line line after the last token of the context, but in this case it must + // be separated by at least one blank line from the next context + + List comments = new ArrayList<>(); + int contextStopIndex = lastCtxToken.getTokenIndex(); + ListIterator it = tokensWithInfo.listIterator(contextStopIndex); + TokenInfo token = it.next(); + if (token.isComment()) + comments.add(comment); // If the token directly after the context then it is always trailing + + int nlsBefore = 0; + List candiateComments = new ArrayList<>(); + boolean isLeading = false; + while( it.hasNext() && nlsBefore < 2 ) { + TokenInfo token = it.next(); + if (token.isComment()) { + candidateComments.add(token.getComment()); + } else if (token.getToken().getType() == ScriptLexer.NL) { + nlsBefore++; + } else { + // We encountered a non newline token before there were a blank line + // this means that the comment should be leading to the next statement instead + isLeading = true; + } + } + if (!isLeading) { + comments.addAll(candiatesComments); + } + commments.foreach(c -> { + c.makePending(); + c.markAsTrailing(); + }) + unattachedComments.removeAll(comments); + return comments; + } + + public void attachComments(ASTNode node, List comments) { + comments.forEach(c -> c.attachTo(node)); + } + + public List getLeadingComments(ASTNode node) { + return leadingComments.getOrDefault(node, new ArrayList<>()); + } + + public Map> getWithinComments(ASTNode node) { + return withinComments.getOrDefault(node, new HashMap<>()); + } + + public List getTrailingComments(ASTNode node) { + return trailingComments.getOrDefault(node, new ArrayList<>()); + } + + public class Comment { + private final Token token; + private boolean isLeading = false; + private boolean isTrailing = false; + private boolean isWithin = false; + + private ASTNode attachedTo = null; + + // If the comment is within a node, we need to keep track of where it was originally + // We use the field below to keep track of this + private String positionInfo = null; + + private boolean isWritten = false; + + public Comment(Token token) { + this.token = token; + } + + public void attachTo(ASTNode node) { + if (attachedTo != null) { + throw new IllegalStateException("Comment already attached to " + attachedTo); + } + this.attachedTo = node; + if (pendingComments.contains(this)) { + pendingComments.remove(this); + } else { + System.err.println("Comment not pending: " + token.getText()); + } + attachedComments.computeIfAbsent(node, k -> new ArrayList<>()).add(this); + if( isWithin ) { + withinComments.computeIfAbsent(node, k -> new HashMap<>()).computeIfAbsent(positionInfo, k -> new ArrayList<>()).add(this); + } else if( isLeading ) { + leadingComments.computeIfAbsent(node, k -> new ArrayList<>()).add(this); + } else if( isTrailing ) { + trailingComments.computeIfAbsent(node, k -> new ArrayList<>()).add(this); + } + } + + public void makePending() { + pendingComments.add(this); + } + + public Token getToken() { + return token; + } + + public ASTNode getAttachedTo() { + return attachedTo; + } + + public void markAsLeading() { + this.isLeading = true; + } + + public void markAsTrailing() { + this.isTrailing = true; + } + + public void markAsWithin(String positionInfo, boolean isLeading, boolean isTrailing) { + this.isWithin = true; + this.positionInfo = positionInfo; + this.isLeading = isLeading; + this.isTrailing = isTrailing; + } + + public boolean isLeading() { + return isLeading; + } + + public boolean isTrailing() { + return isTrailing; + } + + public boolean isWithin() { + return isWithin; + } + + public String getPositionInfo() { + return positionInfo; + } + + public int getStartIndex() { + return token.getStartIndex(); + } + + public int getStopIndex() { + return token.getStopIndex(); + } + + public boolean isWritten() { + return isWritten; + } + + public Tuple2 write() { + if( isWritten ) { + throw new IllegalStateException("Comment already written"); + } + isWritten = true; + return new Tuple2<>(token.getText(), token.getType() == ScriptLexer.SL_COMMENT); + } + + + + @Override + public int hashCode() { + return token.getTokenIndex(); + } + } + + +} diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java b/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java index 06aea61298..309e305c9e 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java @@ -105,7 +105,7 @@ import static nextflow.script.parser.PositionConfigureUtils.ast; import static nextflow.script.parser.PositionConfigureUtils.tokenPosition; import static nextflow.script.parser.ScriptParser.*; -import static nextflow.script.parser.Comment; +import static nextflow.script.parser.CommentWriter.*; import static org.codehaus.groovy.ast.expr.VariableExpression.THIS_EXPRESSION; import static org.codehaus.groovy.ast.tools.GeneralUtils.*; @@ -120,8 +120,8 @@ public class ScriptAstBuilder { private ScriptNode moduleNode; private ScriptLexer lexer; private ScriptParser parser; - private CommonTokenStream commentTokens; - private List unattachedComments; + private CommentWriter commentWriter; + private final GroovydocManager groovydocManager; private Tuple2 numberFormatError; @@ -135,10 +135,31 @@ public ScriptAstBuilder(SourceUnit sourceUnit) { this.parser = new ScriptParser(new CommonTokenStream(lexer)); parser.setErrorHandler(new DescriptiveErrorStrategy(charStream)); - // Collect all the comments from the lexer - this.commentTokens = new CommonTokenStream(lexer, ScriptLexer.COMMENTS); - this.unattachedComments = commentTokens.getTokens(); - + // // Collect all the comments from a separate lexer instance + var commentCharStream = createCharStream(sourceUnit); + var commentLexer = new ScriptLexer(commentCharStream); + var commentTokenStream = new CommonTokenStream(commentLexer); + + // Consume all tokens first + commentTokenStream.fill(); + + // Filter for only COMMENT channel tokens + var allTokens = commentTokenStream.getTokens(); + var commentTokensList = allTokens.stream() + .filter(token -> token.getChannel() == ScriptLexer.COMMENT) + .toList(); + var semanticTokensList = allTokens.stream() + .filter(token -> token.getChannel() == ScriptLexer.DEFAULT_TOKEN_CHANNEL) + .toList(); + + System.err.println("DEBUG: Found " + commentTokensList.size() + " comment tokens out of " + allTokens.size() + " total tokens"); + for (int i = 0; i < commentTokensList.size(); i++) { + var token = commentTokensList.get(i); + System.err.println("DEBUG: Comment token " + i + ": '" + token.getText() + "' (type: " + token.getType() + ", channel: " + token.getChannel() + ")"); + } + + this.commentWriter = new CommentWriter(allTokens); + moduleNode.putNodeMetaData("commentWriter", commentWriter); var groovydocEnabled = sourceUnit.getConfiguration().isGroovydocEnabled(); this.groovydocManager = new GroovydocManager(groovydocEnabled); @@ -257,24 +278,19 @@ private ModuleNode compilationUnit(CompilationUnitContext ctx) { } private boolean scriptDeclaration(ScriptDeclarationContext ctx) { + if( ctx instanceof FeatureFlagDeclAltContext ffac ) { - var leadingComments = getLeadingComments(ctx); var node = featureFlagDeclaration(ffac.featureFlagDeclaration()); - saveLeadingComments(node, leadingComments); moduleNode.addFeatureFlag(node); } else if( ctx instanceof EnumDefAltContext edac ) { - var leadingComments = getLeadingComments(ctx); var node = enumDef(edac.enumDef()); - saveLeadingComments(node, leadingComments); moduleNode.addClass(node); } else if( ctx instanceof FunctionDefAltContext fdac ) { - var leadingComments = getLeadingComments(ctx); var node = functionDef(fdac.functionDef()); - saveLeadingComments(node, leadingComments); moduleNode.addFunction(node); } @@ -285,39 +301,29 @@ else if( ctx instanceof ImportDeclAltContext iac ) { } else if( ctx instanceof IncludeDeclAltContext iac ) { - var leadingComments = getLeadingComments(ctx); var node = includeDeclaration(iac.includeDeclaration()); - saveLeadingComments(node, leadingComments); moduleNode.addInclude(node); } else if( ctx instanceof OutputDefAltContext odac ) { - var leadingComments = getLeadingComments(ctx); var node = outputDef(odac.outputDef()); - saveLeadingComments(node, leadingComments); if( moduleNode.getOutputs() != null ) collectSyntaxError(new SyntaxException("Output block defined more than once", node)); moduleNode.setOutputs(node); } else if( ctx instanceof ParamDeclAltContext pac ) { - var leadingComments = getLeadingComments(ctx); var node = paramDeclaration(pac.paramDeclaration()); - saveLeadingComments(node, leadingComments); moduleNode.addParam(node); } else if( ctx instanceof ProcessDefAltContext pdac ) { - var leadingComments = getLeadingComments(ctx); var node = processDef(pdac.processDef()); - saveLeadingComments(node, leadingComments); moduleNode.addProcess(node); } else if( ctx instanceof WorkflowDefAltContext wdac ) { - var leadingComments = getLeadingComments(ctx); var node = workflowDef(wdac.workflowDef()); - saveLeadingComments(node, leadingComments); if( node.isEntry() ) { if( moduleNode.getEntry() != null ) collectSyntaxError(new SyntaxException("Entry workflow defined more than once", node)); @@ -387,11 +393,13 @@ private ClassNode enumDef(EnumDefContext ctx) { } private ProcessNode processDef(ProcessDefContext ctx) { + var leadingComments = commentWriter.getLeadingComments(ctx); var name = ctx.name.getText(); if( ctx.body == null ) { var empty = EmptyStatement.INSTANCE; var result = ast( new ProcessNode(name, empty, empty, empty, EmptyExpression.INSTANCE, null, empty, empty), ctx ); collectSyntaxError(new SyntaxException("Missing process script body", result)); + commentWriter.attachComments(result, leadingComments); return result; } @@ -411,6 +419,7 @@ private ProcessNode processDef(ProcessDefContext ctx) { } var result = ast( new ProcessNode(name, directives, inputs, outputs, when, type, exec, stub), ctx ); + commentWriter.attachComments(result, leadingComments); groovydocManager.handle(result, ctx); return result; } @@ -428,21 +437,33 @@ private Statement processDirectives(ProcessDirectivesContext ctx) { private Statement processInputs(ProcessInputsContext ctx) { if( ctx == null ) return EmptyStatement.INSTANCE; + var comments = commentWriter.getLeadingComments(ctx); + comments.addAll(commentWriter.getWithinComments( + ctx.INPUT(), ctx.COLON(), "input", false, true + )); var statements = ctx.statement().stream() .map(this::statement) .map(stmt -> checkDirective(stmt, "Invalid process input")) .toList(); - return ast( block(null, statements), ctx ); + var result = ast( block(null, statements), ctx ); + commentWriter.attachComments(result, comments); + return result; } private Statement processOutputs(ProcessOutputsContext ctx) { if( ctx == null ) return EmptyStatement.INSTANCE; + var leadingComments = commentWriter.getLeadingComments(ctx); + var comments = commentWriter.getWithinComments( + ctx.OUTPUT(), ctx.COLON(), "output", false, true + ); var statements = ctx.statement().stream() .map(this::statement) .map(stmt -> checkDirective(stmt, "Invalid process output")) .toList(); - return ast( block(null, statements), ctx ); + var result = ast( block(null, statements), ctx ); + commentWriter.attachComments(result, comments); + return result; } private Statement checkDirective(Statement stmt, String errorMessage) { @@ -497,7 +518,13 @@ private boolean isDirectiveWithNegativeValue(Expression expression) { private Expression processWhen(ProcessWhenContext ctx) { if( ctx == null ) return EmptyExpression.INSTANCE; - return ast( expression(ctx.expression()), ctx ); + var comments = commentWriter.getLeadingComments(ctx); + comments.addAll(commentWriter.getWithinComments( + ctx.WHEN(), ctx.COLON(), "when", false, true + )); + var result = ast( expression(ctx.expression()), ctx ); + commentWriter.attachComments(result, comments); + return result; } private String processType(ProcessExecContext ctx) { @@ -517,40 +544,56 @@ private String processType(ProcessExecContext ctx) { private Statement processStub(ProcessStubContext ctx) { if( ctx == null ) return EmptyStatement.INSTANCE; - return ast( blockStatements(ctx.blockStatements()), ctx ); + var comments = commentWriter.getLeadingComments(ctx); + comments.addAll(commentWriter.getWithinComments( + ctx.STUB(), ctx.COLON(), "stub", false, true + )); + var result = ast( blockStatements(ctx.blockStatements()), ctx ); + commentWriter.attachComments(result, comments); + return result; } private WorkflowNode workflowDef(WorkflowDefContext ctx) { var name = ctx.name != null ? ctx.name.getText() : null; + WorkflowNode result; + var comments = commentWriter.getLeadingComments(ctx); + if( name != null ) { + // Look for comments between the name and the body + comments.addAll(commentWriter.getCommentsInbetween(ctx.getStart(), ctx.name.getStop(), "keyword", false, true)); + comments.addAll(commentWriter.getCommentsInbetween(ctx.name.getStop(), ctx.LBRACE().getSymbol(), "name", false, true)); + } else { + comments.addAll(commentWriter.getCommentsInbetween(ctx.getStart(), ctx.LBRACE().getSymbol(), "keyword", false, true)); + } if( ctx.body == null ) { - var result = ast( new WorkflowNode(name, null, null, null, null), ctx ); + result = ast( new WorkflowNode(name, null, null, null, null), ctx ); groovydocManager.handle(result, ctx); return result; - } - - var takes = workflowTakes(ctx.body.workflowTakes()); - var emits = workflowEmits(ctx.body.workflowEmits()); - var publishers = workflowPublishers(ctx.body.workflowPublishers()); - var main = blockStatements( - ctx.body.workflowMain() != null - ? ctx.body.workflowMain().blockStatements() - : null - ); + } else { + var takes = workflowTakes(ctx.body.workflowTakes()); + var emits = workflowEmits(ctx.body.workflowEmits()); + var publishers = workflowPublishers(ctx.body.workflowPublishers()); + var main = blockStatements( + ctx.body.workflowMain() != null + ? ctx.body.workflowMain().blockStatements() + : null + ); + + if( name == null ) { + if( takes instanceof BlockStatement ) + collectSyntaxError(new SyntaxException("Entry workflow cannot have a take section", takes)); + if( emits instanceof BlockStatement ) + collectSyntaxError(new SyntaxException("Entry workflow cannot have an emit section", emits)); + } + if( name != null ) { + if( publishers instanceof BlockStatement ) + collectSyntaxError(new SyntaxException("Named workflow cannot have a publish section", publishers)); + } - if( name == null ) { - if( takes instanceof BlockStatement ) - collectSyntaxError(new SyntaxException("Entry workflow cannot have a take section", takes)); - if( emits instanceof BlockStatement ) - collectSyntaxError(new SyntaxException("Entry workflow cannot have an emit section", emits)); - } - if( name != null ) { - if( publishers instanceof BlockStatement ) - collectSyntaxError(new SyntaxException("Named workflow cannot have a publish section", publishers)); + result = ast( new WorkflowNode(name, takes, main, emits, publishers), ctx ); + groovydocManager.handle(result, ctx); } - - var result = ast( new WorkflowNode(name, takes, main, emits, publishers), ctx ); - groovydocManager.handle(result, ctx); + commentWriter.attachComments(result, comments); return result; } @@ -581,6 +624,7 @@ private Statement workflowEmits(WorkflowEmitsContext ctx) { if( ctx == null ) return EmptyStatement.INSTANCE; + var comments = commentWriter.getLeadingComments(ctx); var statements = ctx.statement().stream() .map(this::workflowEmit) .filter(stmt -> stmt != null) @@ -589,6 +633,7 @@ private Statement workflowEmits(WorkflowEmitsContext ctx) { var hasEmitExpression = statements.stream().anyMatch(this::isEmitExpression); if( hasEmitExpression && statements.size() > 1 ) collectSyntaxError(new SyntaxException("Every emit must be assigned to a name when there are multiple emits", result)); + commentWriter.attachComments(result, comments); return result; } @@ -685,7 +730,7 @@ private Statement statement(StatementContext ctx) { if( ctx instanceof EmptyStmtAltContext ) return EmptyStatement.INSTANCE; - List leadingComments = getLeadingComments(ctx); + List leadingComments = commentWriter.getLeadingComments(ctx); if( ctx instanceof IfElseStmtAltContext ieac ) result = ast( ifElseStatement(ieac.ifElseStatement()), ieac ); @@ -717,7 +762,7 @@ else if( ctx instanceof ExpressionStmtAltContext eac ) else throw createParsingFailedException("Invalid statement: " + ctx.getText(), ctx); - saveLeadingComments(result, leadingComments); + commentWriter.attachComments(result, leadingComments); return result; } @@ -831,9 +876,11 @@ private Expression variableNames(VariableNamesContext ctx) { } private Expression variableName(IdentifierContext ctx) { + var comments = commentWriter.getLeadingComments(ctx); var name = identifier(ctx); var result = ast( varX(name), ctx ); checkInvalidVarName(name, result); + commentWriter.attachComments(result, comments); return result; } @@ -1690,26 +1737,26 @@ private ClassNode legacyType(ParserRuleContext ctx) { /// COMMENTS - private List getLeadingComments(ParserRuleContext ctx) { - // Get all the unattached comments up until this point - var leadingComments = new ArrayList(unattachedComments.stream().map(Token::getText).toList()); - // int stopIndex = ctx.getStart().getTokenIndex(); - // Iterator it = unattachedComments.iterator(); - // while( it.hasNext() ) { - // var comment = it.next(); - // if( comment.getTokenIndex() >= stopIndex ) - // break; - // leadingComments.add(comment.getText()); - // it.remove(); // The comment has been attached to this node, so remove it from the unattached comments list - // } - - return leadingComments; - } - - private void saveLeadingComments(ASTNode node, List comments) { - if( !comments.isEmpty() ) - node.putNodeMetaData(ASTNodeMarker.LEADING_COMMENTS, comments); - } + // private List getLeadingComments(ParserRuleContext ctx) { + // // Get all the unattached comments up until this point + // var leadingComments = new ArrayList(unattachedComments.stream().map(Token::getText).toList()); + // // int stopIndex = ctx.getStart().getTokenIndex(); + // // Iterator it = unattachedComments.iterator(); + // // while( it.hasNext() ) { + // // var comment = it.next(); + // // if( comment.getTokenIndex() >= stopIndex ) + // // break; + // // leadingComments.add(comment.getText()); + // // it.remove(); // The comment has been attached to this node, so remove it from the unattached comments list + // // } + + // return leadingComments; + // } + + // private void saveLeadingComments(ASTNode node, List comments) { + // if( !comments.isEmpty() ) + // node.putNodeMetaData(ASTNodeMarker.LEADING_COMMENTS, comments.stream().map(CommentWriter.Comment::getToken).map(Token::getText).toList()); + // } private boolean saveLeadingComments0(ParserRuleContext ctx, List comments) { @@ -1855,7 +1902,7 @@ else if( t instanceof GroovySyntaxError gse ) else if( t instanceof Exception e ) collectException(e); - + return new CompilationFailedException( CompilePhase.PARSING.getPhaseNumber(), sourceUnit, @@ -1863,10 +1910,12 @@ else if( t instanceof Exception e ) } private void collectSyntaxError(SyntaxException e) { + e.printStackTrace(); sourceUnit.getErrorCollector().addFatalError(new SyntaxErrorMessage(e, sourceUnit)); } private void collectException(Exception e) { + e.printStackTrace(); sourceUnit.getErrorCollector().addException(e, sourceUnit); } From ade9682bf2046085daf30accb8eccf27d685c73e Mon Sep 17 00:00:00 2001 From: ErikDanielsson Date: Tue, 5 Aug 2025 16:54:06 +0200 Subject: [PATCH 3/5] Start adding support for directives --- .../nf-lang/src/main/antlr/ScriptParser.g4 | 2 +- .../nextflow/script/formatter/Formatter.java | 9 +- .../formatter/ScriptFormattingVisitor.java | 59 +++++-- .../nextflow/script/parser/CommentWriter.java | 162 ++++++++++++------ .../script/parser/ScriptAstBuilder.java | 116 +++++++++---- 5 files changed, 251 insertions(+), 97 deletions(-) diff --git a/modules/nf-lang/src/main/antlr/ScriptParser.g4 b/modules/nf-lang/src/main/antlr/ScriptParser.g4 index f35740d7b9..26a4501d1f 100644 --- a/modules/nf-lang/src/main/antlr/ScriptParser.g4 +++ b/modules/nf-lang/src/main/antlr/ScriptParser.g4 @@ -211,7 +211,7 @@ processWhen ; processExec - : (SCRIPT | SHELL | EXEC) COLON nls blockStatements + : execType=(SCRIPT | SHELL | EXEC) COLON nls blockStatements ; processStub diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java index ff03bd3cfb..af8309dfe8 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java @@ -16,6 +16,7 @@ package nextflow.script.formatter; import java.util.List; +import java.util.Map; import java.util.stream.Stream; import nextflow.script.ast.ASTNodeMarker; @@ -119,6 +120,12 @@ public void writeComment(CommentWriter.Comment comment, boolean shouldBreakLine) } } + public void writeComments(Map> comments, String key, boolean shouldBreakLine) { + if (comments.containsKey(key)) { + writeComments(comments.get(key), shouldBreakLine); + } + } + public void writeComments(List comments, boolean shouldBreakLine) { for( var comment : comments ) { writeComment(comment, shouldBreakLine); @@ -167,7 +174,7 @@ protected void visitIfElse(IfStatement node, boolean preIndent) { appendLeadingComments(node); if( preIndent ) appendIndent(); - append("i f ("); + append("if ("); visit(node.getBooleanExpression()); append(") {\n"); incIndent(); diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java index 9225c927d2..ce51de082a 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java @@ -142,7 +142,7 @@ else if( decl instanceof ProcessNode pn ) else if( decl instanceof WorkflowNode wn ) visitWorkflow(wn); } - fmt.append(commentWriter.toNFComments()); + System.out.print(commentWriter.toNFComments()); } public String toString() { @@ -234,15 +234,11 @@ public void visitWorkflow(WorkflowNode node) { fmt.appendLeadingComments(node); fmt.append("workflow"); fmt.append(' '); - if( withinComments.containsKey("keyword") ) { - fmt.writeComments(withinComments.get("keyword"), false); - } + fmt.writeComments(withinComments, "KEYWORD", false); if( !node.isEntry() ) { fmt.append(node.getName()); fmt.append(' '); - if( withinComments.containsKey("name") ) { - fmt.writeComments(withinComments.get("name"), false); - } + fmt.writeComments(withinComments, "NAME", false); } fmt.append("{\n"); fmt.incIndent(); @@ -359,10 +355,17 @@ else if( emit instanceof AssignmentExpression assign ) { @Override public void visitProcess(ProcessNode node) { + var withinComments = commentWriter.getWithinComments(node); + var trailingComments = commentWriter.getTrailingComments(node); + System.err.println(trailingComments); fmt.appendLeadingComments(node); fmt.append("process "); + fmt.writeComments(withinComments, "KEYWORD", false); fmt.append(node.getName()); - fmt.append(" {\n"); + fmt.writeComments(withinComments, "NAME", false); + fmt.append(" {"); + fmt.writeComments(trailingComments, "LBRACE", false); + fmt.append("\n"); fmt.incIndent(); if( node.directives instanceof BlockStatement ) { visitDirectives(node.directives); @@ -370,7 +373,14 @@ public void visitProcess(ProcessNode node) { } if( node.inputs instanceof BlockStatement ) { fmt.appendIndent(); - fmt.append("input:\n"); + fmt.appendLeadingComments(node.inputs); + var inputWithinComments = commentWriter.getWithinComments(node.inputs); + var inputTrailingComments = commentWriter.getTrailingComments(node.inputs); + fmt.append("input"); + fmt.writeComments(inputWithinComments, "INPUT", false); + fmt.append(":"); + fmt.writeComments(inputTrailingComments, "COLON", false); + fmt.append("\n"); visitDirectives(node.inputs); fmt.appendNewLine(); } @@ -380,19 +390,40 @@ public void visitProcess(ProcessNode node) { } if( !(node.when instanceof EmptyExpression) ) { fmt.appendIndent(); - fmt.append("when:\n"); + var whenWithinComments = commentWriter.getWithinComments(node.when); + var whenTrailingComments = commentWriter.getTrailingComments(node.when); + fmt.appendLeadingComments(node.when); + fmt.append("when"); + fmt.writeComments(whenWithinComments, "WHEN", false); + fmt.append(":"); + fmt.writeComments(whenTrailingComments, "COLON", false); + fmt.append("\n"); + fmt.incIndent(); fmt.appendIndent(); fmt.visit(node.when); fmt.append("\n\n"); } + fmt.appendLeadingComments(node.exec); fmt.appendIndent(); + var execWithinComments = commentWriter.getWithinComments(node.exec); + var execTrailingComments = commentWriter.getTrailingComments(node.exec); fmt.append(node.type); - fmt.append(":\n"); + fmt.writeComments(execWithinComments, "EXEC", false); + fmt.append(":"); + fmt.writeComments(execTrailingComments, "COLON", false); + fmt.append("\n"); fmt.visit(node.exec); if( !(node.stub instanceof EmptyStatement) ) { fmt.appendNewLine(); fmt.appendIndent(); - fmt.append("stub:\n"); + fmt.appendLeadingComments(node.stub); + var commentWriter = getCommentWriter(); + var stubWithinComments = commentWriter.getWithinComments(node.stub); + var stubTrailingComments = commentWriter.getTrailingComments(node.stub); + fmt.append("stub"); + fmt.writeComments(stubWithinComments, "STUB", false); + fmt.append(":"); + fmt.writeComments(stubTrailingComments, "COLON", false); fmt.visit(node.stub); } if( options.maheshForm() && node.outputs instanceof BlockStatement ) { @@ -400,7 +431,9 @@ public void visitProcess(ProcessNode node) { visitProcessOutputs(node.outputs); } fmt.decIndent(); - fmt.append("}\n"); + fmt.append("}"); + fmt.writeComments(trailingComments, "RBRACE", false); + fmt.append("\n"); } private void visitProcessOutputs(Statement outputs) { diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java b/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java index 758eb8a54e..d1f9fa809b 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java @@ -29,7 +29,7 @@ public class CommentWriter { private final Map> leadingComments = new HashMap<>(); private final Map>> withinComments = new HashMap<>(); - private final Map> trailingComments = new HashMap<>(); + private final Map>> trailingComments = new HashMap<>(); public CommentWriter(List allTokens) { this.commentTokens = allTokens.stream().filter(t -> t.getChannel() == ScriptLexer.COMMENT).collect(Collectors.toList()); @@ -66,6 +66,11 @@ public Comment getComment() { public boolean isComment() { return comment != null; } + + @Override + public String toString() { + return "TokenInfo:" + (isComment() ? "COMMENT" : "SEMANTIC") + ": '" + token.getText().replace("\n", "\\n") + "'"; + } } @@ -118,12 +123,18 @@ public String toNFComments() { var maybeText = attachedTo != null ? attachedTo.getText(): null; var attachToCode = maybeText != null ? maybeText.replaceAll("\n", "\\n") : "null"; var attachedToText = attachedTo != null ? simpleName + " " + attachToCode : "null"; + var relPos = ( + (t.isWithin() ? "within " : "")+ + (t.isLeading() ? "leading " : "") + + (t.isTrailing() ? "trailing " : "") + ); return ( "// " + indent + line + ":" + column + "{\n" + "// " + indent + indent + "content : " + text + "\n" + "// " + indent + indent + "attached to: " + attachedToText + "\n" + - "// " + indent + indent + "position : " + t.getPositionInfo() + "\n" + + "// " + indent + indent + "token : " + t.getPositionInfo() + "\n" + "// " + indent + indent + "written : " + t.isWritten() + "\n" + + "// " + indent + indent + "rel. pos. : " + relPos + "\n" + "// " + indent + "}\n" ); }) @@ -153,7 +164,7 @@ public Map> getAttachedComments(ASTNode node) { ); } - public List getLeadingComments(ParserRuleContext ctx) { + public List processLeadingComments(ParserRuleContext ctx) { if (unattachedComments.isEmpty()) { return new ArrayList<>(); } @@ -165,24 +176,24 @@ public List getLeadingComments(ParserRuleContext ctx) { // Get all comment tokens that are before the context and only separated by newlines for (int i = contextStartIndex - 1; i >= firstCommentIndex; i--) { TokenInfo token = tokensWithInfo.get(i); - if (token.isComment()) { + if( token.isComment() && !token.getComment().isConsumed() ) { comments.add(token.getComment()); - } else if (token.getToken().getType() == ScriptLexer.NL) { + } else if( token.getToken().getType() == ScriptLexer.NL ){ continue; } else { break; } } - - // Remove the comments we've processed from unattachedComments - unattachedComments.removeAll(comments); + + comments.forEach(c -> { + c.makePending(); + c.markAsLeading(ctx.getStart()); + }); return comments; } - - - public List getCommentsInbetween( + public List processInbetweenComments( Token start, Token end, String positionInfo, boolean isLeading, boolean isTrailing ) { List comments = new ArrayList<>(); @@ -194,56 +205,88 @@ public List getCommentsInbetween( if (comment.getStartIndex() >= startIdx && comment.getStopIndex() <= endIdx) { System.err.println(comment.getToken().getLine() + ":" + comment.getToken().getCharPositionInLine() + " " + comment.getToken().getText()); comments.add(comment); - comment.markAsWithin(positionInfo, isLeading, isTrailing); - comment.makePending(); - it.remove(); } } + comments.forEach(c -> { + c.markAsWithin(positionInfo, start, end, isLeading, isTrailing); + c.makePending(); + + }); return comments; } - public List getWithinComments(TerminalNode start, TerminalNode end, String positionInfo, boolean isLeading, boolean isTrailing) { - return getCommentsInbetween(start.getSymbol(), end.getSymbol(), positionInfo, isLeading, isTrailing); + public List processInbetweenComments(TerminalNode start, TerminalNode end, String positionInfo, boolean isLeading, boolean isTrailing) { + return processInbetweenComments(start.getSymbol(), end.getSymbol(), positionInfo, isLeading, isTrailing); } - public List getTrailingComments(Token lastCtxToken) { + + public List processTrailingComments(Token lastCtxToken, String positionInfo, boolean allowNewlines) { // A trailing comment is either on the same line as the last token of the context, // or on the next line line after the last token of the context, but in this case it must // be separated by at least one blank line from the next context List comments = new ArrayList<>(); int contextStopIndex = lastCtxToken.getTokenIndex(); - ListIterator it = tokensWithInfo.listIterator(contextStopIndex); - TokenInfo token = it.next(); - if (token.isComment()) - comments.add(comment); // If the token directly after the context then it is always trailing - + ListIterator it = tokensWithInfo.listIterator(contextStopIndex + 1); + TokenInfo token = null; int nlsBefore = 0; - List candiateComments = new ArrayList<>(); - boolean isLeading = false; - while( it.hasNext() && nlsBefore < 2 ) { - TokenInfo token = it.next(); - if (token.isComment()) { - candidateComments.add(token.getComment()); - } else if (token.getToken().getType() == ScriptLexer.NL) { - nlsBefore++; + // First get tokens that are directly after the context, without any newlines + while( it.hasNext() ) { + token = it.next(); + System.err.println(token); + if (token.isComment() && !token.getComment().isConsumed()) { + comments.add(token.getComment()); } else { - // We encountered a non newline token before there were a blank line - // this means that the comment should be leading to the next statement instead - isLeading = true; + if (token.getToken().getType() == ScriptLexer.NL) { + nlsBefore++; + } + break; } } - if (!isLeading) { - comments.addAll(candiatesComments); + + if (allowNewlines) { + // Get comments that are on the lines following the context + // but only if they seem to be trailing rather than leading + // for the next context + List candidateComments = new ArrayList<>(); + boolean isLeading = false; + while( it.hasNext() && nlsBefore < 2 ) { + token = it.next(); + if (token.isComment() && !token.getComment().isConsumed()) { + candidateComments.add(token.getComment()); + } else if (token.getToken().getType() == ScriptLexer.NL) { + nlsBefore++; + } else { + // We encountered a non newline token before there were a blank line + // this means that the comment should be leading to the next statement instead + isLeading = true; + } + } + if (!isLeading) { + comments.addAll(candidateComments); + } } - commments.foreach(c -> { + + comments.forEach(c -> { c.makePending(); - c.markAsTrailing(); - }) - unattachedComments.removeAll(comments); + c.markAsTrailing(lastCtxToken, positionInfo); + }); + return comments; } + public List processTrailingComments(TerminalNode term, String positionInfo, boolean allowNewlines) { + return processTrailingComments(term.getSymbol(), positionInfo, allowNewlines); + } + + public List processTrailingComments(TerminalNode term, boolean allowNewlines) { + return processTrailingComments(term.getSymbol(), "STANDARD", allowNewlines); + } + + public List processTrailingComments(Token token, boolean allowNewlines) { + return processTrailingComments(token, "STANDARD", allowNewlines); + } + public void attachComments(ASTNode node, List comments) { comments.forEach(c -> c.attachTo(node)); } @@ -256,8 +299,8 @@ public Map> getWithinComments(ASTNode node) { return withinComments.getOrDefault(node, new HashMap<>()); } - public List getTrailingComments(ASTNode node) { - return trailingComments.getOrDefault(node, new ArrayList<>()); + public Map> getTrailingComments(ASTNode node) { + return trailingComments.getOrDefault(node, new HashMap<>()); } public class Comment { @@ -267,10 +310,13 @@ public class Comment { private boolean isWithin = false; private ASTNode attachedTo = null; + private boolean consumed = false; // If the comment is within a node, we need to keep track of where it was originally // We use the field below to keep track of this private String positionInfo = null; + private Token precedingToken = null; + private Token followingToken = null; private boolean isWritten = false; @@ -283,23 +329,24 @@ public void attachTo(ASTNode node) { throw new IllegalStateException("Comment already attached to " + attachedTo); } this.attachedTo = node; - if (pendingComments.contains(this)) { - pendingComments.remove(this); - } else { - System.err.println("Comment not pending: " + token.getText()); - } + pendingComments.remove(this); attachedComments.computeIfAbsent(node, k -> new ArrayList<>()).add(this); if( isWithin ) { withinComments.computeIfAbsent(node, k -> new HashMap<>()).computeIfAbsent(positionInfo, k -> new ArrayList<>()).add(this); } else if( isLeading ) { leadingComments.computeIfAbsent(node, k -> new ArrayList<>()).add(this); } else if( isTrailing ) { - trailingComments.computeIfAbsent(node, k -> new ArrayList<>()).add(this); + trailingComments.computeIfAbsent(node, k -> new HashMap<>()).computeIfAbsent(positionInfo, k -> new ArrayList<>()).add(this); } } public void makePending() { + if (consumed) { + throw new IllegalStateException("Cannot make comment " + token.getText() + " pending since it already is"); + } + unattachedComments.remove(this); pendingComments.add(this); + consumed = true; } public Token getToken() { @@ -310,21 +357,36 @@ public ASTNode getAttachedTo() { return attachedTo; } - public void markAsLeading() { + public void markAsLeading(Token followingToken) { this.isLeading = true; + this.followingToken = followingToken; } - public void markAsTrailing() { + public void markAsTrailing(Token precedingToken, String positionInfo) { this.isTrailing = true; + this.precedingToken = precedingToken; + this.positionInfo = positionInfo; } - public void markAsWithin(String positionInfo, boolean isLeading, boolean isTrailing) { + public void markAsWithin( + String positionInfo, + Token start, + Token end, + boolean isLeading, + boolean isTrailing + ) { this.isWithin = true; this.positionInfo = positionInfo; + this.precedingToken = start; + this.followingToken = end; this.isLeading = isLeading; this.isTrailing = isTrailing; } + public boolean isConsumed() { + return consumed; + } + public boolean isLeading() { return isLeading; } diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java b/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java index 9c8216580c..dad6ae981f 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java @@ -393,13 +393,20 @@ private ClassNode enumDef(EnumDefContext ctx) { } private ProcessNode processDef(ProcessDefContext ctx) { - var leadingComments = commentWriter.getLeadingComments(ctx); + var comments = commentWriter.processLeadingComments(ctx); var name = ctx.name.getText(); + // Comments between keyword and name + comments.addAll(commentWriter.processInbetweenComments(ctx.getStart(), ctx.name.getStop(), "KEYWORD", false, true)); + // Comments between name and '{' + comments.addAll(commentWriter.processInbetweenComments(ctx.name.getStop(), ctx.LBRACE().getSymbol(), "NAME", false, true)); + // Look for trailing comments after the '{' + comments.addAll(commentWriter.processTrailingComments(ctx.LBRACE().getSymbol(), "LBRACE", false)); + if( ctx.body == null ) { var empty = EmptyStatement.INSTANCE; var result = ast( new ProcessNode(name, empty, empty, empty, EmptyExpression.INSTANCE, null, empty, empty), ctx ); collectSyntaxError(new SyntaxException("Missing process script body", result)); - commentWriter.attachComments(result, leadingComments); + commentWriter.attachComments(result, comments); return result; } @@ -407,7 +414,7 @@ private ProcessNode processDef(ProcessDefContext ctx) { var inputs = processInputs(ctx.body.processInputs()); var outputs = processOutputs(ctx.body.processOutputs()); var when = processWhen(ctx.body.processWhen()); - var type = processType(ctx.body.processExec()); + var type = processExec(ctx.body.processExec()); var exec = ctx.body.blockStatements() != null ? blockStatements(ctx.body.blockStatements()) : blockStatements(ctx.body.processExec().blockStatements()); @@ -418,8 +425,11 @@ private ProcessNode processDef(ProcessDefContext ctx) { collectSyntaxError(new SyntaxException("The `script:` or `exec:` label is required when other sections are present", exec)); } + // Look for trailing comments after the '}' + comments.addAll(commentWriter.processTrailingComments(ctx.RBRACE().getSymbol(), "RBRACE", true)); + var result = ast( new ProcessNode(name, directives, inputs, outputs, when, type, exec, stub), ctx ); - commentWriter.attachComments(result, leadingComments); + commentWriter.attachComments(result, comments); groovydocManager.handle(result, ctx); return result; } @@ -437,10 +447,16 @@ private Statement processDirectives(ProcessDirectivesContext ctx) { private Statement processInputs(ProcessInputsContext ctx) { if( ctx == null ) return EmptyStatement.INSTANCE; - var comments = commentWriter.getLeadingComments(ctx); - comments.addAll(commentWriter.getWithinComments( - ctx.INPUT(), ctx.COLON(), "input", false, true + var comments = commentWriter.processLeadingComments(ctx); + comments.addAll(commentWriter.processInbetweenComments( + ctx.INPUT(), ctx.COLON(), "INPUT", false, true )); + // Capture any trailing comment that occurs after the colon + comments.addAll( + commentWriter.processTrailingComments( + ctx.COLON(), "COLON", false + ) + ); var statements = ctx.statement().stream() .map(this::statement) .map(stmt -> checkDirective(stmt, "Invalid process input")) @@ -453,10 +469,19 @@ private Statement processInputs(ProcessInputsContext ctx) { private Statement processOutputs(ProcessOutputsContext ctx) { if( ctx == null ) return EmptyStatement.INSTANCE; - var leadingComments = commentWriter.getLeadingComments(ctx); - var comments = commentWriter.getWithinComments( - ctx.OUTPUT(), ctx.COLON(), "output", false, true - ); + + var comments = commentWriter.processLeadingComments(ctx); + comments.addAll(commentWriter.processInbetweenComments( + ctx.OUTPUT(), ctx.COLON(), "OUTPUT", false, true + )); + + // Capture any trailing comment that occurs after the colon + comments.addAll( + commentWriter.processTrailingComments( + ctx.COLON(), false + ) + ); + var statements = ctx.statement().stream() .map(this::statement) .map(stmt -> checkDirective(stmt, "Invalid process output")) @@ -518,36 +543,62 @@ private boolean isDirectiveWithNegativeValue(Expression expression) { private Expression processWhen(ProcessWhenContext ctx) { if( ctx == null ) return EmptyExpression.INSTANCE; - var comments = commentWriter.getLeadingComments(ctx); - comments.addAll(commentWriter.getWithinComments( + var comments = commentWriter.processLeadingComments(ctx); + comments.addAll(commentWriter.processInbetweenComments( ctx.WHEN(), ctx.COLON(), "when", false, true )); + // Capture any trailing comment that occurs after the colon + comments.addAll( + commentWriter.processTrailingComments( + ctx.COLON(), false + ) + ); + var result = ast( expression(ctx.expression()), ctx ); commentWriter.attachComments(result, comments); return result; } - private String processType(ProcessExecContext ctx) { + private String processExec(ProcessExecContext ctx) { + if( ctx == null ) + // If there is no body default to body return "script"; - - if( ctx.EXEC() != null ) { - return "exec"; - } - if( ctx.SHELL() != null ) { - collectWarning("The `shell` block is deprecated, use `script` instead", ctx.SHELL().getText(), ast( new EmptyExpression(), ctx.SHELL() )); - return "shell"; + else { + var execTypeToken = ctx.execType; + + // Collect comments + var comments = commentWriter.processLeadingComments(ctx); + comments.addAll(commentWriter.processInbetweenComments( + execTypeToken, ctx.COLON().getSymbol(), "EXEC", false, true + )); + // Capture any trailing comment that occurs after the colon + comments.addAll( + commentWriter.processTrailingComments( + ctx.COLON(), false + ) + ); + if (execTypeToken.getType() == ScriptLexer.SHELL) + collectWarning("The `shell` block is deprecated, use `script` instead", ctx.SHELL().getText(), ast( new EmptyExpression(), ctx.SHELL() )); + + return execTypeToken.getText(); } - return "script"; } private Statement processStub(ProcessStubContext ctx) { if( ctx == null ) return EmptyStatement.INSTANCE; - var comments = commentWriter.getLeadingComments(ctx); - comments.addAll(commentWriter.getWithinComments( + var comments = commentWriter.processLeadingComments(ctx); + comments.addAll(commentWriter.processInbetweenComments( ctx.STUB(), ctx.COLON(), "stub", false, true )); + // Capture any trailing comment that occurs after the colon + comments.addAll( + commentWriter.processTrailingComments( + ctx.COLON(), false + ) + ); + var result = ast( blockStatements(ctx.blockStatements()), ctx ); commentWriter.attachComments(result, comments); return result; @@ -556,13 +607,13 @@ private Statement processStub(ProcessStubContext ctx) { private WorkflowNode workflowDef(WorkflowDefContext ctx) { var name = ctx.name != null ? ctx.name.getText() : null; WorkflowNode result; - var comments = commentWriter.getLeadingComments(ctx); + var comments = commentWriter.processLeadingComments(ctx); if( name != null ) { // Look for comments between the name and the body - comments.addAll(commentWriter.getCommentsInbetween(ctx.getStart(), ctx.name.getStop(), "keyword", false, true)); - comments.addAll(commentWriter.getCommentsInbetween(ctx.name.getStop(), ctx.LBRACE().getSymbol(), "name", false, true)); + comments.addAll(commentWriter.processInbetweenComments(ctx.getStart(), ctx.name.getStop(), "KEYWORD", false, true)); + comments.addAll(commentWriter.processInbetweenComments(ctx.name.getStop(), ctx.LBRACE().getSymbol(), "NAME", false, true)); } else { - comments.addAll(commentWriter.getCommentsInbetween(ctx.getStart(), ctx.LBRACE().getSymbol(), "keyword", false, true)); + comments.addAll(commentWriter.processInbetweenComments(ctx.getStart(), ctx.LBRACE().getSymbol(), "KEYWORD", false, true)); } if( ctx.body == null ) { @@ -624,7 +675,7 @@ private Statement workflowEmits(WorkflowEmitsContext ctx) { if( ctx == null ) return EmptyStatement.INSTANCE; - var comments = commentWriter.getLeadingComments(ctx); + var comments = commentWriter.processLeadingComments(ctx); var statements = ctx.statement().stream() .map(this::workflowEmit) .filter(stmt -> stmt != null) @@ -730,7 +781,7 @@ private Statement statement(StatementContext ctx) { if( ctx instanceof EmptyStmtAltContext ) return EmptyStatement.INSTANCE; - List leadingComments = commentWriter.getLeadingComments(ctx); + List comments = commentWriter.processLeadingComments(ctx); if( ctx instanceof IfElseStmtAltContext ieac ) result = ast( ifElseStatement(ieac.ifElseStatement()), ieac ); @@ -762,7 +813,8 @@ else if( ctx instanceof ExpressionStmtAltContext eac ) else throw createParsingFailedException("Invalid statement: " + ctx.getText(), ctx); - commentWriter.attachComments(result, leadingComments); + comments.addAll(commentWriter.processTrailingComments(ctx.getStop(), true)); + commentWriter.attachComments(result, comments); return result; } @@ -876,7 +928,7 @@ private Expression variableNames(VariableNamesContext ctx) { } private Expression variableName(IdentifierContext ctx) { - var comments = commentWriter.getLeadingComments(ctx); + var comments = commentWriter.processLeadingComments(ctx); var name = identifier(ctx); var result = ast( varX(name), ctx ); checkInvalidVarName(name, result); From c52636ec6190195d01b65955be8b20c42f803851 Mon Sep 17 00:00:00 2001 From: ErikDanielsson Date: Thu, 7 Aug 2025 14:05:10 +0200 Subject: [PATCH 4/5] Change ownership of newlines, abstract space printing --- .../nextflow/script/formatter/Formatter.java | 447 +++++++++++++----- .../formatter/ScriptFormattingVisitor.java | 252 +++++----- .../nextflow/script/parser/CommentWriter.java | 22 +- .../script/parser/ScriptAstBuilder.java | 89 ++-- 4 files changed, 521 insertions(+), 289 deletions(-) diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java index af8309dfe8..8a951b497b 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java @@ -15,6 +15,7 @@ */ package nextflow.script.formatter; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.stream.Stream; @@ -98,59 +99,225 @@ public void append(String str) { builder.append(str); } - public void appendIndent() { + private char peekBuilder() { + return builder.charAt(builder.length() - 1); + } + + public void appendSpace() { + if (Character.isWhitespace(peekBuilder())) { + // We never want to append double space, or a space directly after an indent of a newline + return; + } else { + append(' '); + } + } + + public void appendPadding(int padding) { + append(" ".repeat(padding)); + } + + public void appendIndent() {} + + public void _appendIndent() { var str = options.insertSpaces() ? " ".repeat(options.tabSize() * indentCount) : "\t".repeat(indentCount); builder.append(str); } - public void appendNewLine() { + boolean isFirst = true; + private void appendNL() { + if (isFirst) { + isFirst = false; + return; + } builder.append('\n'); } - public void writeComment(CommentWriter.Comment comment, boolean shouldBreakLine) { - var tuple = comment.write(); - appendIndent(); - append(tuple.getV1()); - if( tuple.getV2() || shouldBreakLine) { + private void appendNLs(int i) { + builder.append("\n".repeat(i)); + } + + public void appendNewLine() { + appendNL(); + _appendIndent(); + } + + public void appendBlanks(int i) { + appendNLs(i); + } + + /* + * Write a single comment while keeping track of breaking lines and WS before and after + */ + public boolean writeComment( + CommentWriter.Comment comment, + boolean newlineBefore, + boolean newlineAfter, + boolean makeWSBefore, + boolean makeWSAfter + ) { + var commentAndIsSLC = comment.write(); + String commentText = commentAndIsSLC.getV1(); + boolean isSLC = commentAndIsSLC.getV2(); + + if( newlineBefore ) { appendNewLine(); - } else { - append(' '); + } else if( makeWSBefore ) { + appendSpace(); } - } - public void writeComments(Map> comments, String key, boolean shouldBreakLine) { - if (comments.containsKey(key)) { - writeComments(comments.get(key), shouldBreakLine); + append(commentText); + + if( isSLC || newlineAfter ) { + return true; + } + + if( makeWSAfter ) { + appendSpace(); } + + return false; } - public void writeComments(List comments, boolean shouldBreakLine) { - for( var comment : comments ) { - writeComment(comment, shouldBreakLine); - } + public void writeComments( + Map> comments, + String key, + boolean newlineBefore, + boolean makeWSBefore, + boolean makeWSAfter, + boolean MLCOnOwnLines, + boolean inExpression + ) { + if (comments.containsKey(key)) { + writeComments( + comments.get(key), + newlineBefore, + makeWSBefore, + makeWSAfter, + MLCOnOwnLines, + inExpression + ); + } + } + + public void writeComments( + List comments, + boolean newlineBefore, + boolean makeWSBefore, + boolean makeWSAfter, + boolean MLCOnOwnLines, + boolean inExpression + ) { + if (comments.isEmpty()) return; + /* + * These are are the four cases: + * - WSBefore && WSAfter: + * The first comment writes WS before and after, + * the rest are responsible for writing the WS after + * + * - WSBefore = true, WSAfter = false: + * Each comment should only write WS before + * + * - WSBefore = false, WSAfter = true: + * Each comment should only write WS after + * + * + * - WSBefore = false, WSAfter = false: + * The first comment writes no whitespace at all (if it is not alone), + * the rest of the comment only write WS before + */ + Iterator it = comments.iterator(); + var breakNext = writeComment( + it.next(), newlineBefore, MLCOnOwnLines, makeWSBefore, makeWSAfter + ); + while( it.hasNext() ) + breakNext = writeComment( + it.next(), breakNext, MLCOnOwnLines, !makeWSAfter, makeWSAfter + ); + + if (breakNext && inExpression) { + // If we inside an expression where the next token will not break the line + // we are required to break the line here already + appendNewLine(); + } + } + + public void appendLeadingComments( + ASTNode node, + boolean newlineBefore, + boolean makeWSBefore, + boolean makeWSAfter, + boolean MLCOnOwnLines, + boolean inExpression + ) { + var comments = commentWriter.getLeadingComments(node); + writeComments( + comments, + newlineBefore, + makeWSBefore, + makeWSAfter, + MLCOnOwnLines, + inExpression + ); } public void appendLeadingComments(ASTNode node) { - var comments = commentWriter.getLeadingComments(node); - writeComments(comments, true); + appendLeadingComments(node, true, false, false, true, false); } - public boolean hasTrailingComment(ASTNode node) { - var comment = (String) node.getNodeMetaData(ASTNodeMarker.TRAILING_COMMENT); - return comment != null; + public void appendLeadingCommentsExpression(ASTNode node) { + appendLeadingComments(node, false, false, true, false, true); + } - public void appendTrailingComment(ASTNode node) { - var comment = (String) node.getNodeMetaData(ASTNodeMarker.TRAILING_COMMENT); - append("/* */"); - if( comment != null ) { - append(' '); - append(comment); + public void appendLeadingCommentsStatement(ASTNode node) { + appendLeadingComments(node, true, false, false, true, false); + } + + public void appendTrailingComments( + ASTNode node, + boolean newlineBefore, + boolean makeWSBefore, + boolean makeWSAfter, + boolean MLCOnOwnLines, + boolean inExpression + ) { + var comments = commentWriter.getTrailingComments(node); + if (comments.containsKey("STANDARD")) { + writeComments( + comments.get("STANDARD"), + newlineBefore, + makeWSBefore, + makeWSAfter, + MLCOnOwnLines, + inExpression + ); } } + public void appendTrailingInside( + Map> comments, + String key + ) { + writeComments(comments, key, false, true, true, false, true); + } + + public void appendWithinComments( + Map> comments, + String key + ) { + writeComments(comments, key, false, true, false, false, true); + } + + public void appendTrailingComments(ASTNode node) { + appendTrailingComments(node, false, true, false, false, false); + } + + public void appendTrailingCommentsExpression(ASTNode node) { + appendTrailingComments(node, false, true, true, false, true); + } + public void incIndent() { indentCount++; } @@ -172,29 +339,35 @@ public void visitIfElse(IfStatement node) { protected void visitIfElse(IfStatement node, boolean preIndent) { appendLeadingComments(node); - if( preIndent ) - appendIndent(); - append("if ("); + appendNewLine(); + append("if"); + appendSpace(); + append('('); visit(node.getBooleanExpression()); - append(") {\n"); + append(')'); + appendSpace(); + append("{"); incIndent(); visit(node.getIfBlock()); decIndent(); - appendIndent(); - append("}\n"); + appendNewLine(); + append("}"); if( node.getElseBlock() instanceof IfStatement is ) { - appendIndent(); - append("else "); + appendNewLine(); + append("else"); + appendSpace(); visitIfElse(is, false); } else if( !(node.getElseBlock() instanceof EmptyStatement) ) { - appendIndent(); - append("else {\n"); + appendNewLine(); + append("else"); + appendSpace(); + append('{'); incIndent(); visit(node.getElseBlock()); decIndent(); - appendIndent(); - append("}\n"); + appendNewLine(); + append("}"); } } @@ -205,10 +378,10 @@ public void visitExpressionStatement(ExpressionStatement node) { var cre = currentRootExpr; currentRootExpr = node.getExpression(); appendLeadingComments(node); - appendIndent(); + appendNewLine(); visitStatementLabels(node); visit(node.getExpression()); - appendNewLine(); + appendTrailingComments(node); currentRootExpr = cre; } @@ -217,7 +390,8 @@ private void visitStatementLabels(ExpressionStatement node) { return; for( var label : DefaultGroovyMethods.asReversed(node.getStatementLabels()) ) { append(label); - append(": "); + append(":"); + appendSpace(); } } @@ -226,36 +400,41 @@ public void visitReturnStatement(ReturnStatement node) { var cre = currentRootExpr; currentRootExpr = node.getExpression(); appendLeadingComments(node); - appendIndent(); - append("return "); - visit(node.getExpression()); appendNewLine(); + append("return"); + appendSpace(); + visit(node.getExpression()); currentRootExpr = cre; } @Override public void visitAssertStatement(AssertStatement node) { appendLeadingComments(node); - appendIndent(); - append("assert "); + appendNewLine(); + append("assert"); + appendSpace(); visit(node.getBooleanExpression()); if( !(node.getMessageExpression() instanceof ConstantExpression ce && ce.isNullExpression()) ) { - append(" : "); + appendSpace(); + append(":"); + appendSpace(); visit(node.getMessageExpression()); } - appendNewLine(); } @Override public void visitTryCatchFinally(TryCatchStatement node) { appendLeadingComments(node); - appendIndent(); - append("try {\n"); + appendNewLine(); + append("try"); + appendSpace(); + append('{'); incIndent(); + appendNewLine(); visit(node.getTryStatement()); decIndent(); - appendIndent(); - append("}\n"); + appendNewLine(); + append("}"); for( var catchStatement : node.getCatchStatements() ) { visit(catchStatement); } @@ -264,32 +443,35 @@ public void visitTryCatchFinally(TryCatchStatement node) { @Override public void visitThrowStatement(ThrowStatement node) { appendLeadingComments(node); - appendIndent(); - append("throw "); - visit(node.getExpression()); appendNewLine(); + append("throw"); + appendSpace(); + visit(node.getExpression()); } @Override public void visitCatchStatement(CatchStatement node) { appendLeadingComments(node); - appendIndent(); - append("catch ("); + appendNewLine(); + append("catch"); + appendSpace(); + append('('); var variable = node.getVariable(); var type = variable.getType(); if( !ClassHelper.isObjectType(type) ) { append(type.getNameWithoutPackage()); - append(' '); + appendSpace(); } append(variable.getName()); - - append(") {\n"); + append(')'); + appendSpace(); + append('{'); incIndent(); visit(node.getCode()); decIndent(); - appendIndent(); - append("}\n"); + appendNewLine(); + append("}"); } // expressions @@ -335,14 +517,13 @@ else if( node.isSafe() ) incIndent(); visitArguments(parenArgs, wrap); if( wrap ) { - appendNewLine(); decIndent(); - appendIndent(); + appendNewLine(); } append(')'); } if( lastClosureArg ) { - append(' '); + appendSpace(); visit(args.get(args.size() - 1)); } inWrappedMethodChain = iwmc; @@ -355,7 +536,8 @@ else if( node.isSafe() ) @Override public void visitConstructorCallExpression(ConstructorCallExpression node) { - append("new "); + append("new"); + appendSpace(); visitTypeAnnotation(node.getType()); append('('); visitArguments(asMethodCallArguments(node), false); @@ -363,14 +545,14 @@ public void visitConstructorCallExpression(ConstructorCallExpression node) { } public void visitDirective(MethodCallExpression call) { - appendIndent(); + appendLeadingComments(call); + appendNewLine(); append(call.getMethodAsString()); var arguments = asMethodCallArguments(call); if( !arguments.isEmpty() ) { - append(' '); + appendSpace(); visitArguments(arguments, false); } - appendNewLine(); } public void visitArguments(List args, boolean wrap) { @@ -381,7 +563,8 @@ public void visitArguments(List args, boolean wrap) { visitPositionalArgs(positionalArgs, wrap); if( hasNamedArgs ) { if( positionalArgs.size() > 0 ) - append(wrap ? "," : ", "); + append(','); + if (!wrap) appendSpace(); var mapX = (MapExpression)args.get(0); visitNamedArgs(mapX.getMapEntryExpressions(), wrap); } @@ -393,15 +576,18 @@ public void visitArguments(List args, boolean wrap) { @Override public void visitBinaryExpression(BinaryExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); if( node instanceof DeclarationExpression ) { - append("def "); + appendNewLine(); // A declaration expression is treated as a statement here, so break the line + append("def"); + appendSpace(); inVariableDeclaration = true; visit(node.getLeftExpression()); inVariableDeclaration = false; var source = node.getRightExpression(); if( !(source instanceof EmptyExpression) ) { - append(" = "); + append("="); + appendSpace(); var cre = currentRootExpr; currentRootExpr = source; visit(source); @@ -425,15 +611,14 @@ public void visitBinaryExpression(BinaryExpression node) { visit(node.getLeftExpression()); if( inWrappedPipeChain ) { - appendNewLine(); incIndent(); - appendIndent(); + appendNewLine(); } else { - append(' '); + appendSpace(); } append(node.getOperation().getText()); - append(' '); + appendSpace(); var iwpc = inWrappedPipeChain; inWrappedPipeChain = false; @@ -458,69 +643,77 @@ public void visitBinaryExpression(BinaryExpression node) { @Override public void visitTernaryExpression(TernaryExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); if( shouldWrapExpression(node) ) { visit(node.getBooleanExpression()); incIndent(); appendNewLine(); - appendIndent(); - append("? "); + append("?"); + appendSpace(); visit(node.getTrueExpression()); appendNewLine(); - appendIndent(); - append(": "); + append(":"); + appendSpace(); visit(node.getFalseExpression()); decIndent(); } else { visit(node.getBooleanExpression()); - append(" ? "); + appendSpace(); + append("?"); + appendSpace(); visit(node.getTrueExpression()); - append(" : "); + appendSpace(); + append(":"); + appendSpace(); visit(node.getFalseExpression()); } } @Override public void visitShortTernaryExpression(ElvisOperatorExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); visit(node.getTrueExpression()); - append(" ?: "); + appendSpace(); + append("?:"); + appendSpace(); visit(node.getFalseExpression()); } @Override public void visitNotExpression(NotExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); append('!'); visit(node.getExpression()); } @Override public void visitClosureExpression(ClosureExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); append('{'); if( node.getParameters() != null && node.getParameters().length > 0 ) { - append(' '); + appendSpace(); visitParameters(node.getParameters()); - append(" ->"); + appendSpace(); + append("->"); } var code = (BlockStatement) node.getCode(); if( code.getStatements().size() == 0 ) { - append(" }"); + appendSpace(); + append("}"); } else if( code.getStatements().size() == 1 && code.getStatements().get(0) instanceof ExpressionStatement es && !shouldWrapExpression(node) ) { - append(' '); + appendSpace(); visitStatementLabels(es); visit(es.getExpression()); - append(" }"); + appendSpace(); + append("}"); } else { - appendNewLine(); incIndent(); visit(code); decIndent(); - appendIndent(); + appendNewLine(); append('}'); } } @@ -530,37 +723,40 @@ public void visitParameters(Parameter[] parameters) { var param = parameters[i]; if( isLegacyType(param.getType()) ) { visitTypeAnnotation(param.getType()); - append(' '); + appendSpace(); } append(param.getName()); if( param.hasInitialExpression() ) { - append(" = "); + appendSpace(); + append("="); + appendSpace(); visit(param.getInitialExpression()); } - if( i + 1 < parameters.length ) - append(", "); + if( i + 1 < parameters.length ) { + append(","); + appendSpace(); + } } } @Override public void visitTupleExpression(TupleExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); var wrap = shouldWrapExpression(node); append('('); if( wrap ) incIndent(); visitPositionalArgs(node.getExpressions(), wrap); if( wrap ) { - appendNewLine(); decIndent(); - appendIndent(); + appendNewLine(); } append(')'); } @Override public void visitListExpression(ListExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); var wrap = hasTrailingComma(node) || shouldWrapExpression(node); append('['); if( wrap ) @@ -580,7 +776,6 @@ protected void visitPositionalArgs(List args, boolean wrap) { for( int i = 0; i < args.size(); i++ ) { if( wrap ) { appendNewLine(); - appendIndent(); } visit(args.get(i)); if( trailingComma || i + 1 < args.size() ) @@ -590,7 +785,7 @@ protected void visitPositionalArgs(List args, boolean wrap) { @Override public void visitMapExpression(MapExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); if( node.getMapEntryExpressions().isEmpty() ) { append("[:]"); return; @@ -601,9 +796,8 @@ public void visitMapExpression(MapExpression node) { incIndent(); visitNamedArgs(node.getMapEntryExpressions(), wrap); if( wrap ) { - appendNewLine(); decIndent(); - appendIndent(); + appendNewLine(); } append(']'); } @@ -614,7 +808,6 @@ protected void visitNamedArgs(List args, boolean wrap) { for( int i = 0; i < args.size(); i++ ) { if( wrap ) { appendNewLine(); - appendIndent(); } visit(args.get(i)); if( trailingComma || i + 1 < args.size() ) @@ -624,15 +817,16 @@ protected void visitNamedArgs(List args, boolean wrap) { @Override public void visitMapEntryExpression(MapEntryExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); visit(node.getKeyExpression()); - append(": "); + append(":"); + appendSpace(); visit(node.getValueExpression()); } @Override public void visitRangeExpression(RangeExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); visit(node.getFrom()); if( node.isExclusiveLeft() ) append('<'); @@ -644,51 +838,54 @@ public void visitRangeExpression(RangeExpression node) { @Override public void visitUnaryMinusExpression(UnaryMinusExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); append('-'); visit(node.getExpression()); } @Override public void visitUnaryPlusExpression(UnaryPlusExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); append('+'); visit(node.getExpression()); } @Override public void visitBitwiseNegationExpression(BitwiseNegationExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); append('~'); visit(node.getExpression()); } @Override public void visitCastExpression(CastExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); visit(node.getExpression()); - append(" as "); + appendSpace(); + append("as"); + appendSpace(); visitTypeAnnotation(node.getType()); } @Override public void visitConstantExpression(ConstantExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); var text = (String) node.getNodeMetaData(ASTNodeMarker.VERBATIM_TEXT); if( text != null ) append(text); else append(node.getText()); + appendTrailingCommentsExpression(node); } @Override public void visitClassExpression(ClassExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); visitTypeAnnotation(node.getType()); } public void visitTypeAnnotation(ClassNode type) { - appendLeadingComments(type); + appendLeadingCommentsExpression(type); if( isLegacyType(type) ) { append(type.getNodeMetaData(ASTNodeMarker.LEGACY_TYPE)); return; @@ -699,12 +896,13 @@ public void visitTypeAnnotation(ClassNode type) { @Override public void visitVariableExpression(VariableExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); if( inVariableDeclaration && isLegacyType(node.getType()) ) { visitTypeAnnotation(node.getType()); - append(' '); + appendSpace(); } append(node.getText()); + appendTrailingCommentsExpression(node); } @Override @@ -720,7 +918,7 @@ else if( node.isSafe() ) @Override public void visitGStringExpression(GStringExpression node) { - appendLeadingComments(node); + appendLeadingCommentsExpression(node); // see also: GStringUtil.writeToImpl() var quoteChar = (String) node.getNodeMetaData(ASTNodeMarker.QUOTE_CHAR, k -> DQ_STR); append(quoteChar); @@ -741,7 +939,6 @@ public void visitGStringExpression(GStringExpression node) { @Override public void visit(Expression node) { - appendLeadingComments(node); var number = (Number) node.getNodeMetaData(ASTNodeMarker.INSIDE_PARENTHESES_LEVEL); if( number != null && number.intValue() > 0 ) append('('); diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java index ce51de082a..ebf61c5fe6 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java @@ -33,6 +33,7 @@ import nextflow.script.parser.CommentWriter; import org.codehaus.groovy.ast.ClassNode; +import org.codehaus.groovy.ast.ASTNode; import org.codehaus.groovy.ast.expr.EmptyExpression; import org.codehaus.groovy.ast.expr.PropertyExpression; import org.codehaus.groovy.ast.expr.VariableExpression; @@ -142,6 +143,7 @@ else if( decl instanceof ProcessNode pn ) else if( decl instanceof WorkflowNode wn ) visitWorkflow(wn); } + fmt.appendBlanks(1); System.out.print(commentWriter.toNFComments()); } @@ -155,7 +157,9 @@ public String toString() { public void visitFeatureFlag(FeatureFlagNode node) { fmt.appendLeadingComments(node); fmt.append(node.name); - fmt.append(" = "); + fmt.appendSpace(); + fmt.append("="); + fmt.appendSpace(); fmt.visit(node.value); fmt.appendNewLine(); } @@ -164,7 +168,9 @@ public void visitFeatureFlag(FeatureFlagNode node) { public void visitInclude(IncludeNode node) { var wrap = node.getLineNumber() < node.getLastLineNumber(); fmt.appendLeadingComments(node); - fmt.append("include {"); + fmt.append("include"); + fmt.appendSpace(); + fmt.append('{'); if( wrap ) fmt.incIndent(); for( int i = 0; i < node.entries.size(); i++ ) { @@ -173,29 +179,35 @@ public void visitInclude(IncludeNode node) { fmt.appendIndent(); } else { - fmt.append(' '); + fmt.appendSpace(); } var entry = node.entries.get(i); fmt.append(entry.name); if( entry.alias != null ) { - fmt.append(" as "); + fmt.appendSpace(); + fmt.append("as"); + fmt.appendSpace(); fmt.append(entry.alias); } if( !wrap && node.entries.size() == 1 && options.harshilAlignment() ) { var padding = maxIncludeWidth - getIncludeWidth(entry); - fmt.append(" ".repeat(padding)); + fmt.appendPadding(padding); } if( i + 1 < node.entries.size() ) - fmt.append(" ;"); + fmt.appendSpace(); + fmt.append(";"); } if( wrap ) { fmt.appendNewLine(); fmt.decIndent(); } else { - fmt.append(' '); + fmt.appendSpace(); } - fmt.append("} from "); + fmt.append('}'); + fmt.appendSpace(); + fmt.append("from"); + fmt.appendSpace(); fmt.visit(node.source); fmt.appendNewLine(); } @@ -213,9 +225,11 @@ public void visitParam(ParamNode node) { fmt.visit(node.target); if( maxParamWidth > 0 ) { var padding = maxParamWidth - getParamWidth(node); - fmt.append(" ".repeat(padding)); + fmt.appendPadding(padding); } - fmt.append(" = "); + fmt.appendSpace(); + fmt.append("="); + fmt.appendSpace(); fmt.visit(node.value); fmt.appendNewLine(); } @@ -228,47 +242,47 @@ protected int getParamWidth(ParamNode node) { @Override public void visitWorkflow(WorkflowNode node) { - var leadingComments = commentWriter.getLeadingComments(node); - var withinComments = commentWriter.getWithinComments(node); - fmt.appendLeadingComments(node); + var withinComments = commentWriter.getWithinComments(node); + fmt.appendNewLine(); fmt.append("workflow"); - fmt.append(' '); - fmt.writeComments(withinComments, "KEYWORD", false); + fmt.appendSpace(); + fmt.writeComments(withinComments, "KEYWORD", false, true, false, false, true); if( !node.isEntry() ) { fmt.append(node.getName()); - fmt.append(' '); - fmt.writeComments(withinComments, "NAME", false); + fmt.appendSpace(); + fmt.writeComments(withinComments, "NAME", false, true, false, false, true); } - fmt.append("{\n"); + fmt.append("{"); fmt.incIndent(); if( node.takes instanceof BlockStatement ) { - fmt.appendIndent(); - fmt.append("take:\n"); + appendColonStatement("take", "TAKE", node.takes); visitWorkflowTakes(asBlockStatements(node.takes)); fmt.appendNewLine(); } if( node.main instanceof BlockStatement ) { if( node.takes instanceof BlockStatement || node.emits instanceof BlockStatement || node.publishers instanceof BlockStatement ) { fmt.appendIndent(); - fmt.append("main:\n"); + appendColonStatement("main", "MAIN", node.main); } fmt.visit(node.main); } if( node.emits instanceof BlockStatement ) { fmt.appendNewLine(); fmt.appendIndent(); - fmt.append("emit:\n"); + appendColonStatement("emit", "EMIT", node.takes); visitWorkflowEmits(asBlockStatements(node.emits)); } if( node.publishers instanceof BlockStatement ) { fmt.appendNewLine(); fmt.appendIndent(); - fmt.append("publish:\n"); + appendColonStatement("publish", "PUBLISH", node.publishers); fmt.visit(node.publishers); } fmt.decIndent(); - fmt.append("}\n"); + fmt.appendNewLine(); + fmt.append("}"); + fmt.appendTrailingComments(node); } protected void visitWorkflowTakes(List takes) { @@ -280,13 +294,13 @@ protected void visitWorkflowTakes(List takes) { var ve = asVarX(stmt); fmt.appendIndent(); fmt.visit(ve); - if( fmt.hasTrailingComment(stmt) ) { - if( alignmentWidth > 0 ) { - var padding = alignmentWidth - ve.getName().length(); - fmt.append(" ".repeat(padding)); - } - fmt.appendTrailingComment(stmt); - } + // if( fmt.hasTrailingComment(stmt) ) { + // if( alignmentWidth > 0 ) { + // var padding = alignmentWidth - ve.getName().length(); + // fmt.append(" ".repeat(padding)); + // } + // } + fmt.appendTrailingComments(stmt); fmt.appendNewLine(); } } @@ -305,23 +319,25 @@ protected void visitWorkflowEmits(List emits) { fmt.visit(ve); if( alignmentWidth > 0 ) { var padding = alignmentWidth - ve.getName().length(); - fmt.append(" ".repeat(padding)); + fmt.appendPadding(padding); } - fmt.append(" = "); + fmt.appendSpace(); + fmt.append("="); + fmt.appendSpace(); fmt.visit(assign.getRightExpression()); - fmt.appendTrailingComment(stmt); + fmt.appendTrailingComments(stmt); fmt.appendNewLine(); } else if( emit instanceof VariableExpression ve ) { fmt.appendIndent(); fmt.visit(ve); - if( fmt.hasTrailingComment(stmt) ) { - if( alignmentWidth > 0 ) { - var padding = alignmentWidth - ve.getName().length(); - fmt.append(" ".repeat(padding)); - } - fmt.appendTrailingComment(stmt); - } + // if( fmt.hasTrailingComment(stmt) ) { + // if( alignmentWidth > 0 ) { + // var padding = alignmentWidth - ve.getName().length(); + // fmt.append(" ".repeat(padding)); + // } + // } + fmt.appendTrailingComments(stmt); fmt.appendNewLine(); } else { @@ -359,145 +375,146 @@ public void visitProcess(ProcessNode node) { var trailingComments = commentWriter.getTrailingComments(node); System.err.println(trailingComments); fmt.appendLeadingComments(node); - fmt.append("process "); - fmt.writeComments(withinComments, "KEYWORD", false); + fmt.appendNewLine(); + fmt.append("process"); + fmt.appendSpace(); + fmt.writeComments(withinComments, "KEYWORD", false, false, true, false, true); fmt.append(node.getName()); - fmt.writeComments(withinComments, "NAME", false); - fmt.append(" {"); - fmt.writeComments(trailingComments, "LBRACE", false); - fmt.append("\n"); + fmt.writeComments(withinComments, "NAME", false, true, false, false, true); + fmt.appendSpace(); + fmt.append("{"); + fmt.writeComments(trailingComments, "LBRACE", false, true, true, false, false); fmt.incIndent(); + if( node.directives instanceof BlockStatement ) { visitDirectives(node.directives); - fmt.appendNewLine(); + fmt.appendBlanks(1); } + if( node.inputs instanceof BlockStatement ) { - fmt.appendIndent(); - fmt.appendLeadingComments(node.inputs); - var inputWithinComments = commentWriter.getWithinComments(node.inputs); - var inputTrailingComments = commentWriter.getTrailingComments(node.inputs); - fmt.append("input"); - fmt.writeComments(inputWithinComments, "INPUT", false); - fmt.append(":"); - fmt.writeComments(inputTrailingComments, "COLON", false); - fmt.append("\n"); + appendColonStatement("input", "INPUT", node.inputs); visitDirectives(node.inputs); - fmt.appendNewLine(); + fmt.appendBlanks(1); } + if( !options.maheshForm() && node.outputs instanceof BlockStatement ) { - visitProcessOutputs(node.outputs); - fmt.appendNewLine(); + appendColonStatement("output", "OUTPUT", node.outputs); + visitDirectives(node.outputs); + fmt.appendBlanks(1); } + if( !(node.when instanceof EmptyExpression) ) { - fmt.appendIndent(); - var whenWithinComments = commentWriter.getWithinComments(node.when); - var whenTrailingComments = commentWriter.getTrailingComments(node.when); - fmt.appendLeadingComments(node.when); - fmt.append("when"); - fmt.writeComments(whenWithinComments, "WHEN", false); - fmt.append(":"); - fmt.writeComments(whenTrailingComments, "COLON", false); - fmt.append("\n"); + appendColonStatement("when", "WHEN", node.when); fmt.incIndent(); - fmt.appendIndent(); + fmt.appendNewLine(); fmt.visit(node.when); - fmt.append("\n\n"); + fmt.appendBlanks(1); } - fmt.appendLeadingComments(node.exec); - fmt.appendIndent(); - var execWithinComments = commentWriter.getWithinComments(node.exec); - var execTrailingComments = commentWriter.getTrailingComments(node.exec); - fmt.append(node.type); - fmt.writeComments(execWithinComments, "EXEC", false); - fmt.append(":"); - fmt.writeComments(execTrailingComments, "COLON", false); - fmt.append("\n"); + + appendColonStatement(node.type, "EXEC", node.exec); fmt.visit(node.exec); + fmt.appendBlanks(1); + if( !(node.stub instanceof EmptyStatement) ) { - fmt.appendNewLine(); - fmt.appendIndent(); - fmt.appendLeadingComments(node.stub); - var commentWriter = getCommentWriter(); - var stubWithinComments = commentWriter.getWithinComments(node.stub); - var stubTrailingComments = commentWriter.getTrailingComments(node.stub); - fmt.append("stub"); - fmt.writeComments(stubWithinComments, "STUB", false); - fmt.append(":"); - fmt.writeComments(stubTrailingComments, "COLON", false); + appendColonStatement("stub", "STUB", node.stub); fmt.visit(node.stub); + fmt.appendBlanks(1); } + if( options.maheshForm() && node.outputs instanceof BlockStatement ) { - fmt.appendNewLine(); - visitProcessOutputs(node.outputs); + appendColonStatement("output", "OUTPUT", node.outputs); + visitDirectives(node.outputs); + fmt.appendBlanks(1); } + fmt.decIndent(); + fmt.appendNewLine(); fmt.append("}"); - fmt.writeComments(trailingComments, "RBRACE", false); - fmt.append("\n"); + fmt.writeComments(trailingComments, "RBRACE", false, true, true, false, false); } - private void visitProcessOutputs(Statement outputs) { - fmt.appendIndent(); - fmt.append("output:\n"); - visitDirectives(outputs); + private void appendColonStatement(String name, String commentKey, ASTNode stmt) { + fmt.appendLeadingComments(stmt); + fmt.appendNewLine(); + var colonWithinComments = commentWriter.getWithinComments(stmt); + var colonTrailingComments = commentWriter.getTrailingComments(stmt); + // Write the name of the statement + fmt.append(name); + // Write commend inbetween the name and the colon + fmt.appendWithinComments(colonWithinComments, commentKey); + fmt.append(":"); + // Write trailing comments after the colon + fmt.appendTrailingInside(colonTrailingComments, "COLON"); } @Override public void visitFunction(FunctionNode node) { fmt.appendLeadingComments(node); - fmt.append("def "); + fmt.appendNewLine(); + fmt.append("def"); + fmt.appendSpace(); if( Formatter.isLegacyType(node.getReturnType()) ) { fmt.visitTypeAnnotation(node.getReturnType()); - fmt.append(' '); + fmt.appendSpace(); } fmt.append(node.getName()); fmt.append('('); fmt.visitParameters(node.getParameters()); - fmt.append(") {\n"); + fmt.append(')'); + fmt.appendSpace(); + fmt.append('{'); fmt.incIndent(); fmt.visit(node.getCode()); fmt.decIndent(); - fmt.append("}\n"); + fmt.appendNewLine(); + fmt.append("}"); } @Override public void visitEnum(ClassNode node) { fmt.appendLeadingComments(node); - fmt.append("enum "); + fmt.appendNewLine(); + fmt.append("enum"); + fmt.appendSpace(); fmt.append(node.getName()); - fmt.append(" {\n"); fmt.incIndent(); + fmt.appendSpace(); + fmt.append("{"); for( var fn : node.getFields() ) { - fmt.appendIndent(); + fmt.appendNewLine(); fmt.append(fn.getName()); fmt.append(','); - fmt.appendNewLine(); } fmt.decIndent(); - fmt.append("}\n"); + fmt.appendNewLine(); + fmt.append("}"); } @Override public void visitOutputs(OutputBlockNode node) { fmt.appendLeadingComments(node); - fmt.append("output {\n"); + fmt.appendNewLine(); + fmt.append("output"); + fmt.appendSpace(); + fmt.append('{'); fmt.incIndent(); super.visitOutputs(node); fmt.decIndent(); - fmt.append("}\n"); + fmt.appendNewLine(); + fmt.append("}"); } @Override public void visitOutput(OutputNode node) { fmt.appendLeadingComments(node); - fmt.appendIndent(); + fmt.appendNewLine(); fmt.append(node.name); - fmt.append(" {\n"); - fmt.incIndent(); + fmt.appendSpace(); + fmt.append("{"); visitOutputBody((BlockStatement) node.body); fmt.decIndent(); - fmt.appendIndent(); - fmt.append("}\n"); + fmt.appendNewLine(); + fmt.append("}"); } protected void visitOutputBody(BlockStatement block) { @@ -514,12 +531,15 @@ protected void visitOutputBody(BlockStatement block) { fmt.appendLeadingComments(stmt); fmt.appendIndent(); fmt.append(name); - fmt.append(" {\n"); + fmt.appendSpace(); + fmt.append("{"); fmt.incIndent(); + fmt.appendNewLine(); visitDirectives(code); fmt.decIndent(); fmt.appendIndent(); - fmt.append("}\n"); + fmt.append("}"); + fmt.appendNewLine(); return; } } diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java b/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java index d1f9fa809b..96f7fe54ea 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java @@ -177,7 +177,7 @@ public List processLeadingComments(ParserRuleContext ctx) { for (int i = contextStartIndex - 1; i >= firstCommentIndex; i--) { TokenInfo token = tokensWithInfo.get(i); if( token.isComment() && !token.getComment().isConsumed() ) { - comments.add(token.getComment()); + comments.add(0, token.getComment()); } else if( token.getToken().getType() == ScriptLexer.NL ){ continue; } else { @@ -203,7 +203,6 @@ public List processInbetweenComments( while( it.hasNext() ) { Comment comment = it.next(); if (comment.getStartIndex() >= startIdx && comment.getStopIndex() <= endIdx) { - System.err.println(comment.getToken().getLine() + ":" + comment.getToken().getCharPositionInLine() + " " + comment.getToken().getText()); comments.add(comment); } } @@ -224,7 +223,6 @@ public List processTrailingComments(Token lastCtxToken, String position // A trailing comment is either on the same line as the last token of the context, // or on the next line line after the last token of the context, but in this case it must // be separated by at least one blank line from the next context - List comments = new ArrayList<>(); int contextStopIndex = lastCtxToken.getTokenIndex(); ListIterator it = tokensWithInfo.listIterator(contextStopIndex + 1); @@ -233,7 +231,6 @@ public List processTrailingComments(Token lastCtxToken, String position // First get tokens that are directly after the context, without any newlines while( it.hasNext() ) { token = it.next(); - System.err.println(token); if (token.isComment() && !token.getComment().isConsumed()) { comments.add(token.getComment()); } else { @@ -326,7 +323,7 @@ public Comment(Token token) { public void attachTo(ASTNode node) { if (attachedTo != null) { - throw new IllegalStateException("Comment already attached to " + attachedTo); + throw new IllegalStateException(this + " already attached to " + attachedTo); } this.attachedTo = node; pendingComments.remove(this); @@ -342,7 +339,7 @@ public void attachTo(ASTNode node) { public void makePending() { if (consumed) { - throw new IllegalStateException("Cannot make comment " + token.getText() + " pending since it already is"); + throw new IllegalStateException(this + " is already consumed"); } unattachedComments.remove(this); pendingComments.add(this); @@ -417,14 +414,21 @@ public boolean isWritten() { public Tuple2 write() { if( isWritten ) { - throw new IllegalStateException("Comment already written"); + throw new IllegalStateException(this + " already written"); } isWritten = true; return new Tuple2<>(token.getText(), token.getType() == ScriptLexer.SL_COMMENT); } - - + @Override + public String toString() { + return String.format( + "Comment:%d:%d:'%s'", + token.getLine(), + token.getCharPositionInLine(), + token.getText().replaceAll("\n", "\\n") + ); + } @Override public int hashCode() { return token.getTokenIndex(); diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java b/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java index dad6ae981f..c1363b5d71 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/ScriptAstBuilder.java @@ -152,10 +152,8 @@ public ScriptAstBuilder(SourceUnit sourceUnit) { .filter(token -> token.getChannel() == ScriptLexer.DEFAULT_TOKEN_CHANNEL) .toList(); - System.err.println("DEBUG: Found " + commentTokensList.size() + " comment tokens out of " + allTokens.size() + " total tokens"); for (int i = 0; i < commentTokensList.size(); i++) { var token = commentTokensList.get(i); - System.err.println("DEBUG: Comment token " + i + ": '" + token.getText() + "' (type: " + token.getType() + ", channel: " + token.getChannel() + ")"); } this.commentWriter = new CommentWriter(allTokens); @@ -414,10 +412,18 @@ private ProcessNode processDef(ProcessDefContext ctx) { var inputs = processInputs(ctx.body.processInputs()); var outputs = processOutputs(ctx.body.processOutputs()); var when = processWhen(ctx.body.processWhen()); - var type = processExec(ctx.body.processExec()); - var exec = ctx.body.blockStatements() != null - ? blockStatements(ctx.body.blockStatements()) - : blockStatements(ctx.body.processExec().blockStatements()); + + String type; + BlockStatement exec; + if (ctx.body.processExec() == null) { + type = "script"; + exec = blockStatements(ctx.body.blockStatements()); + } else { + Tuple2 t = processExec(ctx.body.processExec()); + type = t.getV1(); + exec = t.getV2(); + } + var stub = processStub(ctx.body.processStub()); if( ctx.body.blockStatements() != null ) { @@ -559,30 +565,28 @@ private Expression processWhen(ProcessWhenContext ctx) { return result; } - private String processExec(ProcessExecContext ctx) { + private Tuple2 processExec(ProcessExecContext ctx) { - if( ctx == null ) - // If there is no body default to body - return "script"; - else { - var execTypeToken = ctx.execType; - - // Collect comments - var comments = commentWriter.processLeadingComments(ctx); - comments.addAll(commentWriter.processInbetweenComments( - execTypeToken, ctx.COLON().getSymbol(), "EXEC", false, true - )); - // Capture any trailing comment that occurs after the colon - comments.addAll( - commentWriter.processTrailingComments( - ctx.COLON(), false - ) - ); - if (execTypeToken.getType() == ScriptLexer.SHELL) - collectWarning("The `shell` block is deprecated, use `script` instead", ctx.SHELL().getText(), ast( new EmptyExpression(), ctx.SHELL() )); - - return execTypeToken.getText(); - } + var execTypeToken = ctx.execType; + + // Collect comments + var comments = commentWriter.processLeadingComments(ctx); + comments.addAll(commentWriter.processInbetweenComments( + execTypeToken, ctx.COLON().getSymbol(), "EXEC", false, true + )); + // Capture any trailing comment that occurs after the colon + comments.addAll( + commentWriter.processTrailingComments( + ctx.COLON(), false + ) + ); + + if (execTypeToken.getType() == ScriptLexer.SHELL) + collectWarning("The `shell` block is deprecated, use `script` instead", ctx.SHELL().getText(), ast( new EmptyExpression(), ctx.SHELL() )); + + var result = blockStatements(ctx.blockStatements()); + commentWriter.attachComments(result, comments); + return new Tuple2<>(execTypeToken.getText(), result); } private Statement processStub(ProcessStubContext ctx) { @@ -932,6 +936,7 @@ private Expression variableName(IdentifierContext ctx) { var name = identifier(ctx); var result = ast( varX(name), ctx ); checkInvalidVarName(name, result); + comments.addAll(commentWriter.processTrailingComments(ctx.getStop(), false)); commentWriter.attachComments(result, comments); return result; } @@ -1279,22 +1284,28 @@ private String keywords(KeywordsContext ctx) { } private Expression literal(LiteralContext ctx) { + Expression result; + var comments = commentWriter.processLeadingComments(ctx); if( ctx instanceof IntegerLiteralAltContext iac ) - return ast( integerLiteral(iac), iac ); + result = ast( integerLiteral(iac), iac ); - if( ctx instanceof FloatingPointLiteralAltContext fac ) - return ast( floatingPointLiteral(fac), fac ); + else if( ctx instanceof FloatingPointLiteralAltContext fac ) + result = ast( floatingPointLiteral(fac), fac ); - if( ctx instanceof StringLiteralAltContext sac ) - return ast( string(sac.stringLiteral()), sac ); + else if( ctx instanceof StringLiteralAltContext sac ) + result = ast( string(sac.stringLiteral()), sac ); - if( ctx instanceof BooleanLiteralAltContext bac ) - return ast( constX("true".equals(bac.getText())), bac ); + else if( ctx instanceof BooleanLiteralAltContext bac ) + result = ast( constX("true".equals(bac.getText())), bac ); - if( ctx instanceof NullLiteralAltContext nac ) - return ast( constX(null), nac ); + else if( ctx instanceof NullLiteralAltContext nac ) + result = ast( constX(null), nac ); - throw createParsingFailedException("Invalid expression: " + ctx.getText(), ctx); + else + throw createParsingFailedException("Invalid expression: " + ctx.getText(), ctx); + comments.addAll(commentWriter.processTrailingComments(ctx.getStop(), false)); + commentWriter.attachComments(result, comments); + return result; } private Expression integerLiteral(IntegerLiteralAltContext ctx) { From 1102ed54f77c7d5693bc9b279e50b74abaed91a3 Mon Sep 17 00:00:00 2001 From: ErikDanielsson Date: Thu, 7 Aug 2025 15:46:53 +0200 Subject: [PATCH 5/5] Write message if not all commments are written --- .../nextflow/script/formatter/Formatter.java | 5 + .../formatter/ScriptFormattingVisitor.java | 2 +- .../nextflow/script/parser/CommentWriter.java | 126 +++++++++++++++--- 3 files changed, 111 insertions(+), 22 deletions(-) diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java index 8a951b497b..595e09064f 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/Formatter.java @@ -63,6 +63,7 @@ import org.codehaus.groovy.ast.stmt.TryCatchStatement; import org.codehaus.groovy.runtime.DefaultGroovyMethods; import org.codehaus.groovy.syntax.Types; +import org.stringtemplate.v4.compiler.STParser.compoundElement_return; import static nextflow.script.ast.ASTUtils.*; @@ -91,6 +92,10 @@ public Formatter(FormattingOptions options, CommentWriter commentWriter) { this.commentWriter = commentWriter; } + public boolean exit() { + return commentWriter.verifyAllWritten(); + } + public void append(char c) { builder.append(c); } diff --git a/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java b/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java index ebf61c5fe6..1c85016a7f 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java +++ b/modules/nf-lang/src/main/java/nextflow/script/formatter/ScriptFormattingVisitor.java @@ -144,7 +144,7 @@ else if( decl instanceof WorkflowNode wn ) visitWorkflow(wn); } fmt.appendBlanks(1); - System.out.print(commentWriter.toNFComments()); + fmt.exit(); } public String toString() { diff --git a/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java b/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java index 96f7fe54ea..4f162da108 100644 --- a/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java +++ b/modules/nf-lang/src/main/java/nextflow/script/parser/CommentWriter.java @@ -26,6 +26,7 @@ public class CommentWriter { private final List tokensWithInfo; private final Set pendingComments = new HashSet<>(); private final Map> attachedComments = new HashMap<>(); + private final Map> writtenComments = new HashMap<>(); private final Map> leadingComments = new HashMap<>(); private final Map>> withinComments = new HashMap<>(); @@ -73,11 +74,10 @@ public String toString() { } } + String indent = " "; - public String toNFComments() { - var header = "// CommentWriter with " + commentTokens.size() + " comment tokens"; - var unattachedHeader = "// Unattached comments:"; - var indent = " "; + public String writeUnattached() { + var unattachedHeader = "Unattached comments:"; var unattachedText = unattachedComments.stream() .map(t -> { var line = t.getToken().getLine(); @@ -88,27 +88,35 @@ public String toNFComments() { var attachedTo = t.getAttachedTo(); var attachedToText = attachedTo != null ? attachedTo.getText() : "null"; return ( - "// " + indent + line + ":" + column + "{\n" + - "// " + indent + indent + "content: " + text + "\n" + - "// " + indent + indent + "attach: " + attachedToText + "\n" + - "// " + indent + "}\n" + indent + line + ":" + column + "{\n" + + indent + indent + "content: " + text + "\n" + + indent + indent + "attach: " + attachedToText + "\n" + + indent + "}\n" ); }) .collect(Collectors.joining("\n")); - var pendingHeader = "// Pending comments:"; + return unattachedHeader + unattachedText; + } + + public String writePending() { + var pendingHeader = "Pending comments:"; var pendingText = pendingComments.stream() .map(t -> { var line = t.getToken().getLine(); var column = t.getToken().getCharPositionInLine(); var text = t.getToken().getText().replaceAll("\n", "\\n"); return ( - "// " + indent + line + ":" + column + "{\n" + - "// " + indent + indent + "content: " + text + "\n" + - "// " + indent + "}\n" + indent + line + ":" + column + "{\n" + + indent + indent + "content: " + text + "\n" + + indent + "}\n" ); }) .collect(Collectors.joining("\n")); - var attachedHeader = "// Attached comments:"; + return pendingHeader + pendingText; + } + + public String writeAttached() { + var attachedHeader = "Attached comments:"; var attachedText = attachedComments.values() .stream() .flatMap(List::stream) @@ -129,17 +137,63 @@ public String toNFComments() { (t.isTrailing() ? "trailing " : "") ); return ( - "// " + indent + line + ":" + column + "{\n" + - "// " + indent + indent + "content : " + text + "\n" + - "// " + indent + indent + "attached to: " + attachedToText + "\n" + - "// " + indent + indent + "token : " + t.getPositionInfo() + "\n" + - "// " + indent + indent + "written : " + t.isWritten() + "\n" + - "// " + indent + indent + "rel. pos. : " + relPos + "\n" + - "// " + indent + "}\n" + indent + line + ":" + column + "{\n" + + indent + indent + "content : " + text + "\n" + + indent + indent + "attached to: " + attachedToText + "\n" + + indent + indent + "token : " + t.getPositionInfo() + "\n" + + indent + indent + "written : " + t.isWritten() + "\n" + + indent + indent + "rel. pos. : " + relPos + "\n" + + indent + "}\n" + ); + }) + .collect(Collectors.joining("\n")); + return attachedHeader + attachedText; + } + + public String writeWritten() { + var writtenHeader = "Written comments:"; + var writtenText = writtenComments.values() + .stream() + .flatMap(Set::stream) + .map(t -> { + var line = t.getToken().getLine(); + var column = t.getToken().getCharPositionInLine(); + var text = t.getToken().getText().replaceAll("\n", "\\n"); + var isLeading = t.isLeading(); + var isTrailing = t.isTrailing(); + ASTNode attachedTo = t.getAttachedTo(); + var simpleName = attachedTo != null ? attachedTo.getClass().getSimpleName() : null; + var maybeText = attachedTo != null ? attachedTo.getText(): null; + var attachToCode = maybeText != null ? maybeText.replaceAll("\n", "\\n") : "null"; + var attachedToText = attachedTo != null ? simpleName + " " + attachToCode : "null"; + var relPos = ( + (t.isWithin() ? "within " : "")+ + (t.isLeading() ? "leading " : "") + + (t.isTrailing() ? "trailing " : "") + ); + return ( + indent + line + ":" + column + "{\n" + + indent + indent + "content : " + text + "\n" + + indent + indent + "attached to: " + attachedToText + "\n" + + indent + indent + "token : " + t.getPositionInfo() + "\n" + + indent + indent + "written : " + t.isWritten() + "\n" + + indent + indent + "rel. pos. : " + relPos + "\n" + + indent + "}\n" ); }) .collect(Collectors.joining("\n")); - return header + "\n" + unattachedHeader + "\n" + unattachedText + "\n" + pendingHeader + "\n" + pendingText + "\n" + attachedHeader + "\n" + attachedText; + return writtenHeader + writtenText; + } + + + public String writeUnWritten() { + var header = "CommentWriter with " + commentTokens.size() + " comment tokens"; + return header + "\n" + writeUnattached() + "\n" + writePending() + "\n"+ writeAttached() + "\n"; + } + + public String writeAll() { + var header = "CommentWriter with " + commentTokens.size() + " comment tokens"; + return header + "\n" + writeUnattached() + "\n" + writePending() + "\n"+ writeUnattached() + "\n" + writeWritten(); } public Map> getAttachedComments(ASTNode node) { @@ -300,6 +354,23 @@ public Map> getTrailingComments(ASTNode node) { return trailingComments.getOrDefault(node, new HashMap<>()); } + public boolean verifyAllWritten() { + if ( + !( + unattachedComments.size() == 0 && + pendingComments.size() == 0 && + attachedComments.size() == 0 + ) + ) { + // Likely replace this error message with an exception + System.err.println("The following comments are left to be written"); + System.err.println(writeUnWritten()); + return false; + } else { + return true; + } + } + public class Comment { private final Token token; private boolean isLeading = false; @@ -417,6 +488,19 @@ public Tuple2 write() { throw new IllegalStateException(this + " already written"); } isWritten = true; + + writtenComments.computeIfAbsent(attachedTo, k -> new HashSet<>()).add(this); + var attachedNodeComments = attachedComments.get(attachedTo); + if (attachedNodeComments.contains(this)) { + if (attachedNodeComments.size() == 1) { + attachedComments.remove(attachedTo); + } else { + attachedNodeComments.remove(this); + } + } else { + throw new IllegalStateException(this + " was not attached to " + attachedTo); + } + return new Tuple2<>(token.getText(), token.getType() == ScriptLexer.SL_COMMENT); }