Skip to content

Commit 3b007c2

Browse files
authored
Improve whitespace regex detection (#1589)
Previously, splitting a terminal rule into separate "spaces/tabs" and "newlines" terminals would cause the "newlines" one to be detected as a comment when generating a TextMate grammar.
1 parent 3e62f1a commit 3b007c2

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

packages/langium/src/utils/grammar-utils.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import type { AstNode, CstNode } from '../syntax-tree.js';
1010
import { isCompositeCstNode } from '../syntax-tree.js';
1111
import { getContainerOfType, streamAllContents } from './ast-utils.js';
1212
import { streamCst } from './cst-utils.js';
13-
import { escapeRegExp } from './regexp-utils.js';
13+
import { escapeRegExp, isWhitespace } from './regexp-utils.js';
1414

1515
/**
1616
* Returns the entry rule of the given grammar, if any. If the grammar file does not contain an entry rule,
@@ -92,7 +92,7 @@ export function getCrossReferenceTerminal(crossRef: ast.CrossReference): ast.Abs
9292
* that contains visible characters is considered a comment.
9393
*/
9494
export function isCommentTerminal(terminalRule: ast.TerminalRule): boolean {
95-
return terminalRule.hidden && !terminalRegex(terminalRule).test(' ');
95+
return terminalRule.hidden && !isWhitespace(terminalRegex(terminalRule));
9696
}
9797

9898
/**

packages/langium/src/utils/regexp-utils.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,17 @@ export function isMultilineComment(regexp: RegExp | string): boolean {
138138
}
139139
}
140140

141+
/**
142+
* A set of all characters that are considered whitespace by the '\s' RegExp character class.
143+
* Taken from [MDN](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes).
144+
*/
145+
export const whitespaceCharacters = (
146+
'\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007' +
147+
'\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff').split('');
148+
141149
export function isWhitespace(value: RegExp | string): boolean {
142150
const regexp = typeof value === 'string' ? new RegExp(value) : value;
143-
return regexp.test(' ');
151+
return whitespaceCharacters.some((ws) => regexp.test(ws));
144152
}
145153

146154
export function escapeRegExp(value: string): string {

0 commit comments

Comments
 (0)