Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions rewrite-xml/src/main/antlr/XPathLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright 2024 the original author or authors.
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* https://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* XPath lexer for a limited subset of XPath expressions.
* Supports absolute and relative paths, wildcards, predicates,
* attribute access, and common XPath functions.
*/
lexer grammar XPathLexer;

// Whitespace: one or more spaces, tabs, carriage returns, or newlines.
// The `skip` command discards the match, so no WS token reaches the parser.
WS : [ \t\r\n]+ -> skip ;

// Path separators.
// "//" is emitted as one DOUBLE_SLASH token rather than two SLASH tokens because
// the lexer always prefers the longest match; the relative order of these two
// rules does not affect that.
SLASH : '/' ;
DOUBLE_SLASH : '//' ;

// Brackets.
// In the parser grammar, square brackets delimit predicates and parentheses
// delimit function-call / node-type-test argument lists.
LBRACKET : '[' ;
RBRACKET : ']' ;
LPAREN : '(' ;
RPAREN : ')' ;

// Operators.
// Two-character operators ('!=', '<=', '>=') win over their one-character
// prefixes by the lexer's longest-match rule; rule order only breaks ties
// between matches of equal length, so the ordering below is not load-bearing.
AT : '@' ;
DOT : '.' ; // NOTE(review): not referenced by any parser rule in XPathParser.g4
COMMA : ',' ;
EQUALS : '=' ;
NOT_EQUALS : '!=' ;
LTE : '<=' ; // Longer than LT, so "<=" is never split into LT EQUALS
GTE : '>=' ; // Longer than GT, so ">=" is never split into GT EQUALS
LT : '<' ;
GT : '>' ;
WILDCARD : '*' ;

// Numbers: an unsigned run of digits with an optional fractional part ("1", "2.5").
// Not matched by this rule: a leading sign, a leading '.' (".5"),
// a trailing '.' ("5."), and exponent notation.
NUMBER : [0-9]+ ('.' [0-9]+)? ;

// Logical operators (for predicate conditions)
// These keyword rules, and the two function-name rules below, are declared
// before QNAME. When the input text is exactly 'and', 'or', 'local-name' or
// 'namespace-uri', both the keyword rule and QNAME match the same length and the
// earlier rule wins, so the keyword token is produced. Longer names that merely
// start with a keyword (e.g. "order", "android") still lex as QNAME.
// NOTE(review): the parser's nodeTest/attributeStep accept only QNAME or
// WILDCARD, so an element or attribute named exactly like one of these keywords
// cannot be used as a path step — confirm this limitation is acceptable.
AND : 'and' ;
OR : 'or' ;

// XPath functions that have dedicated tokens (all other function names lex as QNAME)
LOCAL_NAME : 'local-name' ;
NAMESPACE_URI : 'namespace-uri' ;

// String literals: text enclosed in a matching pair of single or double quotes.
// The body may contain any character except the delimiting quote itself
// (newlines and the other quote character are allowed). There is no escape
// sequence for embedding the delimiter. The token text includes both quotes.
STRING_LITERAL
: '\'' (~['])* '\''
| '"' (~["])* '"'
;

// NCName (Non-Colonized Name) - XML name without colons
// QName (Qualified Name) - NCName with optional prefix
// A single combined rule covers both: an NCName optionally followed by
// ':' and a second NCName, e.g. "dependency" or "xsi:schemaLocation".
// Only one colon is permitted, and a wildcard local part ("ns:*") is not matched.
QNAME
: NCNAME (':' NCNAME)?
;

// One name-start character followed by zero or more name characters.
// Declared as a fragment: it is only a building block for QNAME and is never
// emitted as a token of its own.
fragment NCNAME
: NAME_START_CHAR NAME_CHAR*
;

// Characters allowed as the first character of an NCName: ASCII letters,
// underscore, and the listed Unicode ranges within the Basic Multilingual Plane.
// The ranges mirror the XML 1.0 NameStartChar production with ':' left out
// (the colon is handled by QNAME).
// NOTE(review): the XML production also allows U+10000..U+EFFFF, which is not
// listed here — confirm whether supplementary-plane names need to be supported.
fragment NAME_START_CHAR
: [a-zA-Z_]
| '\u00C0'..'\u00D6'
| '\u00D8'..'\u00F6'
| '\u00F8'..'\u02FF'
| '\u0370'..'\u037D'
| '\u037F'..'\u1FFF'
| '\u200C'..'\u200D'
| '\u2070'..'\u218F'
| '\u2C00'..'\u2FEF'
| '\u3001'..'\uD7FF'
| '\uF900'..'\uFDCF'
| '\uFDF0'..'\uFFFD'
;

// Characters allowed after the first character of an NCName: everything in
// NAME_START_CHAR plus hyphen, period, ASCII digits, the middle dot (U+00B7),
// combining marks (U+0300..U+036F) and U+203F..U+2040.
// Because '-' and '.' are name characters, text such as "a-b" or "a.b" lexes
// as a single QNAME.
fragment NAME_CHAR
: NAME_START_CHAR
| '-'
| '.'
| [0-9]
| '\u00B7'
| '\u0300'..'\u036F'
| '\u203F'..'\u2040'
;
33 changes: 33 additions & 0 deletions rewrite-xml/src/main/antlr/XPathLexer.tokens
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
WS=1
SLASH=2
DOUBLE_SLASH=3
LBRACKET=4
RBRACKET=5
LPAREN=6
RPAREN=7
AT=8
DOT=9
COMMA=10
EQUALS=11
NOT_EQUALS=12
LTE=13
GTE=14
LT=15
GT=16
WILDCARD=17
NUMBER=18
AND=19
OR=20
LOCAL_NAME=21
NAMESPACE_URI=22
STRING_LITERAL=23
QNAME=24
'/'=2
'//'=3
'['=4
']'=5
'('=6
')'=7
'@'=8
'.'=9
','=10
'='=11
'!='=12
'<='=13
'>='=14
'<'=15
'>'=16
'*'=17
'and'=19
'or'=20
'local-name'=21
'namespace-uri'=22
177 changes: 177 additions & 0 deletions rewrite-xml/src/main/antlr/XPathParser.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/*
* Copyright 2024 the original author or authors.
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* https://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* XPath parser for a limited subset of XPath expressions.
*
* Supports:
* - Absolute paths: /root/child
* - Relative paths: child/grandchild
* - Descendant-or-self: //element
* - Wildcards: /root/*
* - Attribute access: /root/@attr, /root/element/@*
* - Node type tests: /root/element/text(), /root/comment(), etc.
* - Predicates with conditions: /root/element[@attr='value']
* - Child element predicates: /root/element[child='value']
* - Positional predicates: /root/element[1], /root/element[last()]
* - XPath functions: local-name(), namespace-uri(), text(), contains(), position(), last(), etc.
* - Logical operators in predicates: and, or
* - Multiple predicates: /root/element[@attr='value'][local-name()='element']
* - Top-level function expressions: contains(/root/element, 'value')
* - Boolean expressions: not(contains(...)), string-length(...) > 2
*/
parser grammar XPathParser;

options { tokenVocab=XPathLexer; }

// Entry point for an XPath expression.
//
// Accepts one of: a function-call based boolean expression, an absolute
// location path, or a relative location path. When input fits more than one
// alternative (e.g. "text()"), the earliest listed alternative is chosen.
//
// The trailing EOF forces the parser to consume the entire token stream.
// Without it, parsing stops after the longest valid prefix and any remaining
// tokens (for example the stray "]" in "/a/b]") are silently ignored instead of
// being reported as a syntax error.
// Note: EOF appears as an additional terminal child of this rule's context;
// the booleanExpr()/absoluteLocationPath()/relativeLocationPath() accessors are
// unaffected.
xpathExpression
: ( booleanExpr
| absoluteLocationPath
| relativeLocationPath
) EOF
;

// Boolean expression: a function call, optionally compared against a literal.
// Examples: contains(/root/element, 'value'), string-length(/a/b) > 2.
// The left-hand side must be a function call; a bare path followed by a
// comparison is not accepted by this rule.
booleanExpr
: functionCall comparisonOp comparand
| functionCall
;

// Comparison operators: =, !=, <, >, <=, >=
// Exactly one operator token is matched.
comparisonOp
: EQUALS
| NOT_EQUALS
| LT
| GT
| LTE
| GTE
;

// Value to compare against: the right-hand side of a comparison.
// Only literals are allowed (a quoted string or a NUMBER); paths and function
// calls cannot appear on the right-hand side.
comparand
: stringLiteral
| NUMBER
;

// Absolute path starting with / or //
// A lone "/" is accepted because the relative part is optional after SLASH.
// "//" must be followed by at least one step.
absoluteLocationPath
: SLASH relativeLocationPath?
| DOUBLE_SLASH relativeLocationPath
;

// Relative path: one or more steps joined by '/' or '//' separators,
// e.g. child/grandchild or a//b/@id.
relativeLocationPath
: step (pathSeparator step)*
;

// Path separator between two steps: '/' or '//'.
pathSeparator
: SLASH
| DOUBLE_SLASH
;

// A single step in the path.
// A node test (element name or '*') or an attribute step may be followed by any
// number of predicates. The nodeTypeTest alternative takes no predicates, so
// input such as text()[1] is not accepted as a step by this rule.
step
: nodeTest predicate*
| attributeStep predicate*
| nodeTypeTest
;

// Node type test - text(), comment(), node(), processing-instruction()
// Syntactically this is any QNAME followed by empty parentheses; the grammar
// does not restrict the name.
// Validation of which functions are valid node type tests happens at runtime
nodeTypeTest
: QNAME LPAREN RPAREN
;

// Attribute step: '@' followed by an attribute name (@attr, @ns:attr)
// or by '*' for any attribute (@*).
attributeStep
: AT (QNAME | WILDCARD)
;

// Node test: an element name (optionally prefixed, as lexed by QNAME)
// or the '*' wildcard.
nodeTest
: QNAME
| WILDCARD
;

// Predicate: a single predicate expression enclosed in square brackets,
// e.g. [@attr='value'] or [1]. An empty "[]" is not accepted.
predicate
: LBRACKET predicateExpr RBRACKET
;

// Predicate expression (supports and/or).
// Delegates to orExpr, the lowest-precedence level of the expression chain.
predicateExpr
: orExpr
;

// OR expression (lowest precedence): one or more andExpr operands joined by 'or'.
// A single operand without any 'or' is also matched by this rule.
orExpr
: andExpr (OR andExpr)*
;

// AND expression (binds tighter than OR): one or more primaryExpr operands
// joined by 'and'. There is no rule for parenthesized sub-expressions, so
// grouping cannot override this precedence.
andExpr
: primaryExpr (AND primaryExpr)*
;

// Primary expression in a predicate: a value, optionally compared to a literal.
// Both alternatives begin with predicateValue; the presence of a comparison
// operator after it selects the first alternative.
primaryExpr
: predicateValue comparisonOp comparand // value compared against a string or number literal
| predicateValue // standalone value, e.g. last(), position(), a number, @attr, child
;

// A value-producing expression in a predicate.
// Several alternatives overlap, and when input matches more than one of them
// the earliest listed alternative is chosen:
//  - "name()" matches functionCall and relativeLocationPath (via nodeTypeTest);
//    functionCall is chosen.
//  - "@attr" matches attributeStep and relativeLocationPath; attributeStep is
//    chosen unless predicates or further steps follow.
//  - a bare QNAME or '*' matches relativeLocationPath and childElementTest;
//    relativeLocationPath is chosen.
// NOTE(review): because every input childElementTest can match is also a
// one-step relativeLocationPath listed before it, the childElementTest
// alternative appears to be unreachable — confirm whether it should be listed
// ahead of relativeLocationPath or dropped.
predicateValue
: functionCall // local-name(), last(), position(), contains(), etc.
| attributeStep // @attr, @*
| relativeLocationPath // bar/baz/text()
| childElementTest // child, *
| NUMBER // positional predicate [1], [2], etc.
;

// XPath function call - unified for both top-level and predicate use.
// local-name() and namespace-uri() have dedicated lexer tokens and take no
// arguments here. Any other function is a QNAME followed by an optional,
// comma-separated argument list; the grammar does not check the function name
// or the number of arguments.
functionCall
: LOCAL_NAME LPAREN RPAREN
| NAMESPACE_URI LPAREN RPAREN
| QNAME LPAREN functionArgs? RPAREN
;

// Function arguments: one or more arguments separated by commas.
// The empty argument list is handled by the optional reference in functionCall.
functionArgs
: functionArg (COMMA functionArg)*
;

// A single function argument: an absolute path, a nested function call,
// a relative path, a string literal, or a number.
// Most alternatives are told apart by lookahead: an absolute path starts with
// '/' or '//', literals are single distinct tokens, and a call with arguments
// has tokens between its parentheses (nodeTypeTest requires them to be empty).
// The one truly ambiguous input is "name()" with empty parentheses, which
// matches both functionCall and relativeLocationPath (via step -> nodeTypeTest).
// Ambiguities are resolved in favor of the earlier alternative, so functionCall
// must stay listed before relativeLocationPath for "name()" to parse as a call.
functionArg
: absoluteLocationPath
| functionCall
| relativeLocationPath
| stringLiteral
| NUMBER
;

// Child element test in predicate: a child element name or '*',
// as in [child='value'].
// Matches the same tokens as nodeTest (QNAME or WILDCARD).
childElementTest
: QNAME
| WILDCARD
;

// String literal value: wraps the STRING_LITERAL token in a parser rule.
// The token text still includes the surrounding quote characters.
stringLiteral
: STRING_LITERAL
;
Loading