Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

<regex>: Revise caret parsing in basic and grep mode #5165

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
fe5a1f1
`<regex>`: Circumflex ^ should negate character classes in basic regu…
muellerj2 Dec 5, 2024
fbe0a0c
Revert product code.
StephanTLavavej Dec 9, 2024
f787265
Move (and De Morgan) `_L_anch_rstr` logic to "add bol node".
StephanTLavavej Dec 9, 2024
3269019
circumflex => Caret
StephanTLavavej Dec 9, 2024
fcd95cf
Expand test coverage.
StephanTLavavej Dec 9, 2024
edada37
fix miscompilation of double carets and treat carets as anchors at th…
muellerj2 Dec 15, 2024
d3de073
add test coverage for double carets and caret anchors at the beginnin…
muellerj2 Dec 15, 2024
625e1a5
Make _L_star_beg setting orthogonal to _L_anch_rstr
muellerj2 Dec 15, 2024
3363b6d
fix inverted condition
muellerj2 Dec 15, 2024
b6c2765
add test coverage for initial * in expressions, subexpressions and al…
muellerj2 Dec 15, 2024
f10b6b2
Merge branch 'main' into fix-negated-char-classes-in-basic-regexes
muellerj2 Dec 15, 2024
38ed60b
adjust comment
muellerj2 Dec 16, 2024
f827e28
extend and clean up tests
muellerj2 Dec 17, 2024
c93343d
Merge branch 'main' into fix-negated-char-classes-in-basic-regexes
StephanTLavavej Jan 14, 2025
4aa7599
Merge branch 'main' into fix-negated-char-classes-in-basic-regexes
StephanTLavavej Mar 20, 2025
3fa5dc4
Adjust newlines.
StephanTLavavej Mar 20, 2025
0dc4ca7
Add 'the'.
StephanTLavavej Mar 20, 2025
003382a
Simplify raw string literals: `R"-(...)-"` => `R"(...)"`
StephanTLavavej Mar 20, 2025
9076887
Fix inconsistent pattern.
StephanTLavavej Mar 20, 2025
e1dd4b9
Use raw string literals when newlines aren't involved.
StephanTLavavej Mar 20, 2025
4bc72f5
Fix mistakenly duplicated test line.
StephanTLavavej Mar 20, 2025
f6725da
Extract basic-only and grep-only tests into separate functions.
StephanTLavavej Mar 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 16 additions & 24 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1511,7 +1511,6 @@ public:
using _Difft = typename iterator_traits<_FwdIt>::difference_type;

_Builder(const _RxTraits& _Tr, regex_constants::syntax_option_type);
bool _Beg_expr() const;
void _Setlong();
// _Discard_pattern is an ABI zombie name
void _Tidy() noexcept;
Expand Down Expand Up @@ -1547,7 +1546,6 @@ private:
static void _Insert_node(_Node_base*, _Node_base*);
_Node_base* _New_node(_Node_type _Kind);
void _Add_str_node();
bool _Beg_expr(_Node_base*) const;
void _Add_char_to_bitmap(_Elem _Ch);
void _Add_char_to_array(_Elem _Ch);
void _Add_elts(_Node_class<_Elem, _RxTraits>*, typename _RxTraits::char_class_type, bool);
Expand Down Expand Up @@ -2784,17 +2782,6 @@ _Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Getmark() const {
return _Current;
}

template <class _FwdIt, class _Elem, class _RxTraits>
bool _Builder<_FwdIt, _Elem, _RxTraits>::_Beg_expr(_Node_base* _Nx) const {
    // Test whether _Nx marks the beginning of an expression or subexpression,
    // i.e. whether its kind is _N_begin, _N_group, or _N_capture.
    const auto _Kx = _Nx->_Kind;

    if (_Kx == _N_begin) {
        return true;
    }

    if (_Kx == _N_group) {
        return true;
    }

    return _Kx == _N_capture;
}

template <class _FwdIt, class _Elem, class _RxTraits>
bool _Builder<_FwdIt, _Elem, _RxTraits>::_Beg_expr() const {
    // Test whether the builder is positioned at the beginning of an expression or subexpression.
    // The current node qualifies directly if it is itself a beginning node.
    if (_Beg_expr(_Current)) {
        return true;
    }

    // Otherwise it still qualifies when the current node is a _N_bol node
    // whose predecessor is a beginning node.
    return _Current->_Kind == _N_bol && _Beg_expr(_Current->_Prev);
}

template <class _FwdIt, class _Elem, class _RxTraits>
_Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Link_node(_Node_base* _Nx) { // insert _Nx at current location
_Nx->_Prev = _Current;
Expand Down Expand Up @@ -3905,17 +3892,16 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char
break;

case _Meta_star:
if ((_L_flags & _L_star_beg) && _Nfa._Beg_expr()) {
_Mchar = _Meta_chr;
}

// A star can always act as a quantifier outside bracket expressions,
// but _L_star_beg (used by basic/grep) allows its use as an ordinary character
// at the beginning of a (sub-)expression (potentially after an optional caret anchor).
// We'll handle that when we are parsing alternatives in disjunctions.
break;

case _Meta_caret:
if ((_L_flags & _L_anch_rstr) && !_Nfa._Beg_expr()) {
_Mchar = _Meta_chr;
}

// A caret can always negate a bracket expression,
// but _L_anch_rstr (used by basic/grep) restricts caret anchors to the beginning.
// We'll handle that restriction when we're about to add a bol node.
break;

case _Meta_dlr:
Expand Down Expand Up @@ -4481,15 +4467,21 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alte
_Next();
_Quant = _Wrapped_disjunction();
_Expect(_Meta_rpar, regex_constants::error_paren);
} else if (_Mchar == _Meta_caret) { // add bol node
} else if (_Mchar == _Meta_caret && (!(_L_flags & _L_anch_rstr) || !_Found)) { // add bol node
_Nfa._Add_bol();
_Next();
_Quant = false;
if ((_L_flags & _L_star_beg) && _Mchar == _Meta_star && !_Found) {
_Nfa._Add_char(_Char);
_Next();
} else {
_Quant = false;
}
} else if (_Mchar == _Meta_dlr) { // add eol node
_Nfa._Add_eol();
_Next();
_Quant = false;
} else if (_Mchar == _Meta_star || _Mchar == _Meta_plus || _Mchar == _Meta_query || _Mchar == _Meta_lbr) {
} else if ((_Mchar == _Meta_star && (!(_L_flags & _L_star_beg) || _Found)) || _Mchar == _Meta_plus
|| _Mchar == _Meta_query || _Mchar == _Meta_lbr) {
_Error(regex_constants::error_badrepeat);
} else if (_Mchar == _Meta_rbr && !(_L_flags & _L_paren_bal)) {
_Error(regex_constants::error_brace);
Expand Down
Loading